[PATCH 2/5] Add manpages for move_mount(2) and open_tree(2)

2020-08-07 Thread David Howells
Add manual pages to document the move_mount and open_tree() system calls.

Signed-off-by: David Howells 
---

 man2/move_mount.2 |  275 +
 1 file changed, 275 insertions(+)
 create mode 100644 man2/move_mount.2

diff --git a/man2/move_mount.2 b/man2/move_mount.2
new file mode 100644
index 0..aae9013fa
--- /dev/null
+++ b/man2/move_mount.2
@@ -0,0 +1,275 @@
+'\" t
+.\" Copyright (c) 2020 David Howells 
+.\"
+.\" %%%LICENSE_START(VERBATIM)
+.\" Permission is granted to make and distribute verbatim copies of this
+.\" manual provided the copyright notice and this permission notice are
+.\" preserved on all copies.
+.\"
+.\" Permission is granted to copy and distribute modified versions of this
+.\" manual under the conditions for verbatim copying, provided that the
+.\" entire resulting derived work is distributed under the terms of a
+.\" permission notice identical to this one.
+.\"
+.\" Since the Linux kernel and libraries are constantly changing, this
+.\" manual page may be incorrect or out-of-date.  The author(s) assume no
+.\" responsibility for errors or omissions, or for damages resulting from
+.\" the use of the information contained herein.  The author(s) may not
+.\" have taken the same level of care in the production of this manual,
+.\" which is licensed free of charge, as they might when working
+.\" professionally.
+.\"
+.\" Formatted or processed versions of this manual, if unaccompanied by
+.\" the source, must acknowledge the copyright and authors of this work.
+.\" %%%LICENSE_END
+.\"
+.TH MOVE_MOUNT 2 2020-08-07 "Linux" "Linux Programmer's Manual"
+.SH NAME
+move_mount \- Move mount objects around the filesystem topology
+.SH SYNOPSIS
+.nf
+.B #include <sys/types.h>
+.br
+.B #include <sys/mount.h>
+.br
+.B #include <unistd.h>
+.br
+.BR "#include <fcntl.h>           " "/* Definition of AT_* constants */"
+.PP
+.BI "int move_mount(int " from_dirfd ", const char *" from_pathname ","
+.BI "   int " to_dirfd ", const char *" to_pathname ","
+.BI "   unsigned int " flags );
+.fi
+.PP
+.IR Note :
+There is no glibc wrapper for this system call.
+.SH DESCRIPTION
+The
+.BR move_mount ()
+call moves a mount from one place to another; it can also be used to attach an
+unattached mount created by
+.BR fsmount "() or " open_tree "() with " OPEN_TREE_CLONE .
+.PP
+If
+.BR move_mount ()
+is called repeatedly with a file descriptor that refers to a mount object,
+then the object will be attached/moved the first time and then moved again and
+again and again, detaching it from the previous mountpoint each time.
+.PP
+To access the source mount object or the destination mountpoint, no
+permissions are required on the object itself, but if either pathname is
+supplied, execute (search) permission is required on all of the directories
+specified in
+.IR from_pathname " or " to_pathname .
+.PP
+The caller does, however, require the appropriate capabilities or permission
+to effect a mount.
+.PP
+.BR move_mount ()
+uses
+.IR from_pathname ", " from_dirfd " and part of " flags
+to locate the mount object to be moved and
+.IR to_pathname ", " to_dirfd " and another part of " flags
+to locate the destination mountpoint.  Each lookup can be done in one of a
+variety of ways:
+.TP
+[*] By absolute path.
+The pathname points to an absolute path and the dirfd is ignored.  The file is
+looked up by name, starting from the root of the filesystem as seen by the
+calling process.
+.TP
+[*] By cwd-relative path.
+The pathname points to a relative path and the dirfd is
+.IR AT_FDCWD .
+The file is looked up by name, starting from the current working directory.
+.TP
+[*] By dir-relative path.
+The pathname points to relative path and the dirfd indicates a file descriptor
+pointing to a directory.  The file is looked up by name, starting from the
+directory specified by
+.IR dirfd .
+.TP
+[*] By file descriptor.
+The pathname points to "", the dirfd points directly to the mount object to
+move or the destination mount point and the appropriate
+.B *_EMPTY_PATH
+flag is set.
+.PP
+.I flags
+can be used to influence a path-based lookup.  A value for
+.I flags
+is constructed by OR'ing together zero or more of the following constants:
+.TP
+.B MOVE_MOUNT_F_EMPTY_PATH
+.\" commit 65cfc6722361570bfe255698d9cd4dccaf47570d
+If
+.I from_pathname
+is an empty string, operate on the file referred to by
+.IR from_dirfd
+(which may have been obtained using the
+.BR open (2)
+.B O_PATH
+flag or
+.BR open_tree ()).
+If
+.I from_dirfd
+is
+.BR AT_FDCWD ,
+the call operates on the current working directory.
+In this case,
+.I from_dirfd
+can refer to any type of file, not just a directory.
+This flag is Linux-specific; define
+.B _GNU_SOURCE
+.\" Before glibc 2.16, defining _ATFILE_SOURCE sufficed
+to obtain its definition.
+.TP
+.B MOVE_MOUNT_T_EMPTY_PATH
+As above, but operating on
+.IR to_pathname " and " to_dirfd .
+.TP
+.B MOVE_MOUNT_F_NO_AUTOMOUNT
+Don't automount the terminal ("basename") component of
+.I from_pathname
+if it is a directory that is an automount point.

[tip:locking/urgent] BUILD SUCCESS 0cd39f4600ed4de859383018eb10f0f724900e1b

2020-08-07 Thread kernel test robot
defconfig
mips allyesconfig
mips allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a006-20200806
x86_64   randconfig-a001-20200806
x86_64   randconfig-a004-20200806
x86_64   randconfig-a005-20200806
x86_64   randconfig-a003-20200806
x86_64   randconfig-a002-20200806
i386 randconfig-a005-20200805
i386 randconfig-a004-20200805
i386 randconfig-a001-20200805
i386 randconfig-a003-20200805
i386 randconfig-a002-20200805
i386 randconfig-a006-20200805
i386 randconfig-a005-20200806
i386 randconfig-a004-20200806
i386 randconfig-a001-20200806
i386 randconfig-a002-20200806
i386 randconfig-a003-20200806
i386 randconfig-a006-20200806
i386 randconfig-a005-20200807
i386 randconfig-a004-20200807
i386 randconfig-a001-20200807
i386 randconfig-a002-20200807
i386 randconfig-a003-20200807
i386 randconfig-a006-20200807
x86_64   randconfig-a013-20200807
x86_64   randconfig-a011-20200807
x86_64   randconfig-a012-20200807
x86_64   randconfig-a016-20200807
x86_64   randconfig-a015-20200807
x86_64   randconfig-a014-20200807
i386 randconfig-a011-20200806
i386 randconfig-a012-20200806
i386 randconfig-a013-20200806
i386 randconfig-a015-20200806
i386 randconfig-a014-20200806
i386 randconfig-a016-20200806
i386 randconfig-a011-20200807
i386 randconfig-a012-20200807
i386 randconfig-a013-20200807
i386 randconfig-a015-20200807
i386 randconfig-a014-20200807
i386 randconfig-a016-20200807
i386 randconfig-a011-20200805
i386 randconfig-a012-20200805
i386 randconfig-a013-20200805
i386 randconfig-a014-20200805
i386 randconfig-a015-20200805
i386 randconfig-a016-20200805
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  kexec

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [PATCH] kconfig qconf: Add grey background for hidden options

2020-08-07 Thread Masahiro Yamada
On Wed, Jul 8, 2020 at 10:30 PM Maxime Chretien
 wrote:
>
> This is useful to see which configuration parameters can be edited
> or not when "Show All Options" is enabled.
>
> Signed-off-by: Maxime Chretien 
> ---
>  scripts/kconfig/qconf.cc | 7 +++
>  scripts/kconfig/qconf.h  | 4 
>  2 files changed, 11 insertions(+)


I like the idea, but
maybe this patch could be improved?

For example, in the following test code,
BAR is correctly painted grey when CONFIG_FOO=n,
but "my menu" is always white despite
"depends on FOO"

(test code)--

config FOO
   bool "foo"

config BAR
   bool "bar"
   depends on FOO

menu "my menu"
   depends on FOO

endmenu

---(test code end)







> diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
> index c0ac8f7b5f1a..be9ff4651da1 100644
> --- a/scripts/kconfig/qconf.cc
> +++ b/scripts/kconfig/qconf.cc
> @@ -208,6 +208,13 @@ void ConfigItem::updateMenu(void)
> }
> if (!sym_has_value(sym) && visible)
> prompt += " (NEW)";
> +
> +   if(!visible) {
> +   setBackground(promptColIdx, QBrush(QColor("#E0E0E0")));
> +   } else {
> +   setBackground(promptColIdx, QBrush());
> +   }
> +


I think all the columns should be grey-grounded.
Please note you can click other columns to
toggle y/m/n.


How about something like this?



QBrush brush;

if (visible)
brush = QBrush();
else
brush = QBrush(QColor("#E0E0E0"));

setBackground(promptColIdx, brush);
setBackground(nameColIdx, brush);
setBackground(noColIdx, brush);
setBackground(modColIdx, brush);
setBackground(yesColIdx, brush);
setBackground(dataColIdx, brush);




>  set_prompt:
> setText(promptColIdx, prompt);
>  }
> diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h
> index c879d79ce817..79e47e8c1ae7 100644
> --- a/scripts/kconfig/qconf.h
> +++ b/scripts/kconfig/qconf.h
> @@ -174,6 +174,10 @@ class ConfigItem : public QTreeWidgetItem {
> {
> return Parent::text(idx);
> }
> +   void setBackground(colIdx idx, const QBrush& brush)
> +   {
> +   Parent::setBackground(idx, brush);
> +   }

I do not understand why this wrapper is useful...


> void setPixmap(colIdx idx, const QIcon &icon)
> {
> Parent::setIcon(idx, icon);
> --
> 2.27.0
>


-- 
Best Regards
Masahiro Yamada


Re: [RFC PATCH v2 6/6] sched/fair: Implement starvation monitor

2020-08-07 Thread luca abeni
On Fri, 7 Aug 2020 15:43:53 +0200
Juri Lelli  wrote:

> On 07/08/20 15:28, luca abeni wrote:
> > Hi Juri,
> > 
> > On Fri, 7 Aug 2020 11:56:04 +0200
> > Juri Lelli  wrote:
> >   
> > > Starting deadline server for lower priority classes right away
> > > when first task is enqueued might break guarantees  
> > 
> > Which guarantees are you thinking about, here? Response times of
> > fixed priority tasks?  
> 
> Response time, but also wakeup latency (which, for better or worse, is
> another important metric).
> 
> > If fixed priority tasks are also scheduled through deadline servers,
> > then you can provide response-time guarantees to them even when
> > lower-priority/non-real-time tasks are scheduled through deadline
> > servers.  
> 
> Right, but I fear we won't be able to keep current behavior for
> wakeups: RT with highest prio always gets scheduled right away?

Uhm... I think this depends on how the servers' parameters are
designed: assigning "wrong" (or "bad") parameters to the server used to
schedule RT tasks, this property is broken.

(however, notice that even with the current patchset the highest
priority task might be scheduled with some delay --- if the SCHED_OTHER
deadline server is active because SCHED_OTHER tasks are being starved).



Luca


[PATCH v1] MIPS: uasm: false warning on use of uasm_i_lui()

2020-08-07 Thread Jim Quinlan
Currently, the example uasm code

uasm_i_lui(p, tmp, 0xa000);

issues a warning at Linux boot when the code is "assembled".  This is
because the "lui" instruction is defined by the macro "Ip_u1s2(_lui)" -- I
believe it should be Ip_u1u2(_lui) -- and its definition is associated with
the SIMM macro -- I believe it should be the UIMM macro.  The current code
takes a 32bit number and checks that it can be converted to a 16bit signed
immediate.  This check fails of course for an immediate such as 0xa000.

This is fixed.  However, there are two uses of uasm_i_lui() in
UASM_i_LA_mostly() which use 16bit signed immediates in the form of a
sign-extended 32 bit number.  Left alone these may now cause a warning when
being processed by build_imm().  These two uses have been modified by first
calling build_simm() on the argument to uasm_i_lui() as to convert it to a
proper 16 bit unsigned integer.

Signed-off-by: Jim Quinlan 
---
 arch/mips/include/asm/uasm.h  | 2 +-
 arch/mips/mm/uasm-micromips.c | 2 +-
 arch/mips/mm/uasm-mips.c  | 2 +-
 arch/mips/mm/uasm.c   | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index f7effca791a5..7ea1d338570b 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -127,7 +127,7 @@ Ip_u2s3u1(_lh);
 Ip_u2s3u1(_lhu);
 Ip_u2s3u1(_ll);
 Ip_u2s3u1(_lld);
-Ip_u1s2(_lui);
+Ip_u1u2(_lui);
 Ip_u2s3u1(_lw);
 Ip_u2s3u1(_lwu);
 Ip_u3u1u2(_lwx);
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index 75ef90486fe6..86ee1499e120 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -82,7 +82,7 @@ static const struct insn insn_table_MM[insn_invalid] = {
[insn_lh]   = {M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
[insn_ll]   = {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS 
| RT | SIMM},
[insn_lld]  = {0, 0},
-   [insn_lui]  = {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM},
+   [insn_lui]  = {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | UIMM},
[insn_lw]   = {M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
[insn_mfc0] = {M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, 
mm_pool32axf_op), RT | RS | RD},
[insn_mfhi] = {M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, 
mm_pool32axf_op), RS},
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 7154a1d99aad..b45c15111d68 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -132,7 +132,7 @@ static const struct insn insn_table[insn_invalid] = {
[insn_ll]   = {M6(spec3_op, 0, 0, 0, ll6_op),  RS | RT | SIMM9},
[insn_lld]  = {M6(spec3_op, 0, 0, 0, lld6_op),  RS | RT | SIMM9},
 #endif
-   [insn_lui]  = {M(lui_op, 0, 0, 0, 0, 0),RT | SIMM},
+   [insn_lui]  = {M(lui_op, 0, 0, 0, 0, 0),RT | UIMM},
[insn_lw]   = {M(lw_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
[insn_lwu]  = {M(lwu_op, 0, 0, 0, 0, 0),  RS | RT | SIMM},
[insn_lwx]  = {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD},
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index c56f129c9a4b..ca5d47da3bd1 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -327,7 +327,7 @@ I_u2s3u1(_lh)
 I_u2s3u1(_lhu)
 I_u2s3u1(_ll)
 I_u2s3u1(_lld)
-I_u1s2(_lui)
+I_u1u2(_lui)
 I_u2s3u1(_lw)
 I_u2s3u1(_lwu)
 I_u1u2u3(_mfc0)
@@ -457,7 +457,7 @@ UASM_EXPORT_SYMBOL(uasm_rel_lo);
 void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
 {
if (!uasm_in_compat_space_p(addr)) {
-   uasm_i_lui(buf, rs, uasm_rel_highest(addr));
+   uasm_i_lui(buf, rs, build_simm(uasm_rel_highest(addr)));
if (uasm_rel_higher(addr))
uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr));
if (uasm_rel_hi(addr)) {
@@ -468,7 +468,7 @@ void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr)
} else
uasm_i_dsll32(buf, rs, rs, 0);
} else
-   uasm_i_lui(buf, rs, uasm_rel_hi(addr));
+   uasm_i_lui(buf, rs, build_simm(uasm_rel_hi(addr)));
 }
 UASM_EXPORT_SYMBOL(UASM_i_LA_mostly);
 
-- 
2.17.1



Re: [RFC PATCH v2 6/6] sched/fair: Implement starvation monitor

2020-08-07 Thread luca abeni
Hi Peter,

On Fri, 7 Aug 2020 12:46:18 +0200
pet...@infradead.org wrote:

> On Fri, Aug 07, 2020 at 11:56:04AM +0200, Juri Lelli wrote:
> > Starting deadline server for lower priority classes right away when
> > first task is enqueued might break guarantees, as tasks belonging to
> > intermediate priority classes could be uselessly preempted. E.g., a
> > well behaving (non hog) FIFO task can be preempted by NORMAL tasks
> > even if there are still CPU cycles available for NORMAL tasks to
> > run, as they'll be running inside the fair deadline server for some
> > period of time.
> > 
> > To prevent this issue, implement a starvation monitor mechanism that
> > starts the deadline server only if a (fair in this case) task hasn't
> > been scheduled for some interval of time after it has been enqueued.
> > Use pick/put functions to manage starvation monitor status.  
> 
> One thing I considerd was scheduling this as a least-laxity entity --
> such that it runs late, not early

Are you thinking about scheduling both RT and non-RT tasks through
deadline servers? If yes, then I think that using something like
laxity-based scheduling for the SCHED_OTHER server can be a good idea
(but then we need to understand how to combine deadline-based
scheduling with laxity-based scheduling, etc...)

Or are you thinking about keeping the SCHED_OTHER server throttled
until its laxity is 0 (or until its laxity is lower than some small
value)? In this second case, the approach would work even if RT tasks
are not scheduled through a server (but I do not know which kind of
performance guarantee we could provide).


> -- and start the server when
> rq->nr_running != rq->cfs.h_nr_running, IOW when there's !fair tasks
> around.

Yes, this could be a good optimization.



Luca
> 
> Not saying we should do it like that, but that's perhaps more
> deterministic than this.



Re: [PATCH 1/1] Drivers: hv: vmbus: Only notify Hyper-V for die events that are oops

2020-08-07 Thread Wei Liu
On Fri, Aug 07, 2020 at 11:06:47AM +0200, Vitaly Kuznetsov wrote:
> Michael Kelley  writes:
> 
> > Hyper-V currently may be notified of a panic for any die event. But
> > this results in false panic notifications for various user space traps
> > that are die events. Fix this by ignoring die events that aren't oops.
> >
> > Fixes: 510f7aef65bb ("Drivers: hv: vmbus: prefer 'die' notification chain 
> > to 'panic'")
> > Signed-off-by: Michael Kelley 
> > ---
> >  drivers/hv/vmbus_drv.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> > index b50081c..910b6e9 100644
> > --- a/drivers/hv/vmbus_drv.c
> > +++ b/drivers/hv/vmbus_drv.c
> > @@ -86,6 +86,10 @@ static int hyperv_die_event(struct notifier_block *nb, 
> > unsigned long val,
> > struct die_args *die = (struct die_args *)args;
> > struct pt_regs *regs = die->regs;
> >  
> > +   /* Don't notify Hyper-V if the die event is other than oops */
> > +   if (val != DIE_OOPS)
> > +   return NOTIFY_DONE;
> > +
> 
> Looking at die_val enum, DIE_PANIC also sounds like something we would
> want to report but it doesn't get emitted anywhere and honestly I don't
> quite understand how is was supposed to be different from DIE_OOPS.
> 
> Reviewed-by: Vitaly Kuznetsov 

Applied to hyperv-fixes.

Wei.

> 
> > /*
> >  * Hyper-V should be notified only once about a panic.  If we will be
> >  * doing hyperv_report_panic_msg() later with kmsg data, don't do
> 
> -- 
> Vitaly
> 


Re: [RFC PATCH v2 6/6] sched/fair: Implement starvation monitor

2020-08-07 Thread Juri Lelli
On 07/08/20 15:28, luca abeni wrote:
> Hi Juri,
> 
> On Fri, 7 Aug 2020 11:56:04 +0200
> Juri Lelli  wrote:
> 
> > Starting deadline server for lower priority classes right away when
> > first task is enqueued might break guarantees
> 
> Which guarantees are you thinking about, here? Response times of fixed
> priority tasks?

Response time, but also wakeup latency (which, for better or worse, is
another important metric).

> If fixed priority tasks are also scheduled through deadline servers,
> then you can provide response-time guarantees to them even when
> lower-priority/non-real-time tasks are scheduled through deadline
> servers.

Right, but I fear we won't be able to keep current behavior for wakeups:
RT with highest prio always gets scheduled right away?



RE: [EXT] [PATCH] mtd: spinand: micron: add support for MT29F2G01AAAED

2020-08-07 Thread Shivamurthy Shastri (sshivamurthy)
Hi Thirumalesha,

> 
> The MT29F2G01AAAED is a single die, 2Gb Micron SPI NAND Flash with 4-bit
> ECC
> 
> Signed-off-by: Thirumalesha Narasimhappa 
> ---
>  drivers/mtd/nand/spi/micron.c | 77 +++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c
> index 5d370cfcdaaa..7e0b61d7b90f 100644
> --- a/drivers/mtd/nand/spi/micron.c
> +++ b/drivers/mtd/nand/spi/micron.c
> @@ -17,6 +17,7 @@
>  #define MICRON_STATUS_ECC_1TO3_BITFLIPS  (1 << 4)
>  #define MICRON_STATUS_ECC_4TO6_BITFLIPS  (3 << 4)
>  #define MICRON_STATUS_ECC_7TO8_BITFLIPS  (5 << 4)

It is better to add comment here, like

/* For Micron  MT29F2G01AAAED Device */

> +#define MICRON_STATUS_ECC_1TO4_BITFLIPS  (1 << 4)
> 
>  #define MICRON_CFG_CRBIT(0)
> 
> @@ -44,6 +45,19 @@ static SPINAND_OP_VARIANTS(update_cache_variants,
>   SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
>   SPINAND_PROG_LOAD(false, 0, NULL, 0));
> 
> +/* Micron  MT29F2G01AAAED Device */
> +static SPINAND_OP_VARIANTS(read_cache_variants_mt29f2g01aaaed,
> + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
> + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
> + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
> + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(write_cache_variants_mt29f2g01aaaed,
> + SPINAND_PROG_LOAD(true, 0, NULL, 0));
> +
> +static SPINAND_OP_VARIANTS(update_cache_variants_mt29f2g01aaaed,
> + SPINAND_PROG_LOAD(false, 0, NULL, 0));
> +
>  static int micron_8_ooblayout_ecc(struct mtd_info *mtd, int section,
> struct mtd_oob_region *region)
>  {
> @@ -69,11 +83,41 @@ static int micron_8_ooblayout_free(struct mtd_info
> *mtd, int section,
>   return 0;
>  }
> 
> +static int mt29f2g01aaaed_ooblayout_ecc(struct mtd_info *mtd, int section,
> + struct mtd_oob_region *region)
> +{
> + if (section >= 4)
> + return -ERANGE;
> +
> + region->offset = (section * 16) + 8;
> + region->length = 8;
> +
> + return 0;
> +}
> +
> +static int mt29f2g01aaaed_ooblayout_free(struct mtd_info *mtd, int section,
> +  struct mtd_oob_region *region)
> +{
> + if (section >= 4)
> + return -ERANGE;
> +
> + /* Reserve 2 bytes for the BBM. */
> + region->offset = (section * 16) + 2;
> + region->length = 6;
> +
> + return 0;
> +}
> +
>  static const struct mtd_ooblayout_ops micron_8_ooblayout = {
>   .ecc = micron_8_ooblayout_ecc,
>   .free = micron_8_ooblayout_free,
>  };
> 
> +static const struct mtd_ooblayout_ops mt29f2g01aaaed_ooblayout = {
> + .ecc = mt29f2g01aaaed_ooblayout_ecc,
> + .free = mt29f2g01aaaed_ooblayout_free,
> +};
> +
>  static int micron_select_target(struct spinand_device *spinand,
>   unsigned int target)
>  {
> @@ -114,6 +158,27 @@ static int micron_8_ecc_get_status(struct
> spinand_device *spinand,
>   return -EINVAL;
>  }
> 
> +static int mt29f2g01aaaed_ecc_get_status(struct spinand_device *spinand,
> +  u8 status)
> +{
> + switch (status & MICRON_STATUS_ECC_MASK) {
> + case STATUS_ECC_NO_BITFLIPS:
> + return 0;
> +
> + case STATUS_ECC_UNCOR_ERROR:
> + return -EBADMSG;
> +
> + /* 1 to 4-bit error detected and corrected */
> + case MICRON_STATUS_ECC_1TO4_BITFLIPS:
> + return 4;
> +
> + default:
> + break;
> + }
> +
> + return -EINVAL;
> +}
> +
>  static const struct spinand_info micron_spinand_table[] = {
>   /* M79A 2Gb 3.3V */
>   SPINAND_INFO("MT29F2G01ABAGD",
> @@ -217,6 +282,18 @@ static const struct spinand_info
> micron_spinand_table[] = {
>SPINAND_ECCINFO(&micron_8_ooblayout,
>micron_8_ecc_get_status),
>SPINAND_SELECT_TARGET(micron_select_target)),
> + /* M70A 2Gb 3.3V */

This should be M69A.

> + SPINAND_INFO("MT29F2G01AAAED",
> +
> SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x9F),
> +  NAND_MEMORG(1, 2048, 256, 64, 2048, 80, 2, 1, 1),
> +  NAND_ECCREQ(4, 512),
> +
> SPINAND_INFO_OP_VARIANTS(&read_cache_variants_mt29f2g01aaaed,
> +
> &write_cache_variants_mt29f2g01aaaed,
> +
> &update_cache_variants_mt29f2g01aaaed),
> +  0,
> +  SPINAND_ECCINFO(&mt29f2g01aaaed_ooblayout,
> +  mt29f2g01aaaed_ecc_get_status),
> +  SPINAND_SELECT_TARGET(micron_select_target)),

This device doesn't have multiple targets, you can remove micron_select_target.

>  };
> 
>  static int micron_spinand_init(struct spinand_device *spinand)
> --
> 2.17.1

Thanks,
Shiva


Re: [PATCH v32 1/6] dt: bindings: lp50xx: Introduce the lp50xx family of RGB drivers

2020-08-07 Thread Dan Murphy

Pavel

On 8/4/20 2:55 PM, Dan Murphy wrote:

Pavel

On 7/28/20 8:39 AM, Dan Murphy wrote:

Pavel

On 7/22/20 10:31 AM, Dan Murphy wrote:
Introduce the bindings for the Texas Instruments LP5036, LP5030, 
LP5024,
LP5018, LP5012 and LP5009 RGB LED device driver.  The 
LP5036/30/24/18/12/9

can control RGB LEDs individually or as part of a control bank group.
These devices have the ability to adjust the mixing control for the RGB
LEDs to obtain different colors independent of the overall 
brightness of

the LED grouping.


Were you going to pull this in as a user of the Multicolor framework?

Gentle ping.  I saw you were going to push the Omnia these should be 
ready too


Did you need me to rebase these patches and add the Ack from Linus on 
the defconfig patch?


Dan



Re: [PATCH 02/18] spi: stm32-spi: defer probe for reset

2020-08-07 Thread Alain Volmat
On Wed, Aug 05, 2020 at 11:49:06AM +0100, Mark Brown wrote:
> On Wed, Aug 05, 2020 at 09:01:57AM +0200, Alain Volmat wrote:
> 
> > -   rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
> > -   if (!IS_ERR(rst)) {
> > +   rst = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL);
> > +   if (rst) {
> > +   if (IS_ERR(rst)) {
> > +   ret = PTR_ERR(rst);
> > +   if (ret != -EPROBE_DEFER)
> > +   dev_err(&pdev->dev, "reset get failed: %d\n",
> > +   ret);
> > +   goto err_clk_disable;
> > +   }
> 
> This will not provide any diagnostics when deferring which isn't very
> helpful if there's issues.

Do you mean that a message when deferring would be needed ?

I am worrying that this would lead to having too much noise during boot
since probe deferring is kinda common. Of course it can also be due to a bad
configuration of the kernel as well but having looked around I think that
usually driver are rather silent in case of deferring.


Re: splice: infinite busy loop lockup bug

2020-08-07 Thread Ming Lei
On Fri, Aug 07, 2020 at 01:38:54PM +0100, Al Viro wrote:
> On Fri, Aug 07, 2020 at 01:27:27PM +0100, Al Viro wrote:
> > On Fri, Aug 07, 2020 at 07:35:08PM +0900, Tetsuo Handa wrote:
> > > syzbot is reporting hung task at pipe_release() [1], for for_each_bvec() 
> > > from
> > > iterate_bvec() from iterate_all_kinds() from iov_iter_alignment() from
> > > ext4_unaligned_io() from ext4_dio_write_iter() from 
> > > ext4_file_write_iter() from
> > > call_write_iter() from do_iter_readv_writev() from do_iter_write() from
> > > vfs_iter_write() from iter_file_splice_write() falls into infinite busy 
> > > loop
> > > with pipe->mutex held.
> > > 
> > > The reason of falling into infinite busy loop is that 
> > > iter_file_splice_write()
> > > for some reason generates "struct bio_vec" entry with .bv_len=0 and 
> > > .bv_offset=0
> > > while for_each_bvec() cannot handle .bv_len == 0.
> > 
> > broken in 1bdc76aea115 "iov_iter: use bvec iterator to implement 
> > iterate_bvec()",
> > unless I'm misreading it...
> > 
> > Zero-length segments are not disallowed; it's not all that hard to filter 
> > them
> > out in iter_file_splice_write(), but the intent had always been to have
> > iterate_all_kinds() et.al. able to cope with those.
> > 
> > How are these pipe_buffers with ->len == 0 generated in that reproducer, 
> > BTW?
> > There might be something else fishy going on...
> 
> FWIW, my preference would be to have for_each_bvec() advance past zero-length
> segments; I'll need to go through its uses elsewhere in the tree first, though
> (after I grab some sleep),

Usually block layer doesn't allow/support zero bvec, however we can make
for_each_bvec() to support it only.

Tetsuo, can you try the following patch?

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ac0c7299d5b8..b03c793dd28d 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -117,11 +117,19 @@ static inline bool bvec_iter_advance(const struct bio_vec 
*bv,
return true;
 }
 
+static inline void bvec_iter_skip_zero_vec(const struct bio_vec *bv,
+   struct bvec_iter *iter)
+{
+   iter->bi_idx++;
+   iter->bi_bvec_done = 0;
+}
+
 #define for_each_bvec(bvl, bio_vec, iter, start)   \
for (iter = (start);\
 (iter).bi_size &&  \
-   ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
-bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+   ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
+ (bvl).bv_len ? bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len) : \
+   bvec_iter_skip_zero_vec((bio_vec), &(iter)))
 
 /* for iterating one bio from start to end */
 #define BVEC_ITER_ALL_INIT (struct bvec_iter)  \

Thanks,
Ming



Re: [RFC PATCH v2 0/6] SCHED_DEADLINE server infrastructure

2020-08-07 Thread luca abeni
Hi Juri,

On Fri, 7 Aug 2020 15:30:41 +0200
Juri Lelli  wrote:
[...]
> > In the meanwhile, I have some questions/comments after a first quick
> > look.
> > 
> > If I understand well, the patchset does not apply deadline servers
> > to FIFO and RR tasks, right? How does this patchset interact with RT
> > throttling?  
> 
> Well, it's something like the dual of it, in that RT Throttling
> directly throttles RT tasks to make spare CPU cycles available to
> fair tasks while this patchset introduces deadline servers to
> schedule fair tasks, thus still reserving CPU time for those (when
> needed).

Ah, OK... I was thinking about using deadline servers for both RT and
non-RT tasks. And to use them not only to throttle, but also to provide
some kind of performance guarantees (to all the scheduling classes).
Think about what can be done when combining this mechanism with
cgroups/containers :)

[...]
> > I understand this is because you do not
> > want to delay RT tasks if they are not starving other tasks. But
> > then, maybe what you want is not deadline-based scheduling. Maybe a
> > reservation-based scheduler based on fixed priorities is what you
> > want? (with SCHED_DEADLINE, you could provide exact performance
> > guarantees to SCHED_OTHER tasks, but I suspect patch 6/6 breaks
> > these guarantees?)  
> 
> I agree that we are not interested in exact guarantees in this case,
> but why not using something that it's already there and would give us
> the functionality we need (fix starvation for fair)?

Ok, if performance guarantees to non-RT tasks are not the goal, then I
agree. I was thinking that since the patchset provides a mechanism to
schedule various classes of tasks through deadline servers, then
using these servers to provide some kinds of guarantees could be
interesting ;-)



Thanks,
Luca

> It would also
> work well in presence of "real" deadline tasks I think, in that you
> could account for these fair servers while performing admission
> control.
> 
> Best,
> 
> Juri
> 



Re: [PATCH net] net: qcom/emac: Fix missing clk_disable_unprepare() in error path of emac_probe

2020-08-07 Thread Timur Tabi

On 8/6/20 8:54 PM, wanghai (M) wrote:

Thanks for your suggestion. May I fix it like this?


Yes, this is what I had in mind.  Thanks.

Acked-by: Timur Tabi 


Re: [RFC PATCH 0/8] fsdax: introduce FS query interface to support reflink

2020-08-07 Thread Matthew Wilcox
On Fri, Aug 07, 2020 at 09:13:28PM +0800, Shiyang Ruan wrote:
> This patchset is a try to resolve the problem of tracking shared page
> for fsdax.
> 
> Instead of per-page tracking method, this patchset introduces a query
> interface: get_shared_files(), which is implemented by each FS, to
> obtain the owners of a shared page.  It returns an owner list of this
> shared page.  Then, the memory-failure() iterates the list to be able
> to notify each process using files that sharing this page.
> 
> The design of the tracking method is as follow:
> 1. dax_assocaite_entry() associates the owner's info to this page

I think that's the first problem with this design.  dax_associate_entry is
a horrendous idea which needs to be ripped out, not made more important.
It's all part of the general problem of trying to do something on a
per-page basis instead of per-extent basis.



Re: [PATCH 1/2 v2] rseq/membarrier: add MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU

2020-08-07 Thread peterz
On Thu, Aug 06, 2020 at 10:05:43AM -0700, Peter Oskolkov wrote:
> +#ifdef CONFIG_RSEQ
> +static void membarrier_rseq_ipi(void *arg)
> +{
> + if (current->mm != arg)  /* Not our process. */
> + return;
> + if (!current->rseq)  /* RSEQ not set up for the current task/thread. */
> + return;
> +
> + rseq_preempt(current);
> +}
> +#endif
> +
> +static int membarrier_private_restart_rseq_on_cpu(int cpu_id)
> +{
> +#ifdef CONFIG_RSEQ
> + /* syscalls are not allowed inside rseq critical sections. */
> + if (cpu_id == raw_smp_processor_id())
> + return 0;
> +
> + return smp_call_function_single(cpu_id, membarrier_rseq_ipi,
> + current->mm, true);
> +#else
> + return 0;
> +#endif
> +}

I'm thinking even this is a problem, we can end up sending IPIs to CPUs
outside out partition (they might be NOHZ_FULL) and that's a no-no too.

Something like so perhaps... that really limits it to CPUs that match
our mm.

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 6be66f52a2ad..bee5e98e6774 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -356,6 +356,7 @@ enum {
 
 enum {
MEMBARRIER_FLAG_SYNC_CORE   = (1U << 0),
+   MEMBARRIER_FLAG_RSEQ= (1U << 1),
 };
 
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 168479a7d61b..4d9b22c2f5e2 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -27,6 +27,11 @@
 
 static void ipi_mb(void *info)
 {
+   int *flags = info;
+
+   if (flags && (*flags & MEMBARRIER_FLAG_RSEQ))
+   rseq_preempt(current);
+
smp_mb();   /* IPIs should be serializing but paranoid. */
 }
 
@@ -129,11 +134,11 @@ static int membarrier_global_expedited(void)
return 0;
 }
 
-static int membarrier_private_expedited(int flags)
+static int membarrier_private_expedited(int flags, int cpu_id)
 {
-   int cpu;
-   cpumask_var_t tmpmask;
struct mm_struct *mm = current->mm;
+   cpumask_var_t tmpmask;
+   int cpu;
 
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
@@ -174,6 +179,10 @@ static int membarrier_private_expedited(int flags)
 */
if (cpu == raw_smp_processor_id())
continue;
+
+   if (cpu_id >= 0 && cpu != cpu_id)
+   continue;
+
p = rcu_dereference(cpu_rq(cpu)->curr);
if (p && p->mm == mm)
__cpumask_set_cpu(cpu, tmpmask);
@@ -181,7 +190,7 @@ static int membarrier_private_expedited(int flags)
rcu_read_unlock();
 
preempt_disable();
-   smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+   smp_call_function_many(tmpmask, ipi_mb, &flags, 1);
preempt_enable();
 
free_cpumask_var(tmpmask);
@@ -362,11 +371,13 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
return membarrier_register_global_expedited();
case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-   return membarrier_private_expedited(0);
+   return membarrier_private_expedited(0, -1);
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
return membarrier_register_private_expedited(0);
case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
-   return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+   return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, 
-1);
+   case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
+   return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, 
flags);
case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
return 
membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
default:


RFC: How to adjust the trace pid?

2020-08-07 Thread Christian König
Hi everybody,

in amdgpu we got the following issue which I'm seeking advice on how to cleanly 
handle it.

We have a bunch of trace points which are related to the VM subsystem and 
executed in either a work item, kthread or foreign process context.

Now tracing the pid of the context which we are executing in is not really that 
useful, so I'm wondering if we could just overwrite the pid recorded in the 
trace entry?

The following patch does exactly that for the vm_grab_id() trace point, but I'm 
not 100% sure if that is legal or not.

Any ideas? Comments?

Thanks,
Christian.




[PATCH] drm/amdgpu: adjust the pid in the grab_id trace point

2020-08-07 Thread Christian König
Trace something useful instead of the pid of a kernel thread here.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 5da20fc166d9..07f99ef69d91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -228,6 +228,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
 ),
 
TP_fast_assign(
+  __entry->ent.pid = vm->task_info.pid;
   __entry->pasid = vm->pasid;
   __assign_str(ring, ring->name)
   __entry->vmid = job->vmid;
-- 
2.17.1



Re: [PATCH RFC v2 02/18] irq/dev-msi: Add support for a new DEV_MSI irq domain

2020-08-07 Thread Jason Gunthorpe
On Fri, Aug 07, 2020 at 02:38:31PM +0200, gre...@linuxfoundation.org wrote:
> On Fri, Aug 07, 2020 at 09:06:50AM -0300, Jason Gunthorpe wrote:
> > On Thu, Aug 06, 2020 at 10:21:11PM +0200, Thomas Gleixner wrote:
> > 
> > > Optionally? Please tell the hardware folks to make this mandatory. We
> > > have enough pain with non maskable MSI interrupts already so introducing
> > > yet another non maskable interrupt trainwreck is not an option.
> > 
> > Can you elaborate on the flows where Linux will need to trigger
> > masking?
> > 
> > I expect that masking will be available in our NIC HW too - but it
> > will require a spin loop if masking has to be done in an atomic
> > context.
> > 
> > > It's more than a decade now that I tell HW people not to repeat the
> > > non-maskable MSI failure, but obviously they still think that
> > > non-maskable interrupts are a brilliant idea. I know that HW folks
> > > believe that everything they omit can be fixed in software, but they
> > > have to finally understand that this particular issue _cannot_ be fixed
> > > at all.
> > 
> > Sure, the CPU should always be able to shut off an interrupt!
> > 
> > Maybe explaining the goals would help understand the HW perspective.
> > 
> > Today HW can process > 100k queues of work at once. Interrupt delivery
> > works by having a MSI index in each queue's metadata and the interrupt
> > indirects through a MSI-X table on-chip which has the
> > addr/data/mask/etc.
> > 
> > What IMS proposes is that the interrupt data can move into the queue
> > meta data (which is not required to be on-chip), eg along side the
> > producer/consumer pointers, and the central MSI-X table is not
> > needed. This is necessary because the PCI spec has very harsh design
> > requirements for a MSI-X table that make scaling it prohibitive.
> > 
> > So an IRQ can be silenced by deleting or stopping the queue(s)
> > triggering it. It can be masked by including masking in the queue
> > metadata. We can detect pending by checking the producer/consumer
> > values.
> > 
> > However synchronizing all the HW and all the state is now more
> > complicated than just writing a mask bit via MMIO to an on-die memory.
> 
> Because doing all of the work that used to be done in HW in software is
> so much faster and scalable?  Feels really wrong to me :(

Yes, it is more scalable. The problem with MSI-X is you need actual
physical silicon for each and every vector. This really limits the
number of vectors.

Placing the vector metadata with the queue means it can potentially
live in system memory which is significantly more scalable.

setup/mask/unmask will be slower. The driver might have
complexity. They are not performance path, right?

I don't think it is wrong or right. IMHO the current design where the
addr/data is hidden inside the platform is an artifact of x86's
compatibility legacy back when there was no such thing as message
interrupts. 

If you were starting from a green field I don't think a design would
include the IOAPIC/MSI/MSI-X indirection tables.

> Do you all have a pointer to the spec for this newly proposed stuff
> anywhere to try to figure out how the HW wants this to all work?

Intel's SIOV document is an interesting place to start:

https://software.intel.com/content/www/us/en/develop/download/intel-scalable-io-virtualization-technical-specification.html

Though it is more of a rational and a cookbook on how to combine
existing technology pieces. (eg PASID, platform_msi, etc)

The basic approach of SIOV's IMS is that there is no longer a generic
interrupt indirection from numbers to addr/data pairs like
IOAPIC/MSI/MSI-X owned by the common OS code.

Instead the driver itself is responsible to set the addr/data pair
into the device in a device specific way, deal with masking, etc.

This lets the device use an implementation that is not limited by the
harsh MSI-X semantics.

In Linux we already have 'IMS' it is called platform_msi and a few
embedded drivers already work like this. The idea here is to bring it
to PCI.

Jason


Re: [RFC PATCH v2 0/6] SCHED_DEADLINE server infrastructure

2020-08-07 Thread Juri Lelli
Hi Luca,

On 07/08/20 15:16, luca abeni wrote:
> Hi Juri,
> 
> thanks for sharing the v2 patchset!
> 
> In the next days I'll have a look at it, and try some tests...

Thanks!

> In the meanwhile, I have some questions/comments after a first quick
> look.
> 
> If I understand well, the patchset does not apply deadline servers to
> FIFO and RR tasks, right? How does this patchset interact with RT
> throttling?

Well, it's something like the dual of it, in that RT Throttling directly
throttles RT tasks to make spare CPU cycles available to fair tasks
while this patchset introduces deadline servers to schedule fair tasks,
thus still reserving CPU time for those (when needed).

> If I understand well, patch 6/6 does something like "use deadline
> servers for SCHED_OTHER only if FIFO/RR tasks risk to starve
> SCHED_OTHER tasks"... Right?

That's the basic idea, yes.

> I understand this is because you do not
> want to delay RT tasks if they are not starving other tasks. But then,
> maybe what you want is not deadline-based scheduling. Maybe a
> reservation-based scheduler based on fixed priorities is what you want?
> (with SCHED_DEADLINE, you could provide exact performance guarantees to
> SCHED_OTHER tasks, but I suspect patch 6/6 breaks these guarantees?)

I agree that we are not interested in exact guarantees in this case, but
why not using something that it's already there and would give us the
functionality we need (fix starvation for fair)? It would also work well
in presence of "real" deadline tasks I think, in that you could account
for these fair servers while performing admission control.

Best,

Juri



Re: [PATCH RFC leds + net-next v4 0/2] Add support for LEDs on Marvell PHYs

2020-08-07 Thread Andrew Lunn
> > And no, I don't want phydev name there.
> 
> Ummm. Can we get little more explanation on that? I fear that LED
> device renaming will be tricky and phydev would work around that
> nicely.

Hi Pavel

The phydev name is not particularly nice:

!mdio-mux!mdio@1!switch@0!mdio:00
!mdio-mux!mdio@1!switch@0!mdio:01
!mdio-mux!mdio@1!switch@0!mdio:02
!mdio-mux!mdio@2!switch@0!mdio:00
!mdio-mux!mdio@2!switch@0!mdio:01
!mdio-mux!mdio@2!switch@0!mdio:02
!mdio-mux!mdio@4!switch@0!mdio:00
!mdio-mux!mdio@4!switch@0!mdio:01
!mdio-mux!mdio@4!switch@0!mdio:02
400d.ethernet-1:00
400d.ethernet-1:01
fixed-0:00

The interface name are:

1: lo:
2: eth0:
3: eth1:
4: lan0@eth1:
5: lan1@eth1:
6: lan2@eth1:
7: lan3@eth1:
8: lan4@eth1:
9: lan5@eth1:
10: lan6@eth1:
11: lan7@eth1:
12: lan8@eth1:
13: optical3@eth1:
14: optical4@eth1:

You could make a good guess at matching the two together, but it is
error prone. Phys are low level things which the user is not really
involved in. They interact with interface names. ethtool, ip, etc, all
use interface names. In fact, i don't know of any tool which uses
phydev names.

 Andrew


Re: [RFC PATCH v2 6/6] sched/fair: Implement starvation monitor

2020-08-07 Thread luca abeni
Hi Juri,

On Fri, 7 Aug 2020 11:56:04 +0200
Juri Lelli  wrote:

> Starting deadline server for lower priority classes right away when
> first task is enqueued might break guarantees

Which guarantees are you thinking about, here? Response times of fixed
priority tasks?

If fixed priority tasks are also scheduled through deadline servers,
then you can provide response-time guarantees to them even when
lower-priority/non-real-time tasks are scheduled through deadline
servers.


Thanks,
Luca

> as tasks belonging to
> intermediate priority classes could be uselessly preempted. E.g., a
> well behaving (non hog) FIFO task can be preempted by NORMAL tasks
> even if there are still CPU cycles available for NORMAL tasks to run,
> as they'll be running inside the fair deadline server for some period
> of time.
> 
> To prevent this issue, implement a starvation monitor mechanism that
> starts the deadline server only if a (fair in this case) task hasn't
> been scheduled for some interval of time after it has been enqueued.
> Use pick/put functions to manage starvation monitor status.
> 
> Signed-off-by: Juri Lelli 
> ---
>  kernel/sched/fair.c  | 57
> ++-- kernel/sched/sched.h |
> 4  2 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 6a97ee2a4e26d..5cdf76e508074 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5494,6 +5494,53 @@ static int sched_idle_cpu(int cpu)
>  }
>  #endif
>  
> +
> +static void fair_server_watchdog(struct timer_list *list)
> +{
> + struct rq *rq = container_of(list, struct rq,
> fair_server_wd);
> + struct rq_flags rf;
> +
> + rq_lock_irqsave(rq, &rf);
> + rq->fair_server_wd_running = 0;
> +
> + if (!rq->cfs.h_nr_running)
> + goto out;
> +
> + update_rq_clock(rq);
> + dl_server_start(&rq->fair_server);
> + rq->fair_server_active = 1;
> + resched_curr(rq);
> +
> +out:
> + rq_unlock_irqrestore(rq, &rf);
> +}
> +
> +static inline void fair_server_watchdog_start(struct rq *rq)
> +{
> + if (rq->fair_server_wd_running || rq->fair_server_active)
> + return;
> +
> + timer_setup(&rq->fair_server_wd, fair_server_watchdog, 0);
> + rq->fair_server_wd.expires = jiffies +
> FAIR_SERVER_WATCHDOG_INTERVAL;
> + add_timer_on(&rq->fair_server_wd, cpu_of(rq));
> + rq->fair_server_active = 0;
> + rq->fair_server_wd_running = 1;
> +}
> +
> +static inline void fair_server_watchdog_stop(struct rq *rq, bool
> stop_server) +{
> + if (!rq->fair_server_wd_running && !stop_server)
> + return;
> +
> + del_timer(&rq->fair_server_wd);
> + rq->fair_server_wd_running = 0;
> +
> + if (stop_server && rq->fair_server_active) {
> + dl_server_stop(&rq->fair_server);
> + rq->fair_server_active = 0;
> + }
> +}
> +
>  /*
>   * The enqueue_task method is called before nr_running is
>   * increased. Here we update the fair scheduling stats and
> @@ -5515,7 +5562,7 @@ enqueue_task_fair(struct rq *rq, struct
> task_struct *p, int flags) util_est_enqueue(&rq->cfs, p);
>  
>   if (!rq->cfs.h_nr_running)
> - dl_server_start(&rq->fair_server);
> + fair_server_watchdog_start(rq);
>  
>   /*
>* If in_iowait is set, the code below may not trigger any
> cpufreq @@ -5670,7 +5717,7 @@ static void dequeue_task_fair(struct rq
> *rq, struct task_struct *p, int flags) 
>  dequeue_throttle:
>   if (!rq->cfs.h_nr_running)
> - dl_server_stop(&rq->fair_server);
> + fair_server_watchdog_stop(rq, true);
>  
>   util_est_dequeue(&rq->cfs, p, task_sleep);
>   hrtick_update(rq);
> @@ -7123,6 +7170,7 @@ done: __maybe_unused;
>   hrtick_start_fair(rq, p);
>  
>   update_misfit_status(p, rq);
> + fair_server_watchdog_stop(rq, false);
>  
>   return p;
>  
> @@ -7178,6 +7226,8 @@ void fair_server_init(struct rq *rq)
>   dl_se->dl_period = 20 * TICK_NSEC;
>  
>   dl_server_init(dl_se, rq, fair_server_has_tasks,
> fair_server_pick); +
> + rq->fair_server_wd_running = 0;
>  }
>  
>  /*
> @@ -7192,6 +7242,9 @@ static void put_prev_task_fair(struct rq *rq,
> struct task_struct *prev) cfs_rq = cfs_rq_of(se);
>   put_prev_entity(cfs_rq, se);
>   }
> +
> + if (rq->cfs.h_nr_running)
> + fair_server_watchdog_start(rq);
>  }
>  
>  /*
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index bf8c9c07705c9..1e1a5436be725 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -375,6 +375,7 @@ extern void dl_server_init(struct sched_dl_entity
> *dl_se, struct rq *rq, dl_server_has_tasks_f has_tasks,
>   dl_server_pick_f pick);
>  
> +#define FAIR_SERVER_WATCHDOG_INTERVAL (HZ >> 1)
>  extern void fair_server_init(struct rq *);
>  
>  #ifdef CONFIG_CGROUP_SCHED
> @@ -962,6 +963,9 @@ struct rq {
>   struct dl_rq  

Re: [PATCH 2/5] spi: stm32: fix fifo threshold level in case of short transfer

2020-08-07 Thread Mark Brown
On Fri, Aug 07, 2020 at 03:21:22PM +0200, Alain Volmat wrote:

> + /* data_packet should not exceed transfer length */
> + packet = (half_fifo > xfer_len) ? xfer_len : half_fifo;

Please write normal conditional statements to improve legibility.


signature.asc
Description: PGP signature


[PATCH 1/5] spi: stm32h7: fix race condition at end of transfer

2020-08-07 Thread Alain Volmat
From: Antonio Borneo 

The caller of stm32_spi_transfer_one(), spi_transfer_one_message(),
is waiting for us to call spi_finalize_current_transfer() and will
eventually schedule a new transfer, if available.
We should guarantee that the spi controller is really available
before calling spi_finalize_current_transfer().

Move the call to spi_finalize_current_transfer() _after_ the call
to stm32_spi_disable().

Signed-off-by: Antonio Borneo 
Signed-off-by: Alain Volmat 
---
 drivers/spi/spi-stm32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 4a21feae0103..814a3ec3b8ad 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -971,8 +971,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void 
*dev_id)
spin_unlock_irqrestore(&spi->lock, flags);
 
if (end) {
-   spi_finalize_current_transfer(master);
stm32h7_spi_disable(spi);
+   spi_finalize_current_transfer(master);
}
 
return IRQ_HANDLED;
-- 
2.7.4



[PATCH 3/5] spi: stm32: fix stm32_spi_prepare_mbr in case of odd clk_rate

2020-08-07 Thread Alain Volmat
From: Amelie Delaunay 

Fix spi->clk_rate when it is odd to the nearest lowest even value because
minimum SPI divider is 2.

Signed-off-by: Amelie Delaunay 
Signed-off-by: Alain Volmat 
---
 drivers/spi/spi-stm32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 005bc16bdf2a..bdd4e70c3f10 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -441,7 +441,8 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, u32 
speed_hz,
 {
u32 div, mbrdiv;
 
-   div = DIV_ROUND_UP(spi->clk_rate, speed_hz);
+   /* Ensure spi->clk_rate is even */
+   div = DIV_ROUND_UP(spi->clk_rate & ~0x1, speed_hz);
 
/*
 * SPI framework set xfer->speed_hz to master->max_speed_hz if
-- 
2.7.4



[PATCH 4/5] spi: stm32: fixes suspend/resume management

2020-08-07 Thread Alain Volmat
From: Amelie Delaunay 

This patch adds pinctrl power management, and reconfigure spi controller
in case of resume.

Fixes: 038ac869c9d2 ("spi: stm32: add runtime PM support")

Signed-off-by: Amelie Delaunay 
Signed-off-by: Alain Volmat 
---
 drivers/spi/spi-stm32.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index bdd4e70c3f10..e196dbc5c432 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2004,6 +2005,8 @@ static int stm32_spi_remove(struct platform_device *pdev)
 
pm_runtime_disable(&pdev->dev);
 
+   pinctrl_pm_select_sleep_state(&pdev->dev);
+
return 0;
 }
 
@@ -2015,13 +2018,18 @@ static int stm32_spi_runtime_suspend(struct device *dev)
 
clk_disable_unprepare(spi->clk);
 
-   return 0;
+   return pinctrl_pm_select_sleep_state(dev);
 }
 
 static int stm32_spi_runtime_resume(struct device *dev)
 {
struct spi_master *master = dev_get_drvdata(dev);
struct stm32_spi *spi = spi_master_get_devdata(master);
+   int ret;
+
+   ret = pinctrl_pm_select_default_state(dev);
+   if (ret)
+   return ret;
 
return clk_prepare_enable(spi->clk);
 }
@@ -2051,10 +2059,23 @@ static int stm32_spi_resume(struct device *dev)
return ret;
 
ret = spi_master_resume(master);
-   if (ret)
+   if (ret) {
clk_disable_unprepare(spi->clk);
+   return ret;
+   }
 
-   return ret;
+   ret = pm_runtime_get_sync(dev);
+   if (ret) {
+   dev_err(dev, "Unable to power device:%d\n", ret);
+   return ret;
+   }
+
+   spi->cfg->config(spi);
+
+   pm_runtime_mark_last_busy(dev);
+   pm_runtime_put_autosuspend(dev);
+
+   return 0;
 }
 #endif
 
-- 
2.7.4



[PATCH 2/5] spi: stm32: fix fifo threshold level in case of short transfer

2020-08-07 Thread Alain Volmat
From: Amelie Delaunay 

When transfer is shorter than half of the fifo, set the data packet size
up to transfer size instead of up to half of the fifo.
Check also that threshold is set at least to 1 data frame.

Signed-off-by: Amelie Delaunay 
Signed-off-by: Alain Volmat 
---
 drivers/spi/spi-stm32.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 814a3ec3b8ad..005bc16bdf2a 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -467,20 +467,24 @@ static int stm32_spi_prepare_mbr(struct stm32_spi *spi, 
u32 speed_hz,
 /**
  * stm32h7_spi_prepare_fthlv - Determine FIFO threshold level
  * @spi: pointer to the spi controller data structure
+ * @xfer_len: length of the message to be transferred
  */
-static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi)
+static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len)
 {
-   u32 fthlv, half_fifo;
+   u32 fthlv, half_fifo, packet;
 
/* data packet should not exceed 1/2 of fifo space */
half_fifo = (spi->fifo_size / 2);
 
+   /* data_packet should not exceed transfer length */
+   packet = (half_fifo > xfer_len) ? xfer_len : half_fifo;
+
if (spi->cur_bpw <= 8)
-   fthlv = half_fifo;
+   fthlv = packet;
else if (spi->cur_bpw <= 16)
-   fthlv = half_fifo / 2;
+   fthlv = packet / 2;
else
-   fthlv = half_fifo / 4;
+   fthlv = packet / 4;
 
/* align packet size with data registers access */
if (spi->cur_bpw > 8)
@@ -488,6 +492,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi)
else
fthlv -= (fthlv % 4); /* multiple of 4 */
 
+   if (!fthlv)
+   fthlv = 1;
+
return fthlv;
 }
 
@@ -1393,7 +1400,7 @@ static void stm32h7_spi_set_bpw(struct stm32_spi *spi)
cfg1_setb |= (bpw << STM32H7_SPI_CFG1_DSIZE_SHIFT) &
 STM32H7_SPI_CFG1_DSIZE;
 
-   spi->cur_fthlv = stm32h7_spi_prepare_fthlv(spi);
+   spi->cur_fthlv = stm32h7_spi_prepare_fthlv(spi, spi->cur_xferlen);
fthlv = spi->cur_fthlv - 1;
 
cfg1_clrb |= STM32H7_SPI_CFG1_FTHLV;
@@ -1588,6 +1595,8 @@ static int stm32_spi_transfer_one_setup(struct stm32_spi 
*spi,
 
spin_lock_irqsave(&spi->lock, flags);
 
+   spi->cur_xferlen = transfer->len;
+
if (spi->cur_bpw != transfer->bits_per_word) {
spi->cur_bpw = transfer->bits_per_word;
spi->cfg->set_bpw(spi);
@@ -1635,8 +1644,6 @@ static int stm32_spi_transfer_one_setup(struct stm32_spi 
*spi,
goto out;
}
 
-   spi->cur_xferlen = transfer->len;
-
dev_dbg(spi->dev, "transfer communication mode set to %d\n",
spi->cur_comm);
dev_dbg(spi->dev,
-- 
2.7.4



[PATCH 5/5] spi: stm32: always perform registers configuration prior to transfer

2020-08-07 Thread Alain Volmat
SPI registers content may have been lost upon suspend/resume sequence.
So, always compute and apply the necessary configuration in
stm32_spi_transfer_one_setup routine.

Signed-off-by: Alain Volmat 
---
 drivers/spi/spi-stm32.c | 42 +-
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index e196dbc5c432..7968d23347b6 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1594,41 +1594,33 @@ static int stm32_spi_transfer_one_setup(struct 
stm32_spi *spi,
unsigned long flags;
unsigned int comm_type;
int nb_words, ret = 0;
+   int mbr;
 
spin_lock_irqsave(&spi->lock, flags);
 
spi->cur_xferlen = transfer->len;
 
-   if (spi->cur_bpw != transfer->bits_per_word) {
-   spi->cur_bpw = transfer->bits_per_word;
-   spi->cfg->set_bpw(spi);
-   }
-
-   if (spi->cur_speed != transfer->speed_hz) {
-   int mbr;
-
-   /* Update spi->cur_speed with real clock speed */
-   mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz,
-   spi->cfg->baud_rate_div_min,
-   spi->cfg->baud_rate_div_max);
-   if (mbr < 0) {
-   ret = mbr;
-   goto out;
-   }
+   spi->cur_bpw = transfer->bits_per_word;
+   spi->cfg->set_bpw(spi);
 
-   transfer->speed_hz = spi->cur_speed;
-   stm32_spi_set_mbr(spi, mbr);
+   /* Update spi->cur_speed with real clock speed */
+   mbr = stm32_spi_prepare_mbr(spi, transfer->speed_hz,
+   spi->cfg->baud_rate_div_min,
+   spi->cfg->baud_rate_div_max);
+   if (mbr < 0) {
+   ret = mbr;
+   goto out;
}
 
-   comm_type = stm32_spi_communication_type(spi_dev, transfer);
-   if (spi->cur_comm != comm_type) {
-   ret = spi->cfg->set_mode(spi, comm_type);
+   transfer->speed_hz = spi->cur_speed;
+   stm32_spi_set_mbr(spi, mbr);
 
-   if (ret < 0)
-   goto out;
+   comm_type = stm32_spi_communication_type(spi_dev, transfer);
+   ret = spi->cfg->set_mode(spi, comm_type);
+   if (ret < 0)
+   goto out;
 
-   spi->cur_comm = comm_type;
-   }
+   spi->cur_comm = comm_type;
 
if (spi->cfg->set_data_idleness)
spi->cfg->set_data_idleness(spi, transfer->len);
-- 
2.7.4



[PATCH 0/5] spi: stm32: various driver fixes

2020-08-07 Thread Alain Volmat
This series is a reduced version of the series
[spi: stm32: various driver enhancements] previously sent.

Alain Volmat (1):
  spi: stm32: always perform registers configuration prior to transfer

Amelie Delaunay (3):
  spi: stm32: fix fifo threshold level in case of short transfer
  spi: stm32: fix stm32_spi_prepare_mbr in case of odd clk_rate
  spi: stm32: fixes suspend/resume management

Antonio Borneo (1):
  spi: stm32h7: fix race condition at end of transfer

 drivers/spi/spi-stm32.c | 95 ++---
 1 file changed, 58 insertions(+), 37 deletions(-)



[v2] i2c: mediatek: Fix i2c_spec_values description

2020-08-07 Thread matthias . bgg
From: Matthias Brugger 

The struct i2c_spec_values has its members documented but is
missing the starting '@', which leads to warnings like:

drivers/i2c/busses/i2c-mt65xx.c:267: warning: Function parameter or member 
'min_low_ns' not described in 'i2c_spec_values'

We also delete min_high_ns member as it is not used in the code.

Signed-off-by: Matthias Brugger 
---

Changes since v1:
delete mint_high_ns member

 drivers/i2c/busses/i2c-mt65xx.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index e889f74703e4..efc14041d45b 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -253,14 +253,13 @@ struct mtk_i2c {
 
 /**
  * struct i2c_spec_values:
- * min_low_ns: min LOW period of the SCL clock
- * min_su_sta_ns: min set-up time for a repeated START condition
- * max_hd_dat_ns: max data hold time
- * min_su_dat_ns: min data set-up time
+ * @min_low_ns: min LOW period of the SCL clock
+ * @min_su_sta_ns: min set-up time for a repeated START condition
+ * @max_hd_dat_ns: max data hold time
+ * @min_su_dat_ns: min data set-up time
  */
 struct i2c_spec_values {
unsigned int min_low_ns;
-   unsigned int min_high_ns;
unsigned int min_su_sta_ns;
unsigned int max_hd_dat_ns;
unsigned int min_su_dat_ns;
-- 
2.28.0



Re: [PATCH 1/2] module: Correctly truncate sysfs sections output

2020-08-07 Thread Jessica Yu

+++ Kees Cook [06/08/20 23:35 -0700]:

The only-root-readable /sys/module/$module/sections/$section files
did not truncate their output to the available buffer size. While most
paths into the kernfs read handlers end up using PAGE_SIZE buffers,
it's possible to get there through other paths (e.g. splice, sendfile).
Actually limit the output to the "count" passed into the read function,
and report it back correctly. *sigh*

Reported-by: kernel test robot 
Link: https://lore.kernel.org/lkml/20200805002015.GE23458@shao2-debian
Fixes: ed66f991bb19 ("module: Refactor section attr into bin attribute")
Cc: sta...@vger.kernel.org
Cc: Jessica Yu 
Signed-off-by: Kees Cook 


Oof, thanks for fixing this!

Acked-by: Jessica Yu 


---
kernel/module.c | 22 +++---
1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index aa183c9ac0a2..08c46084d8cc 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1520,18 +1520,34 @@ struct module_sect_attrs {
struct module_sect_attr attrs[];
};

+#define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4))
static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
struct bin_attribute *battr,
char *buf, loff_t pos, size_t count)
{
struct module_sect_attr *sattr =
container_of(battr, struct module_sect_attr, battr);
+   char bounce[MODULE_SECT_READ_SIZE + 1];
+   size_t wrote;

if (pos != 0)
return -EINVAL;

-   return sprintf(buf, "0x%px\n",
-  kallsyms_show_value(file->f_cred) ? (void 
*)sattr->address : NULL);
+   /*
+* Since we're a binary read handler, we must account for the
+* trailing NUL byte that sprintf will write: if "buf" is
+* too small to hold the NUL, or the NUL is exactly the last
+* byte, the read will look like it got truncated by one byte.
+* Since there is no way to ask sprintf nicely to not write
+* the NUL, we have to use a bounce buffer.
+*/
+   wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n",
+kallsyms_show_value(file->f_cred)
+   ? (void *)sattr->address : NULL);
+   count = min(count, wrote);
+   memcpy(buf, bounce, count);
+
+   return count;
}

static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -1580,7 +1596,7 @@ static void add_sect_attrs(struct module *mod, const 
struct load_info *info)
goto out;
sect_attrs->nsections++;
sattr->battr.read = module_sect_read;
-   sattr->battr.size = 3 /* "0x", "\n" */ + (BITS_PER_LONG / 4);
+   sattr->battr.size = MODULE_SECT_READ_SIZE;
sattr->battr.attr.mode = 0400;
*(gattr++) = &(sattr++)->battr;
}
--
2.25.1



Re: [PATCH net 0/4] support multipacket broadcast message

2020-08-07 Thread Oleksij Rempel
On Fri, Aug 07, 2020 at 05:36:38PM +0800, Zhang Changzhong wrote:
> Hi Oleksij,
> 
> We have tested this j1939 stack according to SAE J1939-21. It works fine for
> most cases, but when we test multipacket broadcast message function we found
> the receiver can't receive those packets.
> 
> You can reproduce on CAN bus or vcan, for vcan case use cangw to connect vcan0
> and vcan1:
> sudo cangw -A -s vcan0 -d vcan1 -e
> sudo cangw -A -s vcan1 -d vcan0 -e
> 
> To reproduce it use following commands:
> testj1939 -B -r vcan1:0x90 &
> testj1939 -B -s20 vcan0:0x80 :,0x12300
> 
> Besides, candump receives correct packets while testj1939 receives nothing.

Ok, thank you!

i'm able to reproduce it and added following test:
https://github.com/linux-can/can-tests/blob/master/j1939/j1939_ac_1k_bam_local0.sh

-- 
Pengutronix e.K.   | |
Steuerwalder Str. 21   | http://www.pengutronix.de/  |
31137 Hildesheim, Germany  | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: PGP signature


[RFC PATCH 4/8] fsdax: copy data before write

2020-08-07 Thread Shiyang Ruan
Add dax_copy_edges() into each dax actor functions to perform CoW.

Signed-off-by: Shiyang Ruan 
---
 fs/dax.c | 39 +++
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 308678c58d4d..65553e3f7602 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1208,7 +1208,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t 
length, void *data,
return iov_iter_zero(min(length, end - pos), iter);
}
 
-   if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+   if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED &&
+   !(iomap->flags & IOMAP_F_SHARED)))
return -EIO;
 
/*
@@ -1247,6 +1248,12 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t 
length, void *data,
break;
}
 
+   if (iomap != srcmap) {
+   ret = dax_copy_edges(pos, length, srcmap, kaddr, false);
+   if (ret)
+   break;
+   }
+
map_len = PFN_PHYS(map_len);
kaddr += offset;
map_len -= offset;
@@ -1358,6 +1365,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
vm_fault_t ret = 0;
void *entry;
pfn_t pfn;
+   void *kaddr;
 
trace_dax_pte_fault(inode, vmf, ret);
/*
@@ -1439,19 +1447,27 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
 
switch (iomap.type) {
case IOMAP_MAPPED:
+cow:
if (iomap.flags & IOMAP_F_NEW) {
count_vm_event(PGMAJFAULT);
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
error = dax_iomap_direct_access(&iomap, pos, PAGE_SIZE, &pfn,
-   NULL);
+   &kaddr);
if (error < 0)
goto error_finish_iomap;
 
entry = dax_insert_entry(&xas, mapping, vmf, iomap.addr, entry,
 pfn, 0, write && !sync);
 
+   if (srcmap.type != IOMAP_HOLE) {
+   error = dax_copy_edges(pos, PAGE_SIZE, &srcmap, kaddr,
+  false);
+   if (error)
+   goto error_finish_iomap;
+   }
+
/*
 * If we are doing synchronous page fault and inode needs fsync,
 * we can insert PTE into page tables only after that happens.
@@ -1475,12 +1491,15 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
 
goto finish_iomap;
case IOMAP_UNWRITTEN:
+   if (write && iomap.flags & IOMAP_F_SHARED)
+   goto cow;
+   fallthrough;
case IOMAP_HOLE:
if (!write) {
ret = dax_load_hole(, mapping, , vmf);
goto finish_iomap;
}
-   /*FALLTHRU*/
+   fallthrough;
default:
WARN_ON_ONCE(1);
error = -EIO;
@@ -1582,6 +1601,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
loff_t pos;
int error;
pfn_t pfn;
+   void *kaddr;
 
/*
 * Check whether offset isn't beyond end of file now. Caller is
@@ -1663,14 +1683,22 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
 
switch (iomap.type) {
case IOMAP_MAPPED:
+cow:
error = dax_iomap_direct_access(, pos, PMD_SIZE, ,
-   NULL);
+   );
if (error < 0)
goto finish_iomap;
 
entry = dax_insert_entry(, mapping, vmf, iomap.addr, entry,
 pfn, DAX_PMD, write && !sync);
 
+   if (srcmap.type != IOMAP_HOLE) {
+   error = dax_copy_edges(pos, PMD_SIZE, , kaddr,
+  true);
+   if (error)
+   goto unlock_entry;
+   }
+
/*
 * If we are doing synchronous page fault and inode needs fsync,
 * we can insert PMD into page tables only after that happens.
@@ -1689,6 +1717,9 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
result = vmf_insert_pfn_pmd(vmf, pfn, write);
break;
case IOMAP_UNWRITTEN:
+   if (write && iomap.flags & IOMAP_F_SHARED)
+   goto cow;
+   fallthrough;
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
   

Re: [PATCH stable v4.9 v2] arm64: entry: Place an SB sequence following an ERET instruction

2020-08-07 Thread Greg KH
On Thu, Aug 06, 2020 at 01:00:54PM -0700, Florian Fainelli wrote:
> 
> 
> On 7/20/2020 11:26 AM, Florian Fainelli wrote:
> > On 7/20/20 6:04 AM, Greg KH wrote:
> >> On Thu, Jul 09, 2020 at 12:50:23PM -0700, Florian Fainelli wrote:
> >>> From: Will Deacon 
> >>>
> >>> commit 679db70801da9fda91d26caf13bf5b5ccc74e8e8 upstream
> >>>
> >>> Some CPUs can speculate past an ERET instruction and potentially perform
> >>> speculative accesses to memory before processing the exception return.
> >>> Since the register state is often controlled by a lower privilege level
> >>> at the point of an ERET, this could potentially be used as part of a
> >>> side-channel attack.
> >>>
> >>> This patch emits an SB sequence after each ERET so that speculation is
> >>> held up on exception return.
> >>>
> >>> Signed-off-by: Will Deacon 
> >>> [florian: Adjust hyp-entry.S to account for the label
> >>>  added change to hyp/entry.S]
> >>> Signed-off-by: Florian Fainelli 
> >>> ---
> >>> Changes in v2:
> >>>
> >>> - added missing hunk in hyp/entry.S per Will's feedback
> >>
> >> What about 4.19.y and 4.14.y trees?  I can't take something for 4.9.y
> >> and then have a regression if someone moves to a newer release, right?
> > 
> > Sure, send you candidates for 4.14 and 4.19.
> 
> Greg, did you have a chance to queue those changes for 4.9, 4.14 and 4.19?
> 
> https://lore.kernel.org/linux-arm-kernel/20200720182538.13304-1-f.faine...@gmail.com/
> https://lore.kernel.org/linux-arm-kernel/20200720182937.14099-1-f.faine...@gmail.com/
> https://lore.kernel.org/linux-arm-kernel/20200709195034.15185-1-f.faine...@gmail.com/

Nope, I was waiting for Will's "ack" for these.

thanks,

greg k-h


Re: [PATCH v11 0/6] KASAN-KUnit Integration

2020-08-07 Thread Andrey Konovalov
On Wed, Aug 5, 2020 at 6:29 AM David Gow  wrote:
>
> This patchset contains everything needed to integrate KASAN and KUnit.
>
> KUnit will be able to:
> (1) Fail tests when an unexpected KASAN error occurs
> (2) Pass tests when an expected KASAN error occurs
>
> Convert KASAN tests to KUnit with the exception of copy_user_test
> because KUnit is unable to test those.
>
> Add documentation on how to run the KASAN tests with KUnit and what to
> expect when running these tests.
>
> This patchset depends on:
> - "kunit: extend kunit resources API" [1]
>  - This is included in the KUnit 5.9-rci pull request[8]
>
> I'd _really_ like to get this into 5.9 if possible: we also have some
> other changes which depend on some things here.

Found a small issue in patch #3, but otherwise:

Tested-by: Andrey Konovalov 

for the series.

The patches apply cleanly on top of the latest linux-next/akpm branch.

There are some tests that fail for tag-based mode, but those are
unrelated to this series, and require KASAN improvements.

>
> Changes from v10:
>  - Fixed some whitespace issues in patch 2.
>  - Split out the renaming of the KUnit test suite into a separate patch.
>
> Changes from v9:
>  - Rebased on top of linux-next (20200731) + kselftest/kunit and [7]
>  - Note that the kasan_rcu_uaf test has not been ported to KUnit, and
>remains in test_kasan_module. This is because:
>(a) KUnit's expect failure will not check if the RCU stacktraces
>show.
>(b) KUnit is unable to link the failure to the test, as it occurs in
>an RCU callback.
>
> Changes from v8:
>  - Rebased on top of kselftest/kunit
>  - (Which, with this patchset, should rebase cleanly on 5.8-rc7)
>  - Renamed the KUnit test suite, config name to patch the proposed
>naming guidelines for KUnit tests[6]
>
> Changes from v7:
>  - Rebased on top of kselftest/kunit
>  - Rebased on top of v4 of the kunit resources API[1]
>  - Rebased on top of v4 of the FORTIFY_SOURCE fix[2,3,4]
>  - Updated the Kconfig entry to support KUNIT_ALL_TESTS
>
> Changes from v6:
>  - Rebased on top of kselftest/kunit
>  - Rebased on top of Daniel Axtens' fix for FORTIFY_SOURCE
>incompatibilites [2]
>  - Removed a redundant report_enabled() check.
>  - Fixed some places with out of date Kconfig names in the
>documentation.
>
> Changes from v5:
>  - Split out the panic_on_warn changes to a separate patch.
>  - Fix documentation to fewer to the new Kconfig names.
>  - Fix some changes which were in the wrong patch.
>  - Rebase on top of kselftest/kunit (currently identical to 5.7-rc1)
>
> Changes from v4:
>  - KASAN no longer will panic on errors if both panic_on_warn and
>kasan_multishot are enabled.
>  - As a result, the KASAN tests will no-longer disable panic_on_warn.
>  - This also means panic_on_warn no-longer needs to be exported.
>  - The use of temporary "kasan_data" variables has been cleaned up
>somewhat.
>  - A potential refcount/resource leak should multiple KASAN errors
>appear during an assertion was fixed.
>  - Some wording changes to the KASAN test Kconfig entries.
>
> Changes from v3:
>  - KUNIT_SET_KASAN_DATA and KUNIT_DO_EXPECT_KASAN_FAIL have been
>  combined and included in KUNIT_DO_EXPECT_KASAN_FAIL() instead.
>  - Reordered logic in kasan_update_kunit_status() in report.c to be
>  easier to read.
>  - Added comment to not use the name "kasan_data" for any kunit tests
>  outside of KUNIT_EXPECT_KASAN_FAIL().
>
> Changes since v2:
>  - Due to Alan's changes in [1], KUnit can be built as a module.
>  - The name of the tests that could not be run with KUnit has been
>  changed to be more generic: test_kasan_module.
>  - Documentation on how to run the new KASAN tests and what to expect
>  when running them has been added.
>  - Some variables and functions are now static.
>  - Now save/restore panic_on_warn in a similar way to kasan_multi_shot
>  and renamed the init/exit functions to be more generic to accommodate.
>  - Due to [4] in kasan_strings, kasan_memchr, and
>  kasan_memcmp will fail if CONFIG_AMD_MEM_ENCRYPT is enabled so return
>  early and print message explaining this circumstance.
>  - Changed preprocessor checks to C checks where applicable.
>
> Changes since v1:
>  - Make use of Alan Maguire's suggestion to use his patch that allows
>static resources for integration instead of adding a new attribute to
>the kunit struct
>  - All KUNIT_EXPECT_KASAN_FAIL statements are local to each test
>  - The definition of KUNIT_EXPECT_KASAN_FAIL is local to the
>test_kasan.c file since it seems this is the only place this will
>be used.
>  - Integration relies on KUnit being builtin
>  - copy_user_test has been separated into its own file since KUnit
>is unable to test these. This can be run as a module just as before,
>using CONFIG_TEST_KASAN_USER
>  - The addition to the current task has been separated into its own
>patch as this is a significant enough change to be on its own.
>
>
> [1] 

[GIT PULL] Please pull powerpc/linux.git powerpc-5.9-1 tag

2020-08-07 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA256

Hi Linus,

Please pull powerpc updates for 5.9.

Just one minor conflict, in a comment in drivers/misc/ocxl/config.c.

Notable out of area changes:
  arch/m68k/include/asm/adb_iop.h   # c66da95a39ec macintosh/adb-iop: 
Implement SRQ autopolling
  drivers/md/dm-writecache.c# 3e79f082ebfc libnvdimm/nvdimm/flush: 
Allow architecture to override the flush barrier
  drivers/nvdimm/region_devs.c
  include/asm-generic/barrier.h
  drivers/nvdimm/of_pmem.c  # 8c26ab72663b powerpc/pmem: Initialize 
pmem device on newer hardware
  include/asm-generic/qspinlock.h   # 20c0e8269e9d powerpc/pseries: 
Implement paravirt qspinlocks for SPLPAR
  include/linux/cpuhotplug.h# 1a8f0886a600 powerpc/perf/hv-24x7: 
Add cpu hotplug support
  include/linux/kexec.h # f891f19736bd kexec_file: Allow archs 
to handle special regions while locating memory hole
  kernel/kexec_file.c
  include/trace/events/mmflags.h# 5c9fa16e8abd powerpc/64s: Remove 
PROT_SAO support
  include/linux/mm.h
  mm/ksm.c

cheers


The following changes since commit 48778464bb7d346b47157d21ffde2af6b2d39110:

  Linux 5.8-rc2 (2020-06-21 15:45:29 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-5.9-1

for you to fetch changes up to a7aaa2f26bfd932a654706b19859e7adf802bee2:

  selftests/powerpc: Fix pkey syscall redefinitions (2020-08-05 10:14:03 +1000)


- --
powerpc updates for 5.9

 - Add support for (optionally) using queued spinlocks & rwlocks.

 - Support for a new faster system call ABI using the scv instruction on Power9
   or later.

 - Drop support for the PROT_SAO mmap/mprotect flag as it will be unsupported on
   Power10 and future processors, leaving us with no way to implement the
   functionality it requests. This risks breaking userspace, though we believe
   it is unused in practice.

 - A bug fix for, and then the removal of, our custom stack expansion checking.
   We now allow stack expansion up to the rlimit, like other architectures.

 - Remove the remnants of our (previously disabled) topology update code, which
   tried to react to NUMA layout changes on virtualised systems, but was prone
   to crashes and other problems.

 - Add PMU support for Power10 CPUs.

 - A change to our signal trampoline so that we don't unbalance the link stack
   (branch return predictor) in the signal delivery path.

 - Lots of other cleanups, refactorings, smaller features and so on as usual.

Thanks to:
  Abhishek Goel, Alastair D'Silva, Alexander A. Klimov, Alexey Kardashevskiy,
  Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anton
  Blanchard, Arnd Bergmann, Athira Rajeev, Balamuruhan S, Bharata B Rao, Bill
  Wendling, Bin Meng, Cédric Le Goater, Chris Packham, Christophe Leroy,
  Christoph Hellwig, Daniel Axtens, Dan Williams, David Lamparter, Desnes A.
  Nunes do Rosario, Erhard F., Finn Thain, Frederic Barrat, Ganesh Goudar,
  Gautham R. Shenoy, Geoff Levand, Greg Kurz, Gustavo A. R. Silva, Hari Bathini,
  Harish, Imre Kaloz, Joel Stanley, Joe Perches, John Crispin, Jordan Niethe,
  Kajol Jain, Kamalesh Babulal, Kees Cook, Laurent Dufour, Leonardo Bras, Li
  RongQing, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Michal
  Suchanek, Milton Miller, Mimi Zohar, Murilo Opsfelder Araujo, Nathan
  Chancellor, Nathan Lynch, Naveen N. Rao, Nayna Jain, Nicholas Piggin, Oliver
  O'Halloran, Palmer Dabbelt, Pedro Miraglia Franco de Carvalho, Philippe
  Bergheaud, Pingfan Liu, Pratik Rajesh Sampat, Qian Cai, Qinglang Miao, Randy
  Dunlap, Ravi Bangoria, Sachin Sant, Sam Bobroff, Sandipan Das, Santosh
  Sivaraj, Satheesh Rajendran, Shirisha Ganta, Sourabh Jain, Srikar Dronamraju,
  Stan Johnson, Stephen Rothwell, Thadeu Lima de Souza Cascardo, Thiago Jung
  Bauermann, Tom Lane, Vaibhav Jain, Vladis Dronov, Wei Yongjun, Wen Xiong,
  YueHaibing.

- --
Abhishek Goel (1):
  cpuidle/powernv : Remove dead code block

Alastair D'Silva (2):
  ocxl: Remove unnecessary externs
  ocxl: Address kernel doc errors & warnings

Alexander A. Klimov (5):
  ocxl: Replace HTTP links with HTTPS ones
  powerpc/Kconfig: Replace HTTP links with HTTPS ones
  powerpc: Replace HTTP links with HTTPS ones
  macintosh/adb: Replace HTTP links with HTTPS ones
  macintosh/therm_adt746x: Replace HTTP links with HTTPS ones

Alexey Kardashevskiy (2):
  powerpc/xive: Ignore kmemleak false positives
  powerpc/powernv/ioda: Return correct error if TCE level allocation failed

Aneesh Kumar K.V (37):
  powerpc/mm/book3s64: Skip 16G page reservation with radix
  powerpc/pmem: Restrict papr_scm to P8 and above.
  powerpc/pmem: Add new instructions for persistent storage and sync
  powerpc/pmem: Add 

[RFC PATCH 2/8] fsdax, mm: track files sharing dax page for memory-failure

2020-08-07 Thread Shiyang Ruan
When memory-failure occurs on a pmem device which contains a filesystem,
we need to find out which files are in use, and then notify the processes
that are using these files to handle the error.

The design of the track method is as follow:
1. dax_associate_entry() associates the owner's info with this page
- For non-reflink case:
  page->mapping,->index stores the file's mapping, offset in file.
A dax page is not shared by other files. dax_associate_entry() is
called only once.  So, use page->mapping,->index to store the
owner's info.
- For reflink case:
  page->mapping,->index stores the block device, offset in device.
  A dax page is shared more than once.  So, dax_associate_entry()
will be called more than once.  We introduce page->zone_device_data
as a reflink counter, to indicate that this page is shared and how
many owners are now using this page. The page->mapping,->index is
used to store the block_device of the fs and page offset of this
device.

2. dax_lock_page() calls query interface to lock each dax entry
- For non-reflink case:
  owner's info is stored in page->mapping,->index.
So, it is easy to lock its dax entry.
- For reflink case:
  owner's info is obtained by calling get_shared_files(), which is
  implemented by FS.
The FS context could be found in block_device that stored by
page->mapping.  Then lock the dax entries of the owners.

In memory-failure(), since owners list has been obtained in
dax_lock_page(), just iterate the list and handle the error.

Signed-off-by: Shiyang Ruan 
---
 fs/dax.c| 111 +++
 include/linux/dax.h |   6 +-
 include/linux/mm.h  |   8 +++
 mm/memory-failure.c | 138 +++-
 4 files changed, 183 insertions(+), 80 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 11b16729b86f..47380f75ef38 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -329,7 +329,7 @@ static unsigned long dax_end_pfn(void *entry)
  * offsets.
  */
 static void dax_associate_entry(void *entry, struct address_space *mapping,
-   struct vm_area_struct *vma, unsigned long address)
+   struct vm_fault *vmf, pgoff_t offset)
 {
unsigned long size = dax_entry_size(entry), pfn, index;
int i = 0;
@@ -337,13 +337,27 @@ static void dax_associate_entry(void *entry, struct 
address_space *mapping,
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
 
-   index = linear_page_index(vma, address & ~(size - 1));
+   index = linear_page_index(vmf->vma, vmf->address & ~(size - 1));
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
 
-   WARN_ON_ONCE(page->mapping);
-   page->mapping = mapping;
-   page->index = index + i++;
+   BUG_ON(!page->mapping && IS_FSDAX_SHARED(page));
+
+   /* Use zone_device_data as reflink counter here.
+* If one page is associated for the first time, then use the
+* ->mapping,->index as normal.  For the second time it is
+* assocated, we store the block_device that this page belongs
+* to in ->mapping and the offset within this block_device in
+* ->index, and increase the reflink counter.
+*/
+   if (!page->mapping) {
+   page->mapping = mapping;
+   page->index = index + i++;
+   } else {
+   page->mapping = (struct address_space 
*)mapping->host->i_sb->s_bdev;
+   page->index = offset;
+   page->zone_device_data++;
+   }
}
 }
 
@@ -359,9 +373,12 @@ static void dax_disassociate_entry(void *entry, struct 
address_space *mapping,
struct page *page = pfn_to_page(pfn);
 
WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
-   WARN_ON_ONCE(page->mapping && page->mapping != mapping);
-   page->mapping = NULL;
-   page->index = 0;
+   if (!IS_FSDAX_SHARED(page)) {
+   page->mapping = NULL;
+   page->index = 0;
+   } else {
+   page->zone_device_data--;
+   }
}
 }
 
@@ -386,7 +403,7 @@ static struct page *dax_busy_page(void *entry)
  * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
  * not be locked.
  */
-dax_entry_t dax_lock_page(struct page *page)
+void _dax_lock_page(struct page *page, struct shared_files *sfp)
 {
XA_STATE(xas, NULL, 0);
void *entry;
@@ -394,7 +411,7 @@ dax_entry_t dax_lock_page(struct page *page)
/* Ensure page->mapping isn't freed while we look at it */
rcu_read_lock();
for (;;) {
-   struct address_space *mapping = READ_ONCE(page->mapping);
+   struct address_space *mapping = READ_ONCE(sfp->mapping);
 

[RFC PATCH 8/8] fs/xfs: support dedupe for fsdax

2020-08-07 Thread Shiyang Ruan
Use xfs_break_layouts() to break files' layouts when locking them.  And
call dax_file_range_compare() function to compare range for files both
have DAX flag.

Signed-off-by: Shiyang Ruan 
---
 fs/xfs/xfs_reflink.c | 78 ++--
 1 file changed, 46 insertions(+), 32 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index f87ab78dd421..b2901ad1a269 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -29,6 +29,7 @@
 #include "xfs_iomap.h"
 #include "xfs_sb.h"
 #include "xfs_ag_resv.h"
+#include 
 
 /*
  * Copy on Write of Shared Blocks
@@ -1185,47 +1186,41 @@ xfs_reflink_remap_blocks(
  * back out both locks.
  */
 static int
-xfs_iolock_two_inodes_and_break_layout(
-   struct inode*src,
-   struct inode*dest)
+xfs_reflink_remap_lock_and_break_layouts(
+   struct file *file_in,
+   struct file *file_out)
 {
int error;
+   struct inode*inode_in = file_inode(file_in);
+   struct xfs_inode*src = XFS_I(inode_in);
+   struct inode*inode_out = file_inode(file_out);
+   struct xfs_inode*dest = XFS_I(inode_out);
+   uintiolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 
-   if (src > dest)
+   if (inode_in > inode_out) {
+   swap(inode_in, inode_out);
swap(src, dest);
-
-retry:
-   /* Wait to break both inodes' layouts before we start locking. */
-   error = break_layout(src, true);
-   if (error)
-   return error;
-   if (src != dest) {
-   error = break_layout(dest, true);
-   if (error)
-   return error;
}
 
-   /* Lock one inode and make sure nobody got in and leased it. */
-   inode_lock(src);
-   error = break_layout(src, false);
+   inode_lock(inode_in);
+   xfs_ilock(src, XFS_MMAPLOCK_EXCL);
+   error = xfs_break_layouts(inode_in, , BREAK_UNMAP);
+   xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
if (error) {
-   inode_unlock(src);
-   if (error == -EWOULDBLOCK)
-   goto retry;
+   inode_unlock(inode_in);
return error;
}
 
-   if (src == dest)
+   if (inode_in == inode_out)
return 0;
 
-   /* Lock the other inode and make sure nobody got in and leased it. */
-   inode_lock_nested(dest, I_MUTEX_NONDIR2);
-   error = break_layout(dest, false);
+   inode_lock_nested(inode_out, I_MUTEX_NONDIR2);
+   xfs_ilock(dest, XFS_MMAPLOCK_EXCL);
+   error = xfs_break_layouts(inode_out, , BREAK_UNMAP);
+   xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (error) {
-   inode_unlock(src);
-   inode_unlock(dest);
-   if (error == -EWOULDBLOCK)
-   goto retry;
+   inode_unlock(inode_in);
+   inode_unlock(inode_out);
return error;
}
 
@@ -1244,6 +1239,11 @@ xfs_reflink_remap_unlock(
struct xfs_inode*dest = XFS_I(inode_out);
boolsame_inode = (inode_in == inode_out);
 
+   if (inode_in > inode_out) {
+   swap(inode_in, inode_out);
+   swap(src, dest);
+   }
+
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
@@ -1274,6 +1274,14 @@ xfs_reflink_zero_posteof(
_buffered_write_iomap_ops);
 }
 
+int xfs_reflink_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+ struct inode *dest, loff_t destoff,
+ loff_t len, bool *is_same)
+{
+   return dax_file_range_compare(src, srcoff, dest, destoff, len, is_same,
+ _read_iomap_ops);
+}
+
 /*
  * Prepare two files for range cloning.  Upon a successful return both inodes
  * will have the iolock and mmaplock held, the page cache of the out file will
@@ -1318,9 +1326,10 @@ xfs_reflink_remap_prep(
struct xfs_inode*dest = XFS_I(inode_out);
boolsame_inode = (inode_in == inode_out);
ssize_t ret;
+   compare_range_t cmp;
 
/* Lock both files against IO */
-   ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
+   ret = xfs_reflink_remap_lock_and_break_layouts(file_in, file_out);
if (ret)
return ret;
if (same_inode)
@@ -1335,12 +1344,17 @@ xfs_reflink_remap_prep(
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
goto out_unlock;
 
-   /* Don't share DAX file data for now. */
-   if (IS_DAX(inode_in) || IS_DAX(inode_out))
+   /* Don't share DAX file data with non-DAX file. */
+   if (IS_DAX(inode_in) != 

[RFC PATCH 0/8] fsdax: introduce FS query interface to support reflink

2020-08-07 Thread Shiyang Ruan
This patchset is a try to resolve the problem of tracking shared page
for fsdax.

Instead of per-page tracking method, this patchset introduces a query
interface: get_shared_files(), which is implemented by each FS, to
obtain the owners of a shared page.  It returns an owner list of this
shared page.  Then, the memory-failure() iterates the list to be able
to notify each process using files that share this page.

The design of the tracking method is as follow:
1. dax_associate_entry() associates the owner's info with this page
- For non-reflink case:
  page->mapping,->index stores the file's mapping, offset in file.
A dax page is not shared by other files. dax_associate_entry() is
called only once.  So, use page->mapping,->index to store the
owner's info.
- For reflink case:
  page->mapping,->index stores the block device, offset in device.
  A dax page is shared more than once.  So, dax_associate_entry()
will be called more than once.  We introduce page->zone_device_data
as a reflink counter, to indicate that this page is shared and how
many owners are now using this page. The page->mapping,->index is
used to store the block_device of the fs and page offset of this
device.

2. dax_lock_page() calls query interface to lock each dax entry
- For non-reflink case:
  owner's info is stored in page->mapping,->index.
So, it is easy to lock its dax entry.
- For reflink case:
  owner's info is obtained by calling get_shared_files(), which is
  implemented by FS.
The FS context could be found in block_device that stored by
page->mapping.  Then lock the dax entries of the owners.

In memory-failure(), since the owner list has been obtained in 
dax_lock_page(), just iterate the list and handle the error.  This
patchset didn't handle the memory failure on metadata of FS because
I haven't found a way to distinguish whether this page contains
metadata yet.  Still working on it.

==
I also borrowed and made some changes on Goldwyn's patchsets.
These patches makes up for the lack of CoW mechanism in fsdax.

The rests are dax & reflink support for xfs.

(Rebased on v5.8)
==
Shiyang Ruan (8):
  fs: introduce get_shared_files() for dax
  fsdax, mm: track files sharing dax page for memory-failure
  fsdax: introduce dax_copy_edges() for COW
  fsdax: copy data before write
  fsdax: replace mmap entry in case of CoW
  fsdax: dedup file range to use a compare function
  fs/xfs: handle CoW for fsdax write() path
  fs/xfs: support dedupe for fsdax

 fs/btrfs/reflink.c |   3 +-
 fs/dax.c   | 302 +++--
 fs/ocfs2/file.c|   2 +-
 fs/read_write.c|  11 +-
 fs/xfs/xfs_bmap_util.c |   6 +-
 fs/xfs/xfs_file.c  |  10 +-
 fs/xfs/xfs_iomap.c |   3 +-
 fs/xfs/xfs_iops.c  |  11 +-
 fs/xfs/xfs_reflink.c   |  80 ++-
 fs/xfs/xfs_super.c |  67 +
 include/linux/dax.h|  18 ++-
 include/linux/fs.h |  11 +-
 include/linux/mm.h |   8 ++
 mm/memory-failure.c| 138 ---
 14 files changed, 529 insertions(+), 141 deletions(-)

-- 
2.27.0





[RFC PATCH 6/8] fsdax: dedup file range to use a compare function

2020-08-07 Thread Shiyang Ruan
With dax we cannot deal with readpage() etc. So, we create a
function callback to perform the file data comparison and pass
it to generic_remap_file_range_prep() so it can use iomap-based
functions.

This may not be the best way to solve this. Suggestions welcome.

Signed-off-by: Goldwyn Rodrigues 
Signed-off-by: Shiyang Ruan 
---
 fs/btrfs/reflink.c   |  3 +-
 fs/dax.c | 67 
 fs/ocfs2/file.c  |  2 +-
 fs/read_write.c  | 11 
 fs/xfs/xfs_reflink.c |  2 +-
 include/linux/dax.h  |  5 
 include/linux/fs.h   |  9 +-
 7 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 040009d1cc31..d0412bc338da 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -765,7 +765,8 @@ static int btrfs_remap_file_range_prep(struct file 
*file_in, loff_t pos_in,
return ret;
 
return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-   len, remap_flags);
+   len, remap_flags,
+   vfs_dedupe_file_range_compare);
 }
 
 loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
diff --git a/fs/dax.c b/fs/dax.c
index 28b8e23b11ac..80a9946fd25a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -30,6 +30,8 @@
 #define CREATE_TRACE_POINTS
 #include 
 
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
 static inline unsigned int pe_order(enum page_entry_size pe_size)
 {
if (pe_size == PE_SIZE_PTE)
@@ -1874,3 +1876,68 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
return dax_insert_pfn_mkwrite(vmf, pfn, order);
 }
 EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
+
+int dax_file_range_compare(struct inode *src, loff_t srcoff, struct inode 
*dest,
+   loff_t destoff, loff_t len, bool *is_same,
+   const struct iomap_ops *ops)
+{
+   void *saddr, *daddr;
+   struct iomap smap = { 0 };
+   struct iomap dmap = { 0 };
+   bool same = false;
+   loff_t cmp_len;
+   int id, ret = 0;
+
+   id = dax_read_lock();
+   while (len) {
+   ret = ops->iomap_begin(src, srcoff, len, 0, , NULL);
+   if (ret < 0)
+   goto out_src;
+   cmp_len = MIN(len, smap.offset + smap.length - srcoff);
+
+   ret = ops->iomap_begin(dest, destoff, cmp_len, 0, , NULL);
+   if (ret < 0)
+   goto out_dest;
+   cmp_len = MIN(cmp_len, dmap.offset + dmap.length - destoff);
+
+   if (smap.type == IOMAP_HOLE && dmap.type == IOMAP_HOLE)
+   goto next;
+
+   if (smap.type == IOMAP_HOLE || dmap.type == IOMAP_HOLE) {
+   same = false;
+   goto next;
+   }
+
+   ret = dax_iomap_direct_access(, srcoff,
+ ALIGN(srcoff + cmp_len, PAGE_SIZE), NULL, );
+   if (ret < 0)
+   goto out_dest;
+
+   ret = dax_iomap_direct_access(, destoff,
+ ALIGN(destoff + cmp_len, PAGE_SIZE), NULL, );
+   if (ret < 0)
+   goto out_dest;
+
+   same = !memcmp(saddr, daddr, cmp_len);
+   if (!same)
+   break;
+next:
+   len -= cmp_len;
+   srcoff += cmp_len;
+   destoff += cmp_len;
+out_dest:
+   if (ops->iomap_end)
+   ops->iomap_end(dest, destoff, cmp_len, 0, 0, );
+out_src:
+   if (ops->iomap_end)
+   ops->iomap_end(src, srcoff, len, 0, 0, );
+
+   if (ret < 0)
+   goto out;
+   }
+   *is_same = same;
+out:
+   dax_read_unlock(id);
+   return ret;
+}
+EXPORT_SYMBOL_GPL(dax_file_range_compare);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 85979e2214b3..9d101f129d16 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2591,7 +2591,7 @@ static loff_t ocfs2_remap_file_range(struct file 
*file_in, loff_t pos_in,
goto out_unlock;
 
ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-   , remap_flags);
+   , remap_flags, vfs_dedupe_file_range_compare);
if (ret < 0 || len == 0)
goto out_unlock;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 4fb797822567..2974a624f232 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1927,7 +1927,7 @@ static void vfs_unlock_two_pages(struct page *page1, 
struct page *page2)
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
  */
-static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 

[RFC PATCH 5/8] fsdax: replace mmap entry in case of CoW

2020-08-07 Thread Shiyang Ruan
We replace the existing entry to the newly allocated one
in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE
so writeback marks this entry as writeprotected. This
helps us snapshots so new write pagefaults after snapshots
trigger a CoW.

Signed-off-by: Goldwyn Rodrigues 
Signed-off-by: Shiyang Ruan 
---
 fs/dax.c | 31 +++
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 65553e3f7602..28b8e23b11ac 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -766,6 +766,9 @@ static int copy_user_dax(struct block_device *bdev, struct 
dax_device *dax_dev,
return 0;
 }
 
+#define DAX_IF_DIRTY   (1ULL << 0)
+#define DAX_IF_COW (1ULL << 1)
+
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -776,14 +779,16 @@ static int copy_user_dax(struct block_device *bdev, 
struct dax_device *dax_dev,
 static void *dax_insert_entry(struct xa_state *xas,
struct address_space *mapping, struct vm_fault *vmf,
pgoff_t bdoff, void *entry, pfn_t pfn, unsigned long flags,
-   bool dirty)
+   bool insert_flags)
 {
void *new_entry = dax_make_entry(pfn, flags);
+   bool dirty = insert_flags & DAX_IF_DIRTY;
+   bool cow = insert_flags & DAX_IF_COW;
 
if (dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-   if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+   if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
unsigned long index = xas->xa_index;
/* we are replacing a zero page with block mapping */
if (dax_is_pmd_entry(entry))
@@ -795,7 +800,7 @@ static void *dax_insert_entry(struct xa_state *xas,
 
xas_reset(xas);
xas_lock_irq(xas);
-   if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+   if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
void *old;
 
dax_disassociate_entry(entry, mapping, false);
@@ -819,6 +824,9 @@ static void *dax_insert_entry(struct xa_state *xas,
if (dirty)
xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
+   if (cow)
+   xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
xas_unlock_irq(xas);
return entry;
 }
@@ -1366,6 +1374,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
void *entry;
pfn_t pfn;
void *kaddr;
+   unsigned long insert_flags = 0;
 
trace_dax_pte_fault(inode, vmf, ret);
/*
@@ -1491,8 +1500,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
 
goto finish_iomap;
case IOMAP_UNWRITTEN:
-   if (write && iomap.flags & IOMAP_F_SHARED)
+   if (write && (iomap.flags & IOMAP_F_SHARED)) {
+   insert_flags |= DAX_IF_COW;
goto cow;
+   }
fallthrough;
case IOMAP_HOLE:
if (!write) {
@@ -1602,6 +1613,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
int error;
pfn_t pfn;
void *kaddr;
+   unsigned long insert_flags = 0;
 
/*
 * Check whether offset isn't beyond end of file now. Caller is
@@ -1717,14 +1729,17 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
result = vmf_insert_pfn_pmd(vmf, pfn, write);
break;
case IOMAP_UNWRITTEN:
-   if (write && iomap.flags & IOMAP_F_SHARED)
+   if (write && (iomap.flags & IOMAP_F_SHARED)) {
+   insert_flags |= DAX_IF_COW;
goto cow;
+   }
fallthrough;
case IOMAP_HOLE:
-   if (WARN_ON_ONCE(write))
+   if (!write) {
+   result = dax_pmd_load_hole(, vmf, , );
break;
-   result = dax_pmd_load_hole(, vmf, , );
-   break;
+   }
+   fallthrough;
default:
WARN_ON_ONCE(1);
break;
-- 
2.27.0





[RFC PATCH 3/8] fsdax: introduce dax_copy_edges() for COW

2020-08-07 Thread Shiyang Ruan
Add address output in dax_iomap_pfn() in order to perform a memcpy() in CoW
case.  Since this function outputs both the address and the pfn, rename it to
dax_iomap_direct_access().

dax_copy_edges() is a helper function that performs a copy from one part
of the device to another for data that is not page aligned.

Signed-off-by: Goldwyn Rodrigues 
Signed-off-by: Shiyang Ruan 
---
 fs/dax.c | 64 ++--
 1 file changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 47380f75ef38..308678c58d4d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1043,8 +1043,8 @@ static sector_t dax_iomap_sector(struct iomap *iomap, 
loff_t pos)
return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
 }
 
-static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
-pfn_t *pfnp)
+static int dax_iomap_direct_access(struct iomap *iomap, loff_t pos, size_t 
size,
+pfn_t *pfnp, void **addr)
 {
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
@@ -1055,12 +1055,14 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t 
pos, size_t size,
if (rc)
return rc;
id = dax_read_lock();
-   length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-  NULL, pfnp);
+   length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), addr,
+  pfnp);
if (length < 0) {
rc = length;
goto out;
}
+   if (!pfnp)
+   goto out_check_addr;
rc = -EINVAL;
if (PFN_PHYS(length) < size)
goto out;
@@ -1070,11 +1072,59 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t 
pos, size_t size,
if (length > 1 && !pfn_t_devmap(*pfnp))
goto out;
rc = 0;
+
+out_check_addr:
+   if (!addr)
+   goto out;
+   if (!*addr)
+   rc = -EFAULT;
 out:
dax_read_unlock(id);
return rc;
 }
 
+/*
+ * dax_copy_edges - Copies the part of the pages not included in
+ * the write, but required for CoW because
+ * offset/offset+length are not page aligned.
+ */
+static int dax_copy_edges(loff_t pos, loff_t length, struct iomap *srcmap,
+ void *daddr, bool pmd)
+{
+   size_t page_size = pmd ? PMD_SIZE : PAGE_SIZE;
+   loff_t offset = pos & (page_size - 1);
+   size_t size = ALIGN(offset + length, page_size);
+   loff_t end = pos + length;
+   loff_t pg_end = round_up(end, page_size);
+   void *saddr = 0;
+   int ret = 0;
+
+   ret = dax_iomap_direct_access(srcmap, pos, size, NULL, );
+   if (ret)
+   return ret;
+   /*
+* Copy the first part of the page
+* Note: we pass offset as length
+*/
+   if (offset) {
+   if (saddr)
+   ret = memcpy_mcsafe(daddr, saddr, offset);
+   else
+   memset(daddr, 0, offset);
+   }
+
+   /* Copy the last part of the range */
+   if (end < pg_end) {
+   if (saddr)
+   ret = memcpy_mcsafe(daddr + offset + length,
+  saddr + offset + length, pg_end - end);
+   else
+   memset(daddr + offset + length, 0,
+   pg_end - end);
+   }
+   return ret;
+}
+
 /*
  * The user has performed a load from a hole in the file.  Allocating a new
  * page in the file would cause excessive storage usage for workloads with
@@ -1394,7 +1444,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
major = VM_FAULT_MAJOR;
}
-   error = dax_iomap_pfn(, pos, PAGE_SIZE, );
+   error = dax_iomap_direct_access(, pos, PAGE_SIZE, ,
+   NULL);
if (error < 0)
goto error_finish_iomap;
 
@@ -1612,7 +1663,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault 
*vmf, pfn_t *pfnp,
 
switch (iomap.type) {
case IOMAP_MAPPED:
-   error = dax_iomap_pfn(, pos, PMD_SIZE, );
+   error = dax_iomap_direct_access(, pos, PMD_SIZE, ,
+   NULL);
if (error < 0)
goto finish_iomap;
 
-- 
2.27.0





[RFC PATCH 7/8] fs/xfs: handle CoW for fsdax write() path

2020-08-07 Thread Shiyang Ruan
In fsdax mode, WRITE and ZERO on a shared extent need the CoW mechanism
to be performed.  After CoW, the new extents need to be remapped to the file.

Signed-off-by: Shiyang Ruan 
---
 fs/xfs/xfs_bmap_util.c |  6 +-
 fs/xfs/xfs_file.c  | 10 +++---
 fs/xfs/xfs_iomap.c |  3 ++-
 fs/xfs/xfs_iops.c  | 11 ---
 fs/xfs/xfs_reflink.c   |  2 ++
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f37f5cc4b19f..5d09d6c454b6 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -969,10 +969,14 @@ xfs_free_file_space(
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
-   _buffered_write_iomap_ops);
+ IS_DAX(VFS_I(ip)) ?
+ _direct_write_iomap_ops : _buffered_write_iomap_ops);
if (error)
return error;
 
+   if (xfs_is_reflink_inode(ip))
+   xfs_reflink_end_cow(ip, offset, len);
+
/*
 * If we zeroed right up to EOF and EOF straddles a page boundary we
 * must make sure that the post-EOF area is also zeroed because the
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 00db81eac80d..45041913129b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -588,9 +588,13 @@ xfs_file_dax_write(
 
trace_xfs_file_dax_write(ip, count, pos);
ret = dax_iomap_rw(iocb, from, _direct_write_iomap_ops);
-   if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
-   i_size_write(inode, iocb->ki_pos);
-   error = xfs_setfilesize(ip, pos, ret);
+   if (ret > 0) {
+   if (iocb->ki_pos > i_size_read(inode)) {
+   i_size_write(inode, iocb->ki_pos);
+   error = xfs_setfilesize(ip, pos, ret);
+   }
+   if (xfs_is_cow_inode(ip))
+   xfs_reflink_end_cow(ip, pos, ret);
}
 out:
xfs_iunlock(ip, iolock);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index b9a8c3798e08..a1fc75f11cf9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -748,13 +748,14 @@ xfs_direct_write_iomap_begin(
goto out_unlock;
 
if (imap_needs_cow(ip, flags, , nimaps)) {
+   bool need_convert = flags & IOMAP_DIRECT || IS_DAX(inode);
error = -EAGAIN;
if (flags & IOMAP_NOWAIT)
goto out_unlock;
 
/* may drop and re-acquire the ilock */
error = xfs_reflink_allocate_cow(ip, , , ,
-   , flags & IOMAP_DIRECT);
+   , need_convert);
if (error)
goto out_unlock;
if (shared)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 80a13c8561d8..6dd6a973ea75 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -860,6 +860,7 @@ xfs_setattr_size(
int error;
uintlock_flags = 0;
booldid_zeroing = false;
+   const struct iomap_ops  *ops;
 
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
@@ -906,13 +907,17 @@ xfs_setattr_size(
 * extension, or zeroing out the rest of the block on a downward
 * truncate.
 */
+   if (IS_DAX(inode))
+   ops = _direct_write_iomap_ops;
+   else
+   ops = _buffered_write_iomap_ops;
+
if (newsize > oldsize) {
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
-   _zeroing, _buffered_write_iomap_ops);
+   _zeroing, ops);
} else {
-   error = iomap_truncate_page(inode, newsize, _zeroing,
-   _buffered_write_iomap_ops);
+   error = iomap_truncate_page(inode, newsize, _zeroing, ops);
}
 
if (error)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 792217cd1e64..f87ab78dd421 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1269,6 +1269,8 @@ xfs_reflink_zero_posteof(
 
trace_xfs_zero_eof(ip, isize, pos - isize);
return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
+   IS_DAX(VFS_I(ip)) ?
+   _direct_write_iomap_ops :
_buffered_write_iomap_ops);
 }
 
-- 
2.27.0





[RFC PATCH 1/8] fs: introduce get_shared_files() for dax

2020-08-07 Thread Shiyang Ruan
When both dax and reflink are enabled, one page may be shared by
multiple files and offsets.  In order to track them in memory-failure or
other cases, we introduce this function to find out who is sharing
this block (the page) in a filesystem.  It returns a list that contains
all the owners, and the offset within each owner.

For XFS, rmapbt is used to find out the owners of one block.  So, it
should be turned on when we want to use the dax feature together with reflink.

Signed-off-by: Shiyang Ruan 
---
 fs/xfs/xfs_super.c  | 67 +
 include/linux/dax.h |  7 +
 include/linux/fs.h  |  2 ++
 3 files changed, 76 insertions(+)

diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 379cbff438bc..b71392219c91 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -35,6 +35,9 @@
 #include "xfs_refcount_item.h"
 #include "xfs_bmap_item.h"
 #include "xfs_reflink.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
 
 #include 
 #include 
@@ -1097,6 +1100,69 @@ xfs_fs_free_cached_objects(
return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
 }
 
+static int _get_shared_files_fn(
+   struct xfs_btree_cur*cur,
+   struct xfs_rmap_irec*rec,
+   void*priv)
+{
+   struct list_head*list = priv;
+   struct xfs_inode*ip;
+   struct shared_files *sfp;
+
+   /* Get files that incore, filter out others that are not in use. */
+   xfs_iget(cur->bc_mp, cur->bc_tp, rec->rm_owner, XFS_IGET_INCORE, 0, 
);
+   if (ip && !ip->i_vnode.i_mapping)
+   return 0;
+
+   sfp = kmalloc(sizeof(*sfp), GFP_KERNEL);
+   sfp->mapping = ip->i_vnode.i_mapping;
+   sfp->index = rec->rm_offset;
+   list_add_tail(>list, list);
+
+   return 0;
+}
+
+static int
+xfs_fs_get_shared_files(
+   struct super_block  *sb,
+   pgoff_t offset,
+   struct list_head*list)
+{
+   struct xfs_mount*mp = XFS_M(sb);
+   struct xfs_trans*tp = NULL;
+   struct xfs_btree_cur*cur = NULL;
+   struct xfs_rmap_irecrmap_low = { 0 }, rmap_high = { 0 };
+   struct xfs_buf  *agf_bp = NULL;
+   xfs_agblock_t   bno = XFS_B_TO_FSB(mp, offset);
+   xfs_agnumber_t  agno = XFS_FSB_TO_AGNO(mp, bno);
+   int error = 0;
+
+   error = xfs_trans_alloc_empty(mp, );
+   if (error)
+   return error;
+
+   error = xfs_alloc_read_agf(mp, tp, agno, 0, _bp);
+   if (error)
+   return error;
+
+   cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, agno);
+
+   memset(>bc_rec, 0, sizeof(cur->bc_rec));
+   /* Construct the range for one rmap search */
+   memset(_low, 0, sizeof(rmap_low));
+   memset(_high, 0xFF, sizeof(rmap_high));
+   rmap_low.rm_startblock = rmap_high.rm_startblock = bno;
+
+   error = xfs_rmap_query_range(cur, _low, _high,
+_get_shared_files_fn, list);
+   if (error == -ECANCELED)
+   error = 0;
+
+   xfs_btree_del_cursor(cur, error);
+   xfs_trans_brelse(tp, agf_bp);
+   return error;
+}
+
 static const struct super_operations xfs_super_operations = {
.alloc_inode= xfs_fs_alloc_inode,
.destroy_inode  = xfs_fs_destroy_inode,
@@ -1110,6 +1176,7 @@ static const struct super_operations xfs_super_operations 
= {
.show_options   = xfs_fs_show_options,
.nr_cached_objects  = xfs_fs_nr_cached_objects,
.free_cached_objects= xfs_fs_free_cached_objects,
+   .get_shared_files   = xfs_fs_get_shared_files,
 };
 
 static int
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 6904d4e0b2e0..0a85e321d6b4 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -40,6 +40,13 @@ struct dax_operations {
 
 extern struct attribute_group dax_attribute_group;
 
+struct shared_files {
+   struct list_headlist;
+   struct address_space*mapping;
+   pgoff_t index;
+   dax_entry_t cookie;
+};
+
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba86107d..81de3d2739b9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1977,6 +1977,8 @@ struct super_operations {
  struct shrink_control *);
long (*free_cached_objects)(struct super_block *,
struct shrink_control *);
+   int (*get_shared_files)(struct super_block *sb, pgoff_t offset,
+   struct list_head *list);
 };
 
 /*
-- 
2.27.0





Re: [Freedreno] [v1] drm/msm/dpu: Fix reservation failures in modeset

2020-08-07 Thread kalyan_t

On 2020-08-06 22:15, Rob Clark wrote:

On Thu, Aug 6, 2020 at 7:46 AM  wrote:


On 2020-08-05 21:18, Rob Clark wrote:
> On Wed, Aug 5, 2020 at 6:34 AM Kalyan Thota 
> wrote:
>>
>> In TEST_ONLY commit, rm global_state will duplicate the
>> object and request for new reservations, once they pass
>> then the new state will be swapped with the old and will
>> be available for the Atomic Commit.
>>
>> This patch fixes some of missing links in the resource
>> reservation sequence mentioned above.
>>
>> 1) Creation of a duplicate state in test_only commit (Rob)
>> 2) Allow resource release only during crtc_active false.
>>
>> For #2
>> In a modeset operation, swap state happens well before disable.
>> Hence clearing reservations in disable will cause failures
>> in modeset enable.
>>
>> Sequence:
>> Swap state --> old, new
>> modeset disables --> virt disable
>> modeset enable --> virt modeset
>>
>> Allow reservations to be cleared only when crtc active is false
>> as in that case there wont be any modeset enable after disable.
>>
>> Signed-off-by: Kalyan Thota 
>> ---
>>  drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 7 +--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> index 63976dc..b85a576 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
>> @@ -582,7 +582,7 @@ static int dpu_encoder_virt_atomic_check(
>> dpu_kms = to_dpu_kms(priv->kms);
>> mode = _state->mode;
>> adj_mode = _state->adjusted_mode;
>> -   global_state = dpu_kms_get_existing_global_state(dpu_kms);
>> +   global_state = dpu_kms_get_global_state(crtc_state->state);
>> trace_dpu_enc_atomic_check(DRMID(drm_enc));
>>
>> /*
>> @@ -1172,6 +1172,7 @@ static void dpu_encoder_virt_disable(struct
>> drm_encoder *drm_enc)
>> struct msm_drm_private *priv;
>> struct dpu_kms *dpu_kms;
>> struct dpu_global_state *global_state;
>> +   struct drm_crtc_state *crtc_state;
>> int i = 0;
>>
>> if (!drm_enc) {
>> @@ -1191,6 +1192,7 @@ static void dpu_encoder_virt_disable(struct
>> drm_encoder *drm_enc)
>> priv = drm_enc->dev->dev_private;
>> dpu_kms = to_dpu_kms(priv->kms);
>> global_state = dpu_kms_get_existing_global_state(dpu_kms);
>> +   crtc_state = drm_enc->crtc->state;
>>
>> trace_dpu_enc_disable(DRMID(drm_enc));
>>
>> @@ -1220,7 +1222,8 @@ static void dpu_encoder_virt_disable(struct
>> drm_encoder *drm_enc)
>>
>> DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n");
>>
>> -   dpu_rm_release(global_state, drm_enc);
>> +   if (crtc_state->active_changed && !crtc_state->active)
>> +   dpu_rm_release(global_state, drm_enc);
>
> I still think releasing the state in the atomic_commit() path is the
> wrong thing to do.  In the commit path, the various state objects
> should be immutable.. ie. in the atomic_test() path you derive the new
> hw state (including assignment/release of resources), and
> atomic_commit() is simply pushing the state down to the hw.
>
> Otherwise, this looks better than v1.
>
> BR,
> -R
>
okay. Should we avoid reservation altogether if active=0 on that 
crtc

and trigger rm_release on the enc during atomic_check ?
how do you see the approach ?


Yeah, I suppose something like:

   if (drm_atomic_crtc_needs_modeset()) {
  reserve()
   } else if (active_changed && !active) {
  release()
   }

I think it could happen (at least with atomic api) that you get a
modeset without active_changed, so we might need to release() and then
reserve() in that case?  (This is probably where starting to run more
IGT tests would be useful)

BR,
-R
Thanks Rob, please review the v2 version.

-Kalyan
>>
>> mutex_unlock(_enc->enc_lock);
>>  }
>> --
>> 1.9.1
>>
> ___
> Freedreno mailing list
> freedr...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno


Re: [PATCH 09/10] mm/hugetlb: narrow the hugetlb_lock protection area during preparing huge page

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> set_hugetlb_cgroup_[rsvd] just manipulate page local data, which is not
> necessary to be protected by hugetlb_lock.
> 
> Let's take this out.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 9473eb6800e9..1f2010c9dd8d 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1494,9 +1494,9 @@ static void prep_new_huge_page(struct hstate *h, struct 
> page *page, int nid)
>  {
>   INIT_LIST_HEAD(>lru);
>   set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
> - spin_lock(_lock);
>   set_hugetlb_cgroup(page, NULL);
>   set_hugetlb_cgroup_rsvd(page, NULL);
> + spin_lock(_lock);

Looks good to me.

Reviewed-by: Baoquan He 

>   h->nr_huge_pages++;
>   h->nr_huge_pages_node[nid]++;
>   spin_unlock(_lock);
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 4.19] net/mlx5e: Don't support phys switch id if not in switchdev mode

2020-08-07 Thread Greg Kroah-Hartman
On Thu, Aug 06, 2020 at 07:05:42PM -0700, Saeed Mahameed wrote:
> From: Roi Dayan 
> 
> Support for phys switch id ndo added for representors and if
> we do not have representors there is no need to support it.
> Since each port return different switch id supporting this
> block support for creating bond over PFs and attaching to bridge
> in legacy mode.
> 
> This bug doesn't exist upstream as the code got refactored and the
> netdev api is totally different.
> 
> Fixes: cb67b832921c ("net/mlx5e: Introduce SRIOV VF representors")
> Signed-off-by: Roi Dayan 
> Signed-off-by: Saeed Mahameed 
> ---
> Hi Greg,
> 
> Sorry for submitting a non upstream patch, but this bug is
> bothering some users on 4.19-stable kernels and it doesn't exist
> upstream, so i hope you are ok with backporting this one liner patch.

Also queued up to 4.9.y and 4.14.y.

thanks,

greg k-h


Re: [PATCH v2] drm/qxl: don't take vga ports on rev5+

2020-08-07 Thread Daniel Vetter
On Fri, Aug 07, 2020 at 12:55:01PM +0200, Gerd Hoffmann wrote:
> qemu 5.0 introduces a new qxl hardware revision 5.  Unlike revision 4
> (and below) the device doesn't switch back into vga compatibility mode
> when someone touches the vga ports.  So we don't have to reserve the
> vga ports any more to avoid that happening.
> 
> Signed-off-by: Gerd Hoffmann 

Does what it says on the label.

Reviewed-by: Daniel Vetter 

> ---
>  drivers/gpu/drm/qxl/qxl_drv.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
> index 13872b882775..6e7f16f4cec7 100644
> --- a/drivers/gpu/drm/qxl/qxl_drv.c
> +++ b/drivers/gpu/drm/qxl/qxl_drv.c
> @@ -96,7 +96,7 @@ qxl_pci_probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
>   if (ret)
>   goto disable_pci;
>  
> - if (is_vga(pdev)) {
> + if (is_vga(pdev) && pdev->revision < 5) {
>   ret = vga_get_interruptible(pdev, VGA_RSRC_LEGACY_IO);
>   if (ret) {
>   DRM_ERROR("can't get legacy vga ioports\n");
> @@ -127,7 +127,7 @@ qxl_pci_probe(struct pci_dev *pdev, const struct 
> pci_device_id *ent)
>  unload:
>   qxl_device_fini(qdev);
>  put_vga:
> - if (is_vga(pdev))
> + if (is_vga(pdev) && pdev->revision < 5)
>   vga_put(pdev, VGA_RSRC_LEGACY_IO);
>  disable_pci:
>   pci_disable_device(pdev);
> @@ -155,7 +155,7 @@ qxl_pci_remove(struct pci_dev *pdev)
>  
>   drm_dev_unregister(dev);
>   drm_atomic_helper_shutdown(dev);
> - if (is_vga(pdev))
> + if (is_vga(pdev) && pdev->revision < 5)
>   vga_put(pdev, VGA_RSRC_LEGACY_IO);
>  }
>  
> -- 
> 2.18.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH v11 3/6] KASAN: Port KASAN Tests to KUnit

2020-08-07 Thread Andrey Konovalov
On Wed, Aug 5, 2020 at 6:29 AM David Gow  wrote:
>
> From: Patricia Alfonso 
>
> Transfer all previous tests for KASAN to KUnit so they can be run
> more easily. Using kunit_tool, developers can run these tests with their
> other KUnit tests and see "pass" or "fail" with the appropriate KASAN
> report instead of needing to parse each KASAN report to test KASAN
> functionalities. All KASAN reports are still printed to dmesg.
>
> Stack tests do not work properly when KASAN_STACK is enabled so
> those tests use a check for "if IS_ENABLED(CONFIG_KASAN_STACK)" so they
> only run if stack instrumentation is enabled. If KASAN_STACK is not
> enabled, KUnit will print a statement to let the user know this test
> was not run with KASAN_STACK enabled.
>
> copy_user_test and kasan_rcu_uaf cannot be run in KUnit so there is a
> separate test file for those tests, which can be run as before as a
> module.
>
> Signed-off-by: Patricia Alfonso 
> Signed-off-by: David Gow 
> Reviewed-by: Brendan Higgins 
> Reviewed-by: Andrey Konovalov 
> Reviewed-by: Dmitry Vyukov 
> ---
>  lib/Kconfig.kasan   |  22 +-
>  lib/Makefile|   3 +-
>  lib/test_kasan.c| 686 +++-
>  lib/test_kasan_module.c | 111 +++
>  4 files changed, 385 insertions(+), 437 deletions(-)
>  create mode 100644 lib/test_kasan_module.c
>
> diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
> index 047b53dbfd58..9a237887e52e 100644
> --- a/lib/Kconfig.kasan
> +++ b/lib/Kconfig.kasan
> @@ -167,12 +167,24 @@ config KASAN_VMALLOC
>   for KASAN to detect more sorts of errors (and to support vmapped
>   stacks), but at the cost of higher memory usage.
>
> -config TEST_KASAN
> -   tristate "Module for testing KASAN for bug detection"
> -   depends on m
> +config KASAN_KUNIT_TEST
> +   tristate "KUnit-compatible tests of KASAN bug detection capabilities" 
> if !KUNIT_ALL_TESTS
> +   depends on KASAN && KUNIT
> +   default KUNIT_ALL_TESTS
> help
> - This is a test module doing various nasty things like
> - out of bounds accesses, use after free. It is useful for testing
> + This is a KUnit test suite doing various nasty things like
> + out of bounds and use after free accesses. It is useful for testing
>   kernel debugging features like KASAN.
>
> + For more information on KUnit and unit tests in general, please 
> refer
> + to the KUnit documentation in Documentation/dev-tools/kunit
> +
> +config TEST_KASAN_MODULE
> +   tristate "KUnit-incompatible tests of KASAN bug detection 
> capabilities"
> +   depends on m && KASAN
> +   help
> + This is a part of the KASAN test suite that is incompatible with
> + KUnit. Currently includes tests that do bad copy_from/to_user
> + accesses.
> +
>  endif # KASAN
> diff --git a/lib/Makefile b/lib/Makefile
> index 46278be53cda..adaebfac81c9 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -60,9 +60,10 @@ CFLAGS_test_bitops.o += -Werror
>  obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
>  obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
>  obj-$(CONFIG_TEST_IDA) += test_ida.o
> -obj-$(CONFIG_TEST_KASAN) += test_kasan.o
> +obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
>  CFLAGS_test_kasan.o += -fno-builtin
>  CFLAGS_test_kasan.o += $(call cc-disable-warning, vla)
> +obj-$(CONFIG_TEST_KASAN_MODULE) += test_kasan_module.o
>  obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
>  CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
>  UBSAN_SANITIZE_test_ubsan.o := y
> diff --git a/lib/test_kasan.c b/lib/test_kasan.c
> index 58bffadd8367..d023fb75fd60 100644
> --- a/lib/test_kasan.c
> +++ b/lib/test_kasan.c
> @@ -5,8 +5,6 @@
>   * Author: Andrey Ryabinin 
>   */
>
> -#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
> -
>  #include 
>  #include 
>  #include 
> @@ -77,416 +75,327 @@ static void kasan_test_exit(struct kunit *test)
> fail_data.report_found); \
>  } while (0)
>
> -
> -
> -/*
> - * Note: test functions are marked noinline so that their names appear in
> - * reports.
> - */
> -static noinline void __init kmalloc_oob_right(void)
> +static void kmalloc_oob_right(struct kunit *test)
>  {
> char *ptr;
> size_t size = 123;
>
> -   pr_info("out-of-bounds to right\n");
> ptr = kmalloc(size, GFP_KERNEL);
> -   if (!ptr) {
> -   pr_err("Allocation failed\n");
> -   return;
> -   }
> -
> -   ptr[size + OOB_TAG_OFF] = 'x';
> +   KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
>
> +   KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x');
> kfree(ptr);
>  }
>
> -static noinline void __init kmalloc_oob_left(void)
> +static void kmalloc_oob_left(struct kunit *test)
>  {
> char *ptr;
> size_t size = 15;
>
> -   pr_info("out-of-bounds to left\n");
> ptr = kmalloc(size, GFP_KERNEL);
> -   if (!ptr) {
> -   

Re: [RESEND PATCH] media: atomisp: Replace trace_printk by pr_info

2020-08-07 Thread Nicolas Boichat
On Fri, Aug 7, 2020 at 4:04 PM Mauro Carvalho Chehab  wrote:
>
> Em Fri, 7 Aug 2020 14:51:12 +0800
> Nicolas Boichat  escreveu:
>
> > On Fri, Aug 7, 2020 at 2:28 PM Greg Kroah-Hartman
> >  wrote:
> > >
> > > On Fri, Aug 07, 2020 at 09:50:23AM +0800, Nicolas Boichat wrote:
> > > > On Fri, Jul 24, 2020 at 8:41 PM Nicolas Boichat  
> > > > wrote:
> > > > >
> > > > > On Fri, Jul 10, 2020 at 3:03 PM Greg Kroah-Hartman
> > > > >  wrote:
> > > > > >
> > > > > > On Fri, Jul 10, 2020 at 02:45:29PM +0800, Nicolas Boichat wrote:
> > > > > > > trace_printk should not be used in production code, replace it
> > > > > > > call with pr_info.
> > > > > > >
> > > > > > > Signed-off-by: Nicolas Boichat 
> > > > > > > ---
> > > > > > > Sent this before as part of a series (whose 4th patch was a
> > > > > > > change that allows to detect such trace_printk), but maybe it's
> > > > > > > easier to get individual maintainer attention by splitting it.
> > > > > >
> > > > > > Mauro should take this soon:
> > > > > >
> > > > > > Acked-by: Greg Kroah-Hartman 
> > > > >
> > > > > Mauro: did you get a chance to look at this? (and the other similar
> > > > > patch "media: camss: vfe: Use trace_printk for debugging only")
> > > >
> > > > Mauro: Another gentle ping. Thanks.
> > >
> > > It's the middle of the merge window, maintainers can't do anything until
> > > after 5.9-rc1 is out, sorry.
> >
> > Huh, wait, looks like Mauro _did_ pick it (found it in this email
> > "[GIT PULL for v5.8-rc7] media fixes").
> >
> > My bad then, I was expecting an ack ,-)
>
> Never expect acks. Kernel maintainers usually don't send them.

For some reasons I'm working mainly with maintainers who do ,-) I'll
adjust my expectations, thanks.

> Yet, in the case of media, you should probably have received
> an automatic e-mail from our patchwork instance.

Nope, didn't receive anything. But I'm happy to blame gmail for that...

Anyway, I'll ping you again after the merge window closes about
"media: camss: vfe: Use trace_printk for debugging only" (I _think_
that one didn't get merged). Hopefully not too many other
trace_printks made it through the cracks in the meantime ,-)

Thanks, have a good weekend,

>
> Thanks,
> Mauro


[v2] drm/msm/dpu: Fix reservation failures in modeset

2020-08-07 Thread Kalyan Thota
In TEST_ONLY commit, rm global_state will duplicate the
object and request for new reservations, once they pass
then the new state will be swapped with the old and will
be available for the Atomic Commit.

This patch fixes some of missing links in the resource
reservation sequence mentioned above.

1) Creation of duplicate state in test_only commit (Rob)
2) Allocate and release the resources on every modeset.
3) Avoid allocation only when active is false.

In a modeset operation, swap state happens well before
disable. Hence clearing reservations in disable will
cause failures in modeset enable.

Allow reservations to be cleared/allocated before swap,
such that only newly committed resources are pushed to HW.

Changes in v1:
 - Move the rm release to atomic_check.
 - Ensure resource allocation and free happens when active
   is not changed i.e only when mode is changed.(Rob)

Signed-off-by: Kalyan Thota 
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 63976dc..50a98d1 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -582,7 +582,7 @@ static int dpu_encoder_virt_atomic_check(
dpu_kms = to_dpu_kms(priv->kms);
mode = _state->mode;
adj_mode = _state->adjusted_mode;
-   global_state = dpu_kms_get_existing_global_state(dpu_kms);
+   global_state = dpu_kms_get_global_state(crtc_state->state);
trace_dpu_enc_atomic_check(DRMID(drm_enc));
 
/*
@@ -617,12 +617,15 @@ static int dpu_encoder_virt_atomic_check(
/* Reserve dynamic resources now. */
if (!ret) {
/*
-* Avoid reserving resources when mode set is pending. Topology
-* info may not be available to complete reservation.
+* Release and Allocate resources on every modeset
+* Dont allocate when active is false.
 */
if (drm_atomic_crtc_needs_modeset(crtc_state)) {
-   ret = dpu_rm_reserve(_kms->rm, global_state,
-   drm_enc, crtc_state, topology);
+   dpu_rm_release(global_state, drm_enc);
+
+   if (!crtc_state->active_changed || crtc_state->active)
+   ret = dpu_rm_reserve(_kms->rm, global_state,
+   drm_enc, crtc_state, topology);
}
}
 
@@ -1171,7 +1174,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder 
*drm_enc)
struct dpu_encoder_virt *dpu_enc = NULL;
struct msm_drm_private *priv;
struct dpu_kms *dpu_kms;
-   struct dpu_global_state *global_state;
int i = 0;
 
if (!drm_enc) {
@@ -1190,7 +1192,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder 
*drm_enc)
 
priv = drm_enc->dev->dev_private;
dpu_kms = to_dpu_kms(priv->kms);
-   global_state = dpu_kms_get_existing_global_state(dpu_kms);
 
trace_dpu_enc_disable(DRMID(drm_enc));
 
@@ -1220,8 +1221,6 @@ static void dpu_encoder_virt_disable(struct drm_encoder 
*drm_enc)
 
DPU_DEBUG_ENC(dpu_enc, "encoder disabled\n");
 
-   dpu_rm_release(global_state, drm_enc);
-
mutex_unlock(_enc->enc_lock);
 }
 
-- 
1.9.1



Re: [PATCH 08/10] mm/hugetlb: return non-isolated page in the loop instead of break and check

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> Function dequeue_huge_page_node_exact() iterates the free list and
> return the first non-isolated one.
> 
> Instead of break and check the loop variant, we could return in the loop
> directly. This could reduce some redundant check.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 26 --
>  1 file changed, 12 insertions(+), 14 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index b8e844911b5a..9473eb6800e9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1035,20 +1035,18 @@ static struct page 
> *dequeue_huge_page_node_exact(struct hstate *h, int nid)
>  {
>   struct page *page;
>  
> - list_for_each_entry(page, >hugepage_freelists[nid], lru)
> - if (!PageHWPoison(page))
> - break;

I don't see how this reduces any redundant checks; it looks like just two
different styles.

> - /*
> -  * if 'non-isolated free hugepage' not found on the list,
> -  * the allocation fails.

But the above code comment does seem stale: the code now checks for an
HWPoison page directly, not the old isolated-page check.

> -  */
> - if (>hugepage_freelists[nid] == >lru)
> - return NULL;
> - list_move(>lru, >hugepage_activelist);
> - set_page_refcounted(page);
> - h->free_huge_pages--;
> - h->free_huge_pages_node[nid]--;
> - return page;
> + list_for_each_entry(page, >hugepage_freelists[nid], lru) {
> + if (PageHWPoison(page))
> + continue;
> +
> + list_move(>lru, >hugepage_activelist);
> + set_page_refcounted(page);
> + h->free_huge_pages--;
> + h->free_huge_pages_node[nid]--;
> + return page;
> + }
> +
> + return NULL;
>  }
>  
>  static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t 
> gfp_mask, int nid,
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH] drm/virtio: fix unblank

2020-08-07 Thread Daniel Vetter
On Fri, Aug 07, 2020 at 12:54:29PM +0200, Gerd Hoffmann wrote:
> When going through a disable/enable cycle without changing the
> framebuffer the optimization added by commit 3954ff10e06e ("drm/virtio:
> skip set_scanout if framebuffer didn't change") causes the screen stay
> blank.  Add a bool to force an update to fix that.
> 
> Cc: 1882...@bugs.launchpad.net
> Fixes: 3954ff10e06e ("drm/virtio: skip set_scanout if framebuffer didn't 
> change")
> Signed-off-by: Gerd Hoffmann 
> ---
>  drivers/gpu/drm/virtio/virtgpu_drv.h | 1 +
>  drivers/gpu/drm/virtio/virtgpu_display.c | 1 +
>  drivers/gpu/drm/virtio/virtgpu_plane.c   | 4 +++-
>  3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h 
> b/drivers/gpu/drm/virtio/virtgpu_drv.h
> index 9ff9f4ac0522..7b0c319f23c9 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_drv.h
> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
> @@ -138,6 +138,7 @@ struct virtio_gpu_output {
>   int cur_x;
>   int cur_y;
>   bool enabled;
> + bool need_update;
>  };
>  #define drm_crtc_to_virtio_gpu_output(x) \
>   container_of(x, struct virtio_gpu_output, crtc)
> diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c 
> b/drivers/gpu/drm/virtio/virtgpu_display.c
> index cc7fd957a307..378be5956b30 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_display.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_display.c
> @@ -100,6 +100,7 @@ static void virtio_gpu_crtc_atomic_enable(struct drm_crtc 
> *crtc,
>   struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc);
>  
>   output->enabled = true;
> + output->need_update = true;
>  }
>  
>  static void virtio_gpu_crtc_atomic_disable(struct drm_crtc *crtc,
> diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c 
> b/drivers/gpu/drm/virtio/virtgpu_plane.c
> index 52d24179bcec..5948031a9ce8 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_plane.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
> @@ -163,7 +163,8 @@ static void virtio_gpu_primary_plane_update(struct 
> drm_plane *plane,
>   plane->state->src_w != old_state->src_w ||
>   plane->state->src_h != old_state->src_h ||
>   plane->state->src_x != old_state->src_x ||
> - plane->state->src_y != old_state->src_y) {
> + plane->state->src_y != old_state->src_y ||
> + output->need_update) {

Uh instead of hand-rolling what's essentially a drm_crtc_needs_modeset
check, why not use that one? atomic helpers try to keep the usual suspects
for state transitions already handy, to avoid every driver rolling their
own. Or do I miss something here?
-Daniel


>   DRM_DEBUG("handle 0x%x, crtc %dx%d+%d+%d, src %dx%d+%d+%d\n",
> bo->hw_res_handle,
> plane->state->crtc_w, plane->state->crtc_h,
> @@ -178,6 +179,7 @@ static void virtio_gpu_primary_plane_update(struct 
> drm_plane *plane,
>  plane->state->src_h >> 16,
>  plane->state->src_x >> 16,
>  plane->state->src_y >> 16);
> + output->need_update = false;
>   }
>  
>   virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle,
> -- 
> 2.18.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 07/10] mm/hugetlb: a page from buddy is not on any list

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> The page allocated from buddy is not on any list, so just use list_add()
> is enough.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fb09e5a83c39..b8e844911b5a 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2430,7 +2430,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
>   h->resv_huge_pages--;
>   }
>   spin_lock(_lock);
> - list_move(>lru, >hugepage_activelist);
> + list_add(>lru, >hugepage_activelist);

Looks good to me.

Reviewed-by: Baoquan He 

>   /* Fall through */
>   }
>   hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 1/2] mm/slub: Introduce two counters for the partial objects

2020-08-07 Thread Christopher Lameter
On Fri, 7 Aug 2020, Pekka Enberg wrote:

> I think we can just default to the counters. After all, if I
> understood correctly, we're talking about up to 100 ms time period
> with IRQs disabled when count_partial() is called. As this is
> triggerable from user space, that's a performance bug whatever way you
> look at it.


Well yes under extreme conditions and this is only happening for sysfs
counter retrieval.

There could be other solutions to this. The solution here penalizes
every hotpath slab allocation for the sake of relatively infrequently
used counter monitoring. There is also the possibility of not traversing the list
and simply estimating the value based on the number of slab pages

> Christoph, others, any objections?

Obviously  ;-)



Re: [PATCH 9/9] scsi: ufs: Properly release resources if a task is aborted successfully

2020-08-07 Thread Can Guo

Hi Markus,

On 2020-08-07 17:33, Markus Elfring wrote:

… To fix it, …


I propose to replace this wording by the tag “Fixes”.



… "mannually", …


Please avoid a typo:
… "manually", …


Regards,
Markus


Thanks, will fix these in next version.

Regards,
Can Guo.


Re: [PATCH 06/10] mm/hugetlb: remove redundant huge_pte_alloc() in hugetlb_fault()

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> Before proper processing, huge_pte_alloc() would be called
> un-conditionally. It is not necessary to do this when ptep is NULL.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index f5f04e89000d..fb09e5a83c39 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -4534,10 +4534,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct 
> vm_area_struct *vma,
>   } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
>   return VM_FAULT_HWPOISON_LARGE |
>   VM_FAULT_SET_HINDEX(hstate_index(h));
> - } else {
> - ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
> - if (!ptep)
> - return VM_FAULT_OOM;

Right, seems a relic from Mike's i_mmap_rwsem handling patches.

Reviewed-by: Baoquan He 

>   }
>  
>   /*
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 05/10] mm/hugetlb: remove the redundant check on non_swap_entry()

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> Migration and hwpoison entry is a subset of non_swap_entry().
> 
> Remove the redundant check on non_swap_entry().
> 
> Signed-off-by: Wei Yang 

Hmm, I have already posted a patch doing the same thing, and it got reviewed by
people.

https://lore.kernel.org/linux-mm/20200723104636.GS32539@MiWiFi-R3L-srv/

> ---
>  mm/hugetlb.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index d775e514eb2e..f5f04e89000d 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -3778,7 +3778,7 @@ bool is_hugetlb_entry_migration(pte_t pte)
>   if (huge_pte_none(pte) || pte_present(pte))
>   return false;
>   swp = pte_to_swp_entry(pte);
> - if (non_swap_entry(swp) && is_migration_entry(swp))
> + if (is_migration_entry(swp))
>   return true;
>   else
>   return false;
> @@ -3791,7 +3791,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
>   if (huge_pte_none(pte) || pte_present(pte))
>   return 0;
>   swp = pte_to_swp_entry(pte);
> - if (non_swap_entry(swp) && is_hwpoison_entry(swp))
> + if (is_hwpoison_entry(swp))
>   return 1;
>   else
>   return 0;
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 2/2 v2] rseq/selftests: test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU

2020-08-07 Thread Mathieu Desnoyers
- On Aug 6, 2020, at 8:27 PM, Boqun Feng boqun.f...@gmail.com wrote:

> On Thu, Aug 06, 2020 at 10:05:44AM -0700, Peter Oskolkov wrote:
>> Based on Google-internal RSEQ work done by
>> Paul Turner and Andrew Hunter.
>> 
>> This patch adds a selftest for MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU.
>> The test quite often fails without the previous patch in this patchset,
>> but consistently passes with it.
>> 
>> Signed-off-by: Peter Oskolkov 
>> ---
>>  .../selftests/rseq/basic_percpu_ops_test.c| 181 ++
>>  1 file changed, 181 insertions(+)
>> 
>> diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
>> b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
>> index eb3f6db36d36..147c80deac19 100644
>> --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
>> +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
>> @@ -3,16 +3,21 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>>  
>>  #include "rseq.h"
>>  
>>  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
>>  
>> +#define MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU  (1<<7)
>> +
>>  struct percpu_lock_entry {
>>  intptr_t v;
>>  } __attribute__((aligned(128)));
>> @@ -289,6 +294,180 @@ void test_percpu_list(void)
>>  assert(sum == expected_sum);
>>  }
>>  
>> +struct test_membarrier_thread_args {
>> +int stop;
>> +intptr_t percpu_list_ptr;
>> +};
>> +
>> +/* Worker threads modify data in their "active" percpu lists. */
>> +void *test_membarrier_worker_thread(void *arg)
>> +{
>> +struct test_membarrier_thread_args *args =
>> +(struct test_membarrier_thread_args *)arg;
>> +const int iters = 10 * 1000 * 1000;
>> +int i;
>> +
>> +if (rseq_register_current_thread()) {
>> +fprintf(stderr, "Error: rseq_register_current_thread(...) 
>> failed(%d): %s\n",
>> +errno, strerror(errno));
>> +abort();
>> +}
>> +
>> +for (i = 0; i < iters; ++i) {
>> +while (true) {
>> +int cpu, ret;
>> +struct percpu_list *list_ptr = (struct percpu_list *)
>> +atomic_load(>percpu_list_ptr);
>> +
> 
> What if the manager thread update ->percpu_list_ptr and call
> membarrier() here? I.e.
> 
>   CPU0CPU1
>   list_ptr = atomic_load(>percpu_list_ptr); 
> // read list_b
>   
>   atomic_store(>percpu_list_ptr, list_a);
>   sys_membarrier(MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU, 1); // send 
> ipi to
>   restart rseq.cs on CPU1
> 
>do>
>   cpu = rseq_cpu_start(); // start a rseq.cs and 
> accessing list_b!
> 
> The thing is, atomic_load() is an reference to ->percpu_list_ptr, which
> is outside the rseq.cs, simply restarting rseq doesn't kill this
> reference.
> 
> Am I missing something subtle?

I'm with you on this, something looks fishy. It would be good to use
delay-inducing testing methods like rseq parametrized selftests to
increase the odds of hitting this race more reliably.

Thanks,

Mathieu

> 
> Regards,
> Boqun
> 
>> +if (!list_ptr)
>> +continue;  /* Not yet initialized. */
>> +
>> +cpu = rseq_cpu_start();
>> +struct percpu_list_node *node = list_ptr->c[cpu].head;
>> +const intptr_t prev = node->data;
>> +
>> +ret = rseq_cmpeqv_cmpeqv_storev(>data, prev,
>> +>percpu_list_ptr,
>> +(intptr_t)list_ptr, prev + 1, cpu);
>> +if (!ret)
>> +break;  /* Success. */
>> +}
>> +}
>> +
>> +if (rseq_unregister_current_thread()) {
>> +fprintf(stderr, "Error: rseq_unregister_current_thread(...) 
>> failed(%d):
>> %s\n",
>> +errno, strerror(errno));
>> +abort();
>> +}
>> +return NULL;
>> +}
>> +
>> +void test_membarrier_init_percpu_list(struct percpu_list *list)
>> +{
>> +int i;
>> +
>> +memset(list, 0, sizeof(*list));
>> +for (i = 0; i < CPU_SETSIZE; i++) {
>> +struct percpu_list_node *node;
>> +
>> +node = malloc(sizeof(*node));
>> +assert(node);
>> +node->data = 0;
>> +node->next = NULL;
>> +list->c[i].head = node;
>> +}
>> +}
>> +
>> +void test_membarrier_free_percpu_list(struct percpu_list *list)
>> +{
>> +int i;
>> +
>> +for (i = 0; i < CPU_SETSIZE; i++)
>> +free(list->c[i].head);
>> +}
>> +
>> +static int sys_membarrier(int cmd, int flags)
>> +{
>> +return syscall(__NR_membarrier, cmd, flags);
>> +}
>> +
>> +/*
>> + * The manager thread swaps per-cpu lists that worker threads see,
>> + * and validates 

Re: [PATCH 04/10] mm/hugetlb: count file_region to be added when regions_needed != NULL

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> There are only two cases of function add_reservation_in_range()
> 
> * count file_region and return the number in regions_needed
> * do the real list operation without counting
> 
> This means it is not necessary to have two parameters to classify these
> two cases.
> 
> Just use regions_needed to separate them.
> 
> Signed-off-by: Wei Yang 

Nice clean up.

Reviewed-by: Baoquan He 

> ---
>  mm/hugetlb.c | 33 +
>  1 file changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 929256c130f9..d775e514eb2e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -321,16 +321,17 @@ static void coalesce_file_region(struct resv_map *resv, 
> struct file_region *rg)
>   }
>  }
>  
> -/* Must be called with resv->lock held. Calling this with count_only == true
> - * will count the number of pages to be added but will not modify the linked
> - * list. If regions_needed != NULL and count_only == true, then 
> regions_needed
> - * will indicate the number of file_regions needed in the cache to carry out 
> to
> - * add the regions for this range.
> +/*
> + * Must be called with resv->lock held.
> + *
> + * Calling this with regions_needed != NULL will count the number of pages
> + * to be added but will not modify the linked list. And regions_needed will
> + * indicate the number of file_regions needed in the cache to carry out to 
> add
> + * the regions for this range.
>   */
>  static long add_reservation_in_range(struct resv_map *resv, long f, long t,
>struct hugetlb_cgroup *h_cg,
> -  struct hstate *h, long *regions_needed,
> -  bool count_only)
> +  struct hstate *h, long *regions_needed)
>  {
>   long add = 0;
>   struct list_head *head = >regions;
> @@ -366,14 +367,14 @@ static long add_reservation_in_range(struct resv_map 
> *resv, long f, long t,
>*/
>   if (rg->from > last_accounted_offset) {
>   add += rg->from - last_accounted_offset;
> - if (!count_only) {
> + if (!regions_needed) {
>   nrg = get_file_region_entry_from_cache(
>   resv, last_accounted_offset, rg->from);
>   record_hugetlb_cgroup_uncharge_info(h_cg, h,
>   resv, nrg);
>   list_add(>link, rg->link.prev);
>   coalesce_file_region(resv, nrg);
> - } else if (regions_needed)
> + } else
>   *regions_needed += 1;
>   }
>  
> @@ -385,13 +386,13 @@ static long add_reservation_in_range(struct resv_map 
> *resv, long f, long t,
>*/
>   if (last_accounted_offset < t) {
>   add += t - last_accounted_offset;
> - if (!count_only) {
> + if (!regions_needed) {
>   nrg = get_file_region_entry_from_cache(
>   resv, last_accounted_offset, t);
>   record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
>   list_add(>link, rg->link.prev);
>   coalesce_file_region(resv, nrg);
> - } else if (regions_needed)
> + } else
>   *regions_needed += 1;
>   }
>  
> @@ -484,8 +485,8 @@ static long region_add(struct resv_map *resv, long f, 
> long t,
>  retry:
>  
>   /* Count how many regions are actually needed to execute this add. */
> - add_reservation_in_range(resv, f, t, NULL, NULL, _regions_needed,
> -  true);
> + add_reservation_in_range(resv, f, t, NULL, NULL,
> +  _regions_needed);
>  
>   /*
>* Check for sufficient descriptors in the cache to accommodate
> @@ -513,7 +514,7 @@ static long region_add(struct resv_map *resv, long f, 
> long t,
>   goto retry;
>   }
>  
> - add = add_reservation_in_range(resv, f, t, h_cg, h, NULL, false);
> + add = add_reservation_in_range(resv, f, t, h_cg, h, NULL);
>  
>   resv->adds_in_progress -= in_regions_needed;
>  
> @@ -549,9 +550,9 @@ static long region_chg(struct resv_map *resv, long f, 
> long t,
>  
>   spin_lock(>lock);
>  
> - /* Count how many hugepages in this range are NOT respresented. */
> + /* Count how many hugepages in this range are NOT represented. */
>   chg = add_reservation_in_range(resv, f, t, NULL, NULL,
> -out_regions_needed, true);
> +out_regions_needed);
>  
>   if (*out_regions_needed == 0)
>   *out_regions_needed = 1;
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 03/10] mm/hugetlb: use list_splice to merge two list at once

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> Instead of add allocated file_region one by one to region_cache, we
> could use list_splice to merge two list at once.
> 
> Also we know the number of entries in the list, increase the number
> directly.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 0a2f3851b828..929256c130f9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -443,11 +443,8 @@ static int allocate_file_region_entries(struct resv_map 
> *resv,
>  
>   spin_lock(>lock);
>  
> - list_for_each_entry_safe(rg, trg, _regions, link) {
> - list_del(>link);
> - list_add(>link, >region_cache);
> - resv->region_cache_count++;
> - }
> + list_splice(_regions, >region_cache);
> + resv->region_cache_count += to_allocate;

Looks good to me.

Reviewed-by: Baoquan He 

>   }
>  
>   return 0;
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 0/2] irqchip: irq-mt58xx: Add mt58xx series interrupt

2020-08-07 Thread Mark-PK Tsai
From: Marc Zyngier 

> On 2020-08-06 15:58, Daniel Palmer wrote:
> > Hi Mark-PK,
> > 
> > On Thu, 6 Aug 2020 at 23:08, Mark-PK Tsai  
> > wrote:
> >> > Do you know if it would be possible to confirm if they are
> >> > the
> >> > same thing? MediaTek bought MStar a few years ago so it seems likely
> >> > but I have no hard information.
> >> >
> >> 
> >> Yes, it's for the same interrupt controller IP.
> > 
> > That's good news. :)
> > 
> >> > If they are the same thing could we work on making one series that
> >> > supports both use cases?
> >> 
> >> Sure, and I think the irq controller driver should support both use 
> >> cases.
> >> So how about keep the MTK version driver?
> > 
> > I'm fine with that. Maybe you can push the MTK version and I can send
> > a small patch after that to add the small bits I need?
> 
> In the interest of being vendor agnostic, please rename the properties
> such as mediatek,irqs-map-range to something less brand-specific.
> The compatible string should be enough.

I can't find the suitable property in standard ones that match the custom
properties here.
And the vendor prefixed rule is described in [1].

The interrupt controller was first used in MStar TV SoCs.
Now it's used in MTK TV and SigmaStar SoCs.
So I think an MStar prefix would make more sense.
I will rename the driver into mstar-intc, and MTK will maintain this driver.

[1] https://www.kernel.org/doc/Documentation/devicetree/booting-without-of.txt

Re: [RFC PATCH v2 6/6] sched/fair: Implement starvation monitor

2020-08-07 Thread Juri Lelli
On 07/08/20 13:30, Daniel Bristot de Oliveira wrote:
> On 8/7/20 12:46 PM, pet...@infradead.org wrote:
> > On Fri, Aug 07, 2020 at 11:56:04AM +0200, Juri Lelli wrote:
> >> Starting deadline server for lower priority classes right away when
> >> first task is enqueued might break guarantees, as tasks belonging to
> >> intermediate priority classes could be uselessly preempted. E.g., a well
> >> behaving (non hog) FIFO task can be preempted by NORMAL tasks even if
> >> there are still CPU cycles available for NORMAL tasks to run, as they'll
> >> be running inside the fair deadline server for some period of time.
> >>
> >> To prevent this issue, implement a starvation monitor mechanism that
> >> starts the deadline server only if a (fair in this case) task hasn't
> >> been scheduled for some interval of time after it has been enqueued.
> >> Use pick/put functions to manage starvation monitor status.
> > One thing I considerd was scheduling this as a least-laxity entity --
> > such that it runs late, not early -- and start the server when
> > rq->nr_running != rq->cfs.h_nr_running, IOW when there's !fair tasks
> > around.

IIUC, this would still require programming a timer to fire when laxity
is 0, but doing that only when there are !fair tasks around (so when
enqueuing the first !fair or if there are !fair already when first fair
is enqueued) would probably save us some overhead, I agree (as no timer
and no enqueue of deadline server would be needed in the common "only
fair" case).

> > 
> > Not saying we should do it like that, but that's perhaps more
> > deterministic than this.
> > 
> 
> I agree, what we want here is something that schedules the server if it still
> retains some runtime when the laxity is 0. But this is easier said than done, 
> as
> this would require another scheduler (other pros and cons and analysis (and
> hours of work)...).
> 
> But, for the starvation monitor purpose, the goal is not (necessarily) to
> provide a deterministic guarantee for the starving task, but to avoid system
> issues while minimizing the damage to the "real" real-time workload. With that
> in mind, we could relax our ambitions...
> 
> Thoughts?

I agree that we don't probably want to develop an additional scheduler/
policy for this, but I'll think a bit more about Peter's idea. Maybe
it's already a viable optimization w/o changing EDF/CBS.



Re: [PATCH 02/10] mm/hugetlb: make sure to get NULL when list is empty

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> list_first_entry() may not return NULL even when the list is empty.
> 
> Let's make sure the behavior by using list_first_entry_or_null(),
> otherwise it would corrupt the list.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 62ec74f6d03f..0a2f3851b828 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -237,7 +237,8 @@ get_file_region_entry_from_cache(struct resv_map *resv, 
> long from, long to)
>   VM_BUG_ON(resv->region_cache_count <= 0);


We already have the VM_BUG_ON() above; can list_first_entry() actually return NULL here?

>  
>   resv->region_cache_count--;
> - nrg = list_first_entry(>region_cache, struct file_region, link);
> + nrg = list_first_entry_or_null(>region_cache,
> + struct file_region, link);
>   VM_BUG_ON(!nrg);
>   list_del(>link);
>  
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



Re: [PATCH 01/10] mm/hugetlb: not necessary to coalesce regions recursively

2020-08-07 Thread Baoquan He
On 08/07/20 at 05:12pm, Wei Yang wrote:
> Per my understanding, we keep the regions ordered and would always
> coalesce regions properly. So the task to keep this property is just
> to coalesce its neighbour.
> 
> Let's simplify this.
> 
> Signed-off-by: Wei Yang 
> ---
>  mm/hugetlb.c | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 590111ea6975..62ec74f6d03f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -307,8 +307,7 @@ static void coalesce_file_region(struct resv_map *resv, 
> struct file_region *rg)
>   list_del(>link);
>   kfree(rg);
>  
> - coalesce_file_region(resv, prg);
> - return;
> + rg = prg;
>   }
>  
>   nrg = list_next_entry(rg, link);
> @@ -318,9 +317,6 @@ static void coalesce_file_region(struct resv_map *resv, 
> struct file_region *rg)
>  
>   list_del(>link);
>   kfree(rg);
> -
> - coalesce_file_region(resv, nrg);

I agree with the change. But it changes the original behaviour of
coalesce_file_region(), and I'm not sure whether there was a reason for doing it
that way; maybe Mike can give a judgement. Personally,

Reviewed-by: Baoquan He 

> - return;
>   }
>  }
>  
> -- 
> 2.20.1 (Apple Git-117)
> 
> 



[PATCH 1/2] arm64: dts: ti: k3-j7200: Add HyperBus node

2020-08-07 Thread Vignesh Raghavendra
J7200 has a Flash SubSystem that has one OSPI and one HyperBus.. Add
DT nodes for HyperBus controller for now.

Signed-off-by: Vignesh Raghavendra 
---
 .../boot/dts/ti/k3-j7200-mcu-wakeup.dtsi  | 27 +++
 arch/arm64/boot/dts/ti/k3-j7200.dtsi  |  8 --
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi 
b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 94a797bbcdaf0..106c774dc224a 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -114,4 +114,31 @@ wkup_i2c0: i2c@4212 {
clocks = <_clks 197 1>;
power-domains = <_pds 197 TI_SCI_PD_SHARED>;
};
+
+   fss: system-controller@4700 {
+   compatible = "syscon", "simple-mfd";
+   reg = <0x0 0x4700 0x0 0x100>;
+   #address-cells = <2>;
+   #size-cells = <2>;
+   ranges;
+
+   hbmc_mux: hbmc-mux {
+   compatible = "mmio-mux";
+   #mux-control-cells = <1>;
+   mux-reg-masks = <0x4 0x2>; /* HBMC select */
+   };
+
+   hbmc: hyperbus@47034000 {
+   compatible = "ti,am654-hbmc";
+   reg = <0x0 0x47034000 0x0 0x100>,
+   <0x5 0x 0x1 0x000>;
+   power-domains = <_pds 102 TI_SCI_PD_EXCLUSIVE>;
+   clocks = <_clks 102 0>;
+   assigned-clocks = <_clks 102 5>;
+   assigned-clock-rates = <3>;
+   #address-cells = <2>;
+   #size-cells = <1>;
+   mux-controls = <_mux 0>;
+   };
+   };
 };
diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi 
b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
index aadf707f25f5c..ba27f6641137e 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi
@@ -139,7 +139,9 @@ cbass_main: bus@10 {
 <0x00 0x4510 0x00 0x4510 0x00 0x00c24000>,
 <0x00 0x4600 0x00 0x4600 0x00 0x0020>,
 <0x00 0x4700 0x00 0x4700 0x00 0x00068400>,
-<0x00 0x5000 0x00 0x5000 0x00 0x1000>;
+<0x00 0x5000 0x00 0x5000 0x00 0x1000>,
+<0x05 0x 0x05 0x 0x01 0x>,
+<0x07 0x 0x07 0x 0x01 0x>;
 
cbass_mcu_wakeup: bus@2838 {
compatible = "simple-bus";
@@ -155,7 +157,9 @@ cbass_mcu_wakeup: bus@2838 {
 <0x00 0x4510 0x00 0x4510 0x00 
0x00c24000>, /* MMRs, remaining NAVSS */
 <0x00 0x4600 0x00 0x4600 0x00 
0x0020>, /* CPSW */
 <0x00 0x4700 0x00 0x4700 0x00 
0x00068400>, /* OSPI register space */
-<0x00 0x5000 0x00 0x5000 0x00 
0x1000>; /* FSS OSPI0/1 data region 0 */
+<0x00 0x5000 0x00 0x5000 0x00 
0x1000>, /* FSS OSPI0/1 data region 0 */
+<0x05 0x 0x05 0x 0x01 
0x>, /* FSS OSPI0 data region 3 */
+<0x07 0x 0x07 0x 0x01 
0x>; /* FSS OSPI1 data region 3 */
};
};
 };
-- 
2.28.0



[PATCH 0/2] arm64: dts: ti: k3-j7200: Add HyperFlash related nodes

2020-08-07 Thread Vignesh Raghavendra
This series adds HyperBus and HyperFlash nodes for TI's J7200 SoC

Based on top of 
https://lore.kernel.org/linux-arm-kernel/20200723084628.19241-1-lokeshvu...@ti.com/
And earlier I2C DT patches:
https://lore.kernel.org/linux-arm-kernel/20200730192600.1872-1-vigne...@ti.com/

Vignesh Raghavendra (2):
  arm64: dts: ti: k3-j7200: Add HyperBus node
  arm64: dts: ti: k3-j7200-som-p0: Add HyperFlash node

 .../boot/dts/ti/k3-j7200-mcu-wakeup.dtsi  | 27 ++
 arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi   | 36 +++
 arch/arm64/boot/dts/ti/k3-j7200.dtsi  |  8 +++--
 3 files changed, 69 insertions(+), 2 deletions(-)

-- 
2.28.0



[PATCH 2/2] arm64: dts: ti: k3-j7200-som-p0: Add HyperFlash node

2020-08-07 Thread Vignesh Raghavendra
J7200 SoM has a HyperFlash connected to HyperBus memory controller. But
HyperBus is muxed with OSPI, therefore keep HyperBus node disabled.
Bootloader will detect the mux and enable the node as required.

Signed-off-by: Vignesh Raghavendra 
---
 arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi | 36 +
 1 file changed, 36 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi 
b/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
index 22fc50bd5c4c4..0984977f381ad 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-som-p0.dtsi
@@ -27,3 +27,39 @@ secure_ddr: optee@9e800000 {
};
};
 };
+
+&wkup_pmx0 {
+   mcu_fss0_hpb0_pins_default: mcu-fss0-hpb0-pins-default {
+   pinctrl-single,pins = <
+   J721E_WKUP_IOPAD(0x0, PIN_OUTPUT, 1) /* (B6) 
MCU_OSPI0_CLK.MCU_HYPERBUS0_CK */
+   J721E_WKUP_IOPAD(0x4, PIN_OUTPUT, 1) /* (C8) 
MCU_OSPI0_LBCLKO.MCU_HYPERBUS0_CKn */
+   J721E_WKUP_IOPAD(0x2c, PIN_OUTPUT, 1) /* (D6) 
MCU_OSPI0_CSn0.MCU_HYPERBUS0_CSn0 */
+   J721E_WKUP_IOPAD(0x30, PIN_OUTPUT, 1) /* (D7) 
MCU_OSPI0_CSn1.MCU_HYPERBUS0_RESETn */
+   J721E_WKUP_IOPAD(0x8, PIN_INPUT, 1) /* (B7) 
MCU_OSPI0_DQS.MCU_HYPERBUS0_RWDS */
+   J721E_WKUP_IOPAD(0xc, PIN_INPUT, 1) /* (D8) 
MCU_OSPI0_D0.MCU_HYPERBUS0_DQ0 */
+   J721E_WKUP_IOPAD(0x10, PIN_INPUT, 1) /* (C7) 
MCU_OSPI0_D1.MCU_HYPERBUS0_DQ1 */
+   J721E_WKUP_IOPAD(0x14, PIN_INPUT, 1) /* (C5) 
MCU_OSPI0_D2.MCU_HYPERBUS0_DQ2 */
+   J721E_WKUP_IOPAD(0x18, PIN_INPUT, 1) /* (A5) 
MCU_OSPI0_D3.MCU_HYPERBUS0_DQ3 */
+   J721E_WKUP_IOPAD(0x1c, PIN_INPUT, 1) /* (A6) 
MCU_OSPI0_D4.MCU_HYPERBUS0_DQ4 */
+   J721E_WKUP_IOPAD(0x20, PIN_INPUT, 1) /* (B8) 
MCU_OSPI0_D5.MCU_HYPERBUS0_DQ5 */
+   J721E_WKUP_IOPAD(0x24, PIN_INPUT, 1) /* (A8) 
MCU_OSPI0_D6.MCU_HYPERBUS0_DQ6 */
+   J721E_WKUP_IOPAD(0x28, PIN_INPUT, 1) /* (A7) 
MCU_OSPI0_D7.MCU_HYPERBUS0_DQ7 */
+   >;
+   };
+};
+
+&hbmc {
+   /* OSPI and HBMC are muxed inside FSS, Bootloader will enable
+* appropriate node based on board detection
+*/
+   status = "disabled";
+   pinctrl-names = "default";
+   pinctrl-0 = <&mcu_fss0_hpb0_pins_default>;
+   ranges = <0x0 0x0 0x5 0x00000000 0x4000000>, /* 64MB Flash on CS0 */
+	    <0x1 0x0 0x5 0x04000000 0x800000>; /* 8MB RAM on CS1 */
+
+   flash@0,0 {
+   compatible = "cypress,hyperflash", "cfi-flash";
+   reg = <0x0 0x0 0x4000000>;
+   };
+};
-- 
2.28.0



Re: [PATCH] drm/virtio: fix memory leak in virtio_gpu_cleanup_object()

2020-08-07 Thread Gerd Hoffmann
On Wed, Jul 22, 2020 at 01:18:51PM +0800, Xin He wrote:
> Before setting shmem->pages to NULL, kfree() should
> be called.

>   sg_free_table(shmem->pages);
> + kfree(shmem->pages);
>   shmem->pages = NULL;

Pushed to drm-misc-fixes.

thanks,
  Gerd



Re: splice: infinite busy loop lockup bug

2020-08-07 Thread Al Viro
On Fri, Aug 07, 2020 at 01:27:27PM +0100, Al Viro wrote:
> On Fri, Aug 07, 2020 at 07:35:08PM +0900, Tetsuo Handa wrote:
> > syzbot is reporting hung task at pipe_release() [1], for for_each_bvec() 
> > from
> > iterate_bvec() from iterate_all_kinds() from iov_iter_alignment() from
> > ext4_unaligned_io() from ext4_dio_write_iter() from ext4_file_write_iter() 
> > from
> > call_write_iter() from do_iter_readv_writev() from do_iter_write() from
> > vfs_iter_write() from iter_file_splice_write() falls into infinite busy loop
> > with pipe->mutex held.
> > 
> > The reason of falling into infinite busy loop is that 
> > iter_file_splice_write()
> > for some reason generates "struct bio_vec" entry with .bv_len=0 and 
> > .bv_offset=0
> > while for_each_bvec() cannot handle .bv_len == 0.
> 
> broken in 1bdc76aea115 "iov_iter: use bvec iterator to implement 
> iterate_bvec()",
> unless I'm misreading it...
> 
> Zero-length segments are not disallowed; it's not all that hard to filter them
> out in iter_file_splice_write(), but the intent had always been to have
> iterate_all_kinds() et.al. able to cope with those.
> 
> How are these pipe_buffers with ->len == 0 generated in that reproducer, BTW?
> There might be something else fishy going on...

FWIW, my preference would be to have for_each_bvec() advance past zero-length
segments; I'll need to go through its uses elsewhere in the tree first, though
(after I grab some sleep),


Re: [PATCH RFC v2 02/18] irq/dev-msi: Add support for a new DEV_MSI irq domain

2020-08-07 Thread gre...@linuxfoundation.org
On Fri, Aug 07, 2020 at 09:06:50AM -0300, Jason Gunthorpe wrote:
> On Thu, Aug 06, 2020 at 10:21:11PM +0200, Thomas Gleixner wrote:
> 
> > Optionally? Please tell the hardware folks to make this mandatory. We
> > have enough pain with non maskable MSI interrupts already so introducing
> > yet another non maskable interrupt trainwreck is not an option.
> 
> Can you elaborate on the flows where Linux will need to trigger
> masking?
> 
> I expect that masking will be available in our NIC HW too - but it
> will require a spin loop if masking has to be done in an atomic
> context.
> 
> > It's more than a decade now that I tell HW people not to repeat the
> > non-maskable MSI failure, but obviously they still think that
> > non-maskable interrupts are a brilliant idea. I know that HW folks
> > believe that everything they omit can be fixed in software, but they
> > have to finally understand that this particular issue _cannot_ be fixed
> > at all.
> 
> Sure, the CPU should always be able to shut off an interrupt!
> 
> Maybe explaining the goals would help understand the HW perspective.
> 
> Today HW can process > 100k queues of work at once. Interrupt delivery
> works by having a MSI index in each queue's metadata and the interrupt
> indirects through a MSI-X table on-chip which has the
> addr/data/mask/etc.
> 
> What IMS proposes is that the interrupt data can move into the queue
> meta data (which is not required to be on-chip), eg along side the
> producer/consumer pointers, and the central MSI-X table is not
> needed. This is necessary because the PCI spec has very harsh design
> requirements for a MSI-X table that make scaling it prohibitive.
> 
> So an IRQ can be silenced by deleting or stopping the queue(s)
> triggering it. It can be masked by including masking in the queue
> metadata. We can detect pending by checking the producer/consumer
> values.
> 
> However synchronizing all the HW and all the state is now more
> complicated than just writing a mask bit via MMIO to an on-die memory.

Because doing all of the work that used to be done in HW in software is
so much faster and scalable?  Feels really wrong to me :(

Do you all have a pointer to the spec for this newly proposed stuff
anywhere to try to figure out how the HW wants this to all work?

thanks,

greg k-h


Re: splice: infinite busy loop lockup bug

2020-08-07 Thread Tetsuo Handa
On 2020/08/07 21:27, Al Viro wrote:
> On Fri, Aug 07, 2020 at 07:35:08PM +0900, Tetsuo Handa wrote:
>> syzbot is reporting hung task at pipe_release() [1], for for_each_bvec() from
>> iterate_bvec() from iterate_all_kinds() from iov_iter_alignment() from
>> ext4_unaligned_io() from ext4_dio_write_iter() from ext4_file_write_iter() 
>> from
>> call_write_iter() from do_iter_readv_writev() from do_iter_write() from
>> vfs_iter_write() from iter_file_splice_write() falls into infinite busy loop
>> with pipe->mutex held.
>>
>> The reason of falling into infinite busy loop is that 
>> iter_file_splice_write()
>> for some reason generates "struct bio_vec" entry with .bv_len=0 and 
>> .bv_offset=0
>> while for_each_bvec() cannot handle .bv_len == 0.
> 
> broken in 1bdc76aea115 "iov_iter: use bvec iterator to implement 
> iterate_bvec()",
> unless I'm misreading it...
> 
> Zero-length segments are not disallowed; it's not all that hard to filter them
> out in iter_file_splice_write(), but the intent had always been to have
> iterate_all_kinds() et.al. able to cope with those.
> 
> How are these pipe_buffers with ->len == 0 generated in that reproducer, BTW?
> There might be something else fishy going on...
> 

OK. Indeed writing to empty pipe which returns -EFAULT allows an empty
page to be linked to pipe's array.

Now, I've just found a simple reproducer, and confirmed that this bug is
a local lockup DoS by unprivileged user. Please fix.

--
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

int main(int argc, char *argv[])
{
static char buffer[4096];
const int fd = open("/tmp/testfile", O_WRONLY | O_CREAT, 0600);
int pipe_fd[2] = { EOF, EOF };
pipe(pipe_fd);
write(pipe_fd[1], NULL, 4096);
write(pipe_fd[1], buffer, 4096);
splice(pipe_fd[0], NULL, fd, NULL, 65536, 0);
return 0;
}
--

[  125.598898][C0] rcu: INFO: rcu_sched self-detected stall on CPU
[  125.601072][C0] rcu: 0-: (20171 ticks this GP) 
idle=526/1/0x4000 softirq=7918/7918 fqs=5136 
[  125.604874][C0]  (t=21006 jiffies g=9341 q=30)
[  125.606512][C0] NMI backtrace for cpu 0
[  125.607931][C0] CPU: 0 PID: 2792 Comm: a.out Not tainted 5.8.0+ #793
[  125.610948][C0] Hardware name: VMware, Inc. VMware Virtual 
Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020
[  125.614938][C0] Call Trace:
[  125.616049][C0]  <IRQ>
[  125.617010][C0]  dump_stack+0x5e/0x7a
[  125.618370][C0]  nmi_cpu_backtrace.cold.7+0x14/0x52
[  125.620148][C0]  ? lapic_can_unplug_cpu.cold.39+0x3a/0x3a
[  125.622074][C0]  nmi_trigger_cpumask_backtrace+0x92/0x9f
[  125.624154][C0]  arch_trigger_cpumask_backtrace+0x14/0x20
[  125.626102][C0]  rcu_dump_cpu_stacks+0xa0/0xd0
[  125.627919][C0]  rcu_sched_clock_irq.cold.95+0x121/0x39c
[  125.629833][C0]  ? acct_account_cputime+0x17/0x20
[  125.631534][C0]  ? account_system_index_time+0x8a/0xa0
[  125.633422][C0]  update_process_times+0x23/0x60
[  125.635070][C0]  tick_sched_handle.isra.22+0x20/0x60
[  125.636870][C0]  tick_sched_timer+0x68/0x80
[  125.638403][C0]  ? tick_sched_handle.isra.22+0x60/0x60
[  125.640588][C0]  __hrtimer_run_queues+0xf9/0x1a0
[  125.642591][C0]  hrtimer_interrupt+0xfc/0x210
[  125.645033][C0]  __sysvec_apic_timer_interrupt+0x4c/0x60
[  125.647292][C0]  asm_call_on_stack+0xf/0x20
[  125.649192][C0]  </IRQ>
[  125.650501][C0]  sysvec_apic_timer_interrupt+0x75/0x80
[  125.652900][C0]  asm_sysvec_apic_timer_interrupt+0x12/0x20
[  125.655487][C0] RIP: 0010:iov_iter_copy_from_user_atomic+0x19b/0x350
[  125.658124][C0] Code: 89 45 d0 48 c1 e6 06 48 03 37 4d 8d 3c 09 4c 89 cf 
e8 d9 e5 ff ff 48 8b 45 d0 45 39 eb 0f 87 35 01 00 00 49 8b 4a 18 4d 89 f9 <45> 
29 dd 45 01 d8 75 12 eb 19 41 83 c4 01 41 29 c0 74 10 44 89 e0
[  125.666132][C0] RSP: 0018:a6cdc1237aa8 EFLAGS: 0246
[  125.668557][C0] RAX:  RBX: 1000 RCX: 
945035a25100
[  125.671576][C0] RDX:  RSI:  RDI: 
945035a25100
[  125.674851][C0] RBP: a6cdc1237ad8 R08:  R09: 
945028a8
[  125.677989][C0] R10: a6cdc1237de0 R11:  R12: 

[  125.680990][C0] R13: 1000 R14: 1000 R15: 
1000
[  125.684006][C0]  iomap_write_actor+0xbe/0x190
[  125.685982][C0]  ? iomap_write_begin+0x460/0x460
[  125.688031][C0]  iomap_apply+0xf4/0x1a0
[  125.689810][C0]  ? iomap_write_begin+0x460/0x460
[  125.691826][C0]  iomap_file_buffered_write+0x69/0x90
[  125.698598][C0]  ? iomap_write_begin+0x460/0x460
[  125.705341][C0]  xfs_file_buffered_aio_write+0xc2/0x2c0
[  125.707780][C0]  xfs_file_write_iter+0xa3/0xc0
[  125.709802][C0]  do_iter_readv_writev+0x15b/0x1c0
[  125.712496][C0]  do_iter_write+0x81/0x190
[  125.715245][C0] 

Re: [PATCH] perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set

2020-08-07 Thread Arnaldo Carvalho de Melo
Em Fri, Aug 07, 2020 at 09:16:29AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Aug 07, 2020 at 09:09:56AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Aug 07, 2020 at 09:08:24AM -0300, Arnaldo Carvalho de Melo escreveu:
> > > Em Thu, Aug 06, 2020 at 09:43:57PM +0200, Jiri Olsa escreveu:
> > > > On Wed, Aug 05, 2020 at 10:29:37AM +0800, Jin Yao wrote:
> > > > > We received an error report that perf-record caused 'Segmentation 
> > > > > fault'
> > > > > on a newly system (e.g. on the new installed ubuntu).
> > > > > 
> > > > >  (gdb) backtrace
> > > > >  #0  __read_once_size (size=4, res=, p=0x14) at 
> > > > > /root/0-jinyao/acme/tools/include/linux/compiler.h:139
> > > > >  #1  atomic_read (v=0x14) at 
> > > > > /root/0-jinyao/acme/tools/include/asm/../../arch/x86/include/asm/atomic.h:28
> > > > >  #2  refcount_read (r=0x14) at 
> > > > > /root/0-jinyao/acme/tools/include/linux/refcount.h:65
> > > > >  #3  perf_mmap__read_init (map=map@entry=0x0) at mmap.c:177
> > > > >  #4  0x561ce5c0de39 in perf_evlist__poll_thread 
> > > > > (arg=0x561ce68584d0) at util/sideband_evlist.c:62
> > > > >  #5  0x7fad78491609 in start_thread (arg=) at 
> > > > > pthread_create.c:477
> > > > >  #6  0x7fad7823c103 in clone () at 
> > > > > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> > > > 
> > > > hum, I recall discussing the same issue,
> > > > I thought it was already fixed :-\ in any case:
> > > > 
> > > > Acked-by: Jiri Olsa 
> > > 
> > > I added this:
> > > 
> > > Fixes: 23cbb41c939a ("perf record: Move side band evlist setup to 
> > > separate routine")
> > > 
> > > To help this fixe go back to a few stable kernels. Technically the
> > > problem was introduced much earlier, when the side band thread was
> > > added, but then this would require more cherry-picking of patches and
> > > since we have a workaround, i.e. build with libbpf, I think this is
> > > enough.
> > 
> > I backtrack on that, as the sideband event is not just for BPF... The
> > switch-output-event code uses it as well, so it can't be dependent on
> > LIBBPF being built...
> > 
> > I'll see what I can do
> 
> So this is the change I made, the side band thread is may have been
> already created by 
> 
> [acme@quaco ~]$ perf record -h switch-output-event
> 
>  Usage: perf record [<options>] [<command>]
> or: perf record [<options>] -- <command> [<options>]
> 
> --switch-output-event <event>
>   switch output event selector. use 'perf list' to 
> list available events
> 
> [acme@quaco ~]$
> 
> I'm doing some extra checking now on your report, and the patch below
> has skews because it clashed with the clockid patches by Jiri so I had
> to resolve its merge.
 
Sorry, I resent your patch, doh, here is the end result, with my change,
its ok to call perf_evlist__start_sb_thread() with a NULL evlist, it'll
just return 0.

- Arnaldo


commit b13536a7e93680625094beb18cdce4ae47a3dbfb
Author: Jin Yao 
Date:   Wed Aug 5 10:29:37 2020 +0800

perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set

We received an error report that perf-record caused 'Segmentation fault'
on a newly system (e.g. on the new installed ubuntu).

  (gdb) backtrace
  #0  __read_once_size (size=4, res=, p=0x14) at 
/root/0-jinyao/acme/tools/include/linux/compiler.h:139
  #1  atomic_read (v=0x14) at 
/root/0-jinyao/acme/tools/include/asm/../../arch/x86/include/asm/atomic.h:28
  #2  refcount_read (r=0x14) at 
/root/0-jinyao/acme/tools/include/linux/refcount.h:65
  #3  perf_mmap__read_init (map=map@entry=0x0) at mmap.c:177
  #4  0x561ce5c0de39 in perf_evlist__poll_thread (arg=0x561ce68584d0) 
at util/sideband_evlist.c:62
  #5  0x7fad78491609 in start_thread (arg=) at 
pthread_create.c:477
  #6  0x7fad7823c103 in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

The root cause is, evlist__add_bpf_sb_event() just returns 0 if
HAVE_LIBBPF_SUPPORT is not defined (inline function path). So it will
not create a valid evsel for side-band event.

But perf-record still creates BPF side band thread to process the
side-band event, then the error happpens.

We can reproduce this issue by removing the libelf-dev. e.g.
1. apt-get remove libelf-dev
2. perf record -a -- sleep 1

  root@test:~# ./perf record -a -- sleep 1
  perf: Segmentation fault
  Obtained 6 stack frames.
  ./perf(+0x28eee8) [0x5562d6ef6ee8]
  /lib/x86_64-linux-gnu/libc.so.6(+0x46210) [0x7fbfdc65f210]
  ./perf(+0x342e74) [0x5562d6faae74]
  ./perf(+0x257e39) [0x5562d6ebfe39]
  /lib/x86_64-linux-gnu/libpthread.so.0(+0x9609) [0x7fbfdc990609]
  /lib/x86_64-linux-gnu/libc.so.6(clone+0x43) [0x7fbfdc73b103]
  Segmentation fault (core dumped)

To fix this issue,

1. We either install the missing libraries to let HAVE_LIBBPF_SUPPORT
   be defined.
   e.g. apt-get install libelf-dev and install other related libraries.

2. Use 

drivers/video/fbdev/sstfb.c:337:23: sparse: sparse: incorrect type in argument 1 (different address spaces)

2020-08-07 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   86cfccb66937dd6cbf26ed619958b9e587e6a115
commit: 670d0a4b10704667765f7d18f7592993d02783aa sparse: use identifiers to 
define address spaces
date:   7 weeks ago
config: s390-randconfig-s031-20200807 (attached as .config)
compiler: s390-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.2-118-ge1578773-dirty
git checkout 670d0a4b10704667765f7d18f7592993d02783aa
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=s390 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)

>> drivers/video/fbdev/sstfb.c:337:23: sparse: sparse: incorrect type in 
>> argument 1 (different address spaces) @@ expected void *p @@ got 
>> char [noderef] __iomem *screen_base @@
   drivers/video/fbdev/sstfb.c:337:23: sparse: expected void *p
   drivers/video/fbdev/sstfb.c:337:23: sparse: got char [noderef] __iomem 
*screen_base
   drivers/video/fbdev/sstfb.c: note: in included file (through 
arch/s390/include/asm/io.h, include/linux/fb.h):
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-generic/io.h:225:22: sparse: expected unsigned int 
[usertype] val
   include/asm-generic/io.h:225:22: sparse: got restricted __le32 [usertype]
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:179:15: sparse: sparse: cast to restricted __le32
   include/asm-generic/io.h:225:22: sparse: sparse: incorrect type in argument 
1 (different base types) @@ expected unsigned int [usertype] val @@ got 
restricted __le32 [usertype] @@
   include/asm-gen

Re: [PATCH v2 net-next] net/tls: allow MSG_CMSG_COMPAT in sendmsg

2020-08-07 Thread Rouven Czerwinski
On Fri, 2020-08-07 at 10:26 +0200, Rouven Czerwinski wrote:
> On Thu, 2020-08-06 at 11:46 -0700, Jakub Kicinski wrote:
> > On Thu,  6 Aug 2020 08:49:06 +0200 Rouven Czerwinski wrote:
> > > Trying to use ktls on a system with 32-bit userspace and 64-bit
> > > kernel
> > > results in a EOPNOTSUPP message during sendmsg:
> > > 
> > >   setsockopt(3, SOL_TLS, TLS_TX, …, 40) = 0
> > >   sendmsg(3, …, msg_flags=0}, 0) = -1 EOPNOTSUPP (Operation not
> > > supported)
> > > 
> > > The tls_sw implementation does strict flag checking and does not
> > > allow
> > > the MSG_CMSG_COMPAT flag, which is set if the message comes in
> > > through
> > > the compat syscall.
> > > 
> > > This patch adds MSG_CMSG_COMPAT to the flag check to allow the
> > > usage of
> > > the TLS SW implementation on systems using the compat syscall
> > > path.
> > > 
> > > Note that the same check is present in the sendmsg path for the
> > > TLS
> > > device implementation, however the flag hasn't been added there
> > > for
> > > lack
> > > of testing hardware.
> > > 
> > > Signed-off-by: Rouven Czerwinski 
> > 
> > I don't know much about the compat stuff, I trust our cmsg handling
> > is
> > fine?
> > 
> > Just to be sure - did you run tools/testing/selftests/net/tls ?
> 
> After some pains to get this to correctly compile I have two failing
> tests, both for multi_chunk_sendfile:
> 
> root@192:~ /usr/lib/kselftest/net/tls
> [==] Running 93 tests from 4 test cases.
> …
> [ RUN  ] tls.12.multi_chunk_sendfile
> multi_chunk_sendfile: Test terminated by timeout
> [ FAIL ] tls.12.multi_chunk_sendfile
> …
> [ RUN  ] tls.13.multi_chunk_sendfile
> multi_chunk_sendfile: Test terminated by timeout
> [ FAIL ] tls.13.multi_chunk_sendfile
> …
> [==] 91 / 93 tests passed.
> [  FAILED  ]

I just tested on my x86_64 workstation and these specific tests fail
there too, do they only work on 5.8? They were added in 5.8, but I am
running 5.7.11 here. It looks like these failures are not
MSG_CMSG_COMPAT related.

Pooja Trivedi do you have an idea?

Regards,
Rouven



Re: splice: infinite busy loop lockup bug

2020-08-07 Thread Al Viro
On Fri, Aug 07, 2020 at 07:35:08PM +0900, Tetsuo Handa wrote:
> syzbot is reporting hung task at pipe_release() [1], for for_each_bvec() from
> iterate_bvec() from iterate_all_kinds() from iov_iter_alignment() from
> ext4_unaligned_io() from ext4_dio_write_iter() from ext4_file_write_iter() 
> from
> call_write_iter() from do_iter_readv_writev() from do_iter_write() from
> vfs_iter_write() from iter_file_splice_write() falls into infinite busy loop
> with pipe->mutex held.
> 
> The reason of falling into infinite busy loop is that iter_file_splice_write()
> for some reason generates "struct bio_vec" entry with .bv_len=0 and 
> .bv_offset=0
> while for_each_bvec() cannot handle .bv_len == 0.

broken in 1bdc76aea115 "iov_iter: use bvec iterator to implement 
iterate_bvec()",
unless I'm misreading it...

Zero-length segments are not disallowed; it's not all that hard to filter them
out in iter_file_splice_write(), but the intent had always been to have
iterate_all_kinds() et.al. able to cope with those.

How are these pipe_buffers with ->len == 0 generated in that reproducer, BTW?
There might be something else fishy going on...


Re: [PATCH v3 1/3] driver core: Revert default driver_deferred_probe_timeout value to 0

2020-08-07 Thread Thierry Reding
On Fri, Aug 07, 2020 at 01:02:44PM +0200, Thierry Reding wrote:
> On Thu, Aug 06, 2020 at 07:09:16PM -0700, John Stultz wrote:
> > On Thu, Aug 6, 2020 at 6:52 AM Thierry Reding  
> > wrote:
> > >
> > > On Wed, Apr 22, 2020 at 08:32:43PM +, John Stultz wrote:
> > > > This patch addresses a regression in 5.7-rc1+
> > > >
> > > > In commit c8c43cee29f6 ("driver core: Fix
> > > > driver_deferred_probe_check_state() logic"), we both cleaned up
> > > > the logic and also set the default driver_deferred_probe_timeout
> > > > value to 30 seconds to allow for drivers that are missing
> > > > dependencies to have some time so that the dependency may be
> > > > loaded from userland after initcalls_done is set.
> > > >
> > > > However, Yoshihiro Shimoda reported that on his device that
> > > > expects to have unmet dependencies (due to "optional links" in
> > > > its devicetree), was failing to mount the NFS root.
> > > >
> > > > In digging further, it seemed the problem was that while the
> > > > device properly probes after waiting 30 seconds for any missing
> > > > modules to load, the ip_auto_config() had already failed,
> > > > resulting in NFS to fail. This was due to ip_auto_config()
> > > > calling wait_for_device_probe() which doesn't wait for the
> > > > driver_deferred_probe_timeout to fire.
> > > >
> > > > Fixing that issue is possible, but could also introduce 30
> > > > second delays in bootups for users who don't have any
> > > > missing dependencies, which is not ideal.
> > > >
> > > > So I think the best solution to avoid any regressions is to
> > > > revert back to a default timeout value of zero, and allow
> > > > systems that need to utilize the timeout in order for userland
> > > > to load any modules that supply misisng dependencies in the dts
> > > > to specify the timeout length via the exiting documented boot
> > > > argument.
> > > >
> > > > Thanks to Geert for chasing down that ip_auto_config was why NFS
> > > > was failing in this case!
> > > >
> > > > Cc: "David S. Miller" 
> > > > Cc: Alexey Kuznetsov 
> > > > Cc: Hideaki YOSHIFUJI 
> > > > Cc: Jakub Kicinski 
> > > > Cc: Greg Kroah-Hartman 
> > > > Cc: Rafael J. Wysocki 
> > > > Cc: Rob Herring 
> > > > Cc: Geert Uytterhoeven 
> > > > Cc: Yoshihiro Shimoda 
> > > > Cc: Robin Murphy 
> > > > Cc: Andy Shevchenko 
> > > > Cc: Sudeep Holla 
> > > > Cc: Andy Shevchenko 
> > > > Cc: Naresh Kamboju 
> > > > Cc: Basil Eljuse 
> > > > Cc: Ferry Toth 
> > > > Cc: Arnd Bergmann 
> > > > Cc: Anders Roxell 
> > > > Cc: netdev 
> > > > Cc: linux...@vger.kernel.org
> > > > Reported-by: Yoshihiro Shimoda 
> > > > Tested-by: Yoshihiro Shimoda 
> > > > Fixes: c8c43cee29f6 ("driver core: Fix 
> > > > driver_deferred_probe_check_state() logic")
> > > > Signed-off-by: John Stultz 
> > > > ---
> > > >  drivers/base/dd.c | 13 ++---
> > > >  1 file changed, 2 insertions(+), 11 deletions(-)
> > >
> > > Sorry for being a bit late to the party, but this breaks suspend/resume
> > > support on various Tegra devices. I've only noticed now because, well,
> > > suspend/resume have been broken for other reasons for a little while and
> > > it's taken us a bit to resolve those issues.
> > >
> > > But now that those other issues have been fixed, I've started seeing an
> > > issue where after resume from suspend some of the I2C controllers are no
> > > longer working. The reason for this is that they share pins with DP AUX
> > > controllers via the pinctrl framework. The DP AUX driver registers as
> > > part of the DRM/KMS driver, which usually happens in userspace. Since
> > > the deferred probe timeout was set to 0 by default this no longer works
> > > because no pinctrl states are assigned to the I2C controller and
> > > therefore upon resume the pins cannot be configured for I2C operation.
> > 
> > Oof. My apologies!
> > 
> > > I'm also somewhat confused by this patch and a few before because they
> > > claim that they restore previous default behaviour, but that's just not
> > > true. Originally when this timeout was introduced it was -1, which meant
> > > that there was no timeout at all and hence users had to opt-in if they
> > > wanted to use a deferred probe timeout.
> > 
> > I don't think that's quite true, since the point of my original
> > changes were to avoid troubles I was seeing with drivers not loading
> > because once the timeout fired after init, driver loading would fail
> > with ENODEV instead of returning EPROBE_DEFER. The logic that existed
> > was buggy so the timeout handling didn't really work (changing the
> > boot argument wouldn't help, because after init the logic would return
> > ENODEV before it checked the timeout value).
> > 
> > That said, looking at it now, I do realize the
> > driver_deferred_probe_check_state_continue() logic in effect never
> > returned ETIMEDOUT before was consolidated in the earlier changes, and
> > now we've backed the default timeout to 0, old user (see bec6c0ecb243)
> > will now get ETIMEDOUT where 

Re: [PATCH RFC v2 00/18] Add VFIO mediated device support and DEV-MSI support for the idxd driver

2020-08-07 Thread Jason Gunthorpe
On Wed, Aug 05, 2020 at 07:22:58PM -0600, Alex Williamson wrote:

> If you see this as an abuse of the framework, then let's identify those
> specific issues and come up with a better approach.  As we've discussed
> before, things like basic PCI config space emulation are acceptable
> overhead and low risk (imo) and some degree of register emulation is
> well within the territory of an mdev driver.  

What troubles me is that idxd already has a direct userspace interface
to its HW, and does userspace DMA. The purpose of this mdev is to
provide a second direct userspace interface that is a little different
and trivially plugs into the virtualization stack.

I don't think VFIO should be the only entry point to
virtualization. If we say the universe of devices doing user space DMA
must also implement a VFIO mdev to plug into virtualization then it
will be alot of mdevs.

I would prefer to see that the existing userspace interface have the
extra needed bits for virtualization (eg by having appropriate
internal kernel APIs to make this easy) and all the emulation to build
the synthetic PCI device be done in userspace.

Not only is it better for security, it keeps things to one device
driver per device..

Jason


KASAN: use-after-free Read in service_outstanding_interrupt

2020-08-07 Thread syzbot
Hello,

syzbot found the following issue on:

HEAD commit:7b4ea945 Revert "x86/mm/64: Do not sync vmalloc/ioremap ma..
git tree:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git 
usb-testing
console output: https://syzkaller.appspot.com/x/log.txt?x=108adf3290
kernel config:  https://syzkaller.appspot.com/x/.config?x=72a84c46d0c668c
dashboard link: https://syzkaller.appspot.com/bug?extid=9e04e2df4a32fb661daf
compiler:   gcc (GCC) 10.1.0-syz 20200507

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+9e04e2df4a32fb661...@syzkaller.appspotmail.com

==
BUG: KASAN: use-after-free in usb_submit_urb+0x10c4/0x13e0 
drivers/usb/core/urb.c:368
Read of size 4 at addr 8881caa52018 by task syz-executor.3/13922

CPU: 1 PID: 13922 Comm: syz-executor.3 Not tainted 5.8.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xf6/0x16e lib/dump_stack.c:118
 print_address_description.constprop.0+0x1a/0x210 mm/kasan/report.c:383
 __kasan_report mm/kasan/report.c:513 [inline]
 kasan_report.cold+0x37/0x7c mm/kasan/report.c:530
 usb_submit_urb+0x10c4/0x13e0 drivers/usb/core/urb.c:368
 service_outstanding_interrupt.part.0+0x5f/0xa0 drivers/usb/class/cdc-wdm.c:463
 service_outstanding_interrupt drivers/usb/class/cdc-wdm.c:458 [inline]
 wdm_read+0x9a0/0xbd0 drivers/usb/class/cdc-wdm.c:576
 vfs_read+0x1df/0x520 fs/read_write.c:479
 ksys_read+0x12d/0x250 fs/read_write.c:607
 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45ce79
Code: 2d b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 
89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 
fb b5 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:7f8d2099fc78 EFLAGS: 0246 ORIG_RAX: 
RAX: ffda RBX: 000252c0 RCX: 0045ce79
RDX: 00f2 RSI: 2100 RDI: 0003
RBP: 0118bf60 R08:  R09: 
R10:  R11: 0246 R12: 0118bf2c
R13: 7ffd66f595ff R14: 7f8d209a09c0 R15: 0118bf2c

Allocated by task 4524:
 save_stack+0x1b/0x40 mm/kasan/common.c:48
 set_track mm/kasan/common.c:56 [inline]
 __kasan_kmalloc.constprop.0+0xc2/0xd0 mm/kasan/common.c:494
 kmalloc include/linux/slab.h:555 [inline]
 kzalloc include/linux/slab.h:669 [inline]
 usb_alloc_dev+0x51/0xf67 drivers/usb/core/usb.c:582
 hub_port_connect drivers/usb/core/hub.c:5114 [inline]
 hub_port_connect_change drivers/usb/core/hub.c:5348 [inline]
 port_event drivers/usb/core/hub.c:5494 [inline]
 hub_event+0x1dff/0x4390 drivers/usb/core/hub.c:5576
 process_one_work+0x94c/0x15f0 kernel/workqueue.c:2269
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
 kthread+0x392/0x470 kernel/kthread.c:292
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

Freed by task 5477:
 save_stack+0x1b/0x40 mm/kasan/common.c:48
 set_track mm/kasan/common.c:56 [inline]
 kasan_set_free_info mm/kasan/common.c:316 [inline]
 __kasan_slab_free+0x116/0x160 mm/kasan/common.c:455
 slab_free_hook mm/slub.c:1474 [inline]
 slab_free_freelist_hook+0x53/0x140 mm/slub.c:1507
 slab_free mm/slub.c:3072 [inline]
 kfree+0xbc/0x2c0 mm/slub.c:4052
 device_release+0x71/0x200 drivers/base/core.c:1800
 kobject_cleanup lib/kobject.c:704 [inline]
 kobject_release lib/kobject.c:735 [inline]
 kref_put include/linux/kref.h:65 [inline]
 kobject_put+0x1c8/0x540 lib/kobject.c:752
 put_device+0x1b/0x30 drivers/base/core.c:3029
 hub_port_connect drivers/usb/core/hub.c:5059 [inline]
 hub_port_connect_change drivers/usb/core/hub.c:5348 [inline]
 port_event drivers/usb/core/hub.c:5494 [inline]
 hub_event+0x1c93/0x4390 drivers/usb/core/hub.c:5576
 process_one_work+0x94c/0x15f0 kernel/workqueue.c:2269
 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
 kthread+0x392/0x470 kernel/kthread.c:292
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

The buggy address belongs to the object at 8881caa52000
 which belongs to the cache kmalloc-2k of size 2048
The buggy address is located 24 bytes inside of
 2048-byte region [8881caa52000, 8881caa52800)
The buggy address belongs to the page:
page:ea00072a9400 refcount:1 mapcount:0 mapping: index:0x0 
head:ea00072a9400 order:3 compound_mapcount:0 compound_pincount:0
flags: 0x2010200(slab|head)
raw: 02010200  00010001 8881da00c000
raw:  00080008 0001 
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 8881caa51f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 8881caa51f80: fc fc fc fc fc fc fc fc fc 

drivers/net/dsa/sja1105/sja1105_main.c:3418:38: warning: address of array 'match->compatible' will always evaluate to 'true'

2020-08-07 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   86cfccb66937dd6cbf26ed619958b9e587e6a115
commit: 0b0e299720bb99428892a23ecbd2b4b7f61ccf6d net: dsa: sja1105: use 
detected device id instead of DT one on mismatch
date:   2 days ago
config: riscv-randconfig-r034-20200807 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
aa57cabae2fc5abc08ab3e17b45f2890fc7c9e42)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install riscv cross compiling tool for clang build
# apt-get install binutils-riscv64-linux-gnu
git checkout 0b0e299720bb99428892a23ecbd2b4b7f61ccf6d
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> drivers/net/dsa/sja1105/sja1105_main.c:3418:38: warning: address of array 
>> 'match->compatible' will always evaluate to 'true' 
>> [-Wpointer-bool-conversion]
   for (match = sja1105_dt_ids; match->compatible; match++) {
   ~~~  ~~~^~
   1 warning generated.

vim +3418 drivers/net/dsa/sja1105/sja1105_main.c

  3395  
  3396  static int sja1105_check_device_id(struct sja1105_private *priv)
  3397  {
  3398  const struct sja1105_regs *regs = priv->info->regs;
  3399  u8 prod_id[SJA1105_SIZE_DEVICE_ID] = {0};
  3400  struct device *dev = &priv->spidev->dev;
  3401  const struct of_device_id *match;
  3402  u32 device_id;
  3403  u64 part_no;
  3404  int rc;
  3405  
  3406  rc = sja1105_xfer_u32(priv, SPI_READ, regs->device_id, 
&device_id,
  3407NULL);
  3408  if (rc < 0)
  3409  return rc;
  3410  
  3411  rc = sja1105_xfer_buf(priv, SPI_READ, regs->prod_id, prod_id,
  3412SJA1105_SIZE_DEVICE_ID);
  3413  if (rc < 0)
  3414  return rc;
  3415  
  3416  sja1105_unpack(prod_id, &part_no, 19, 4, 
SJA1105_SIZE_DEVICE_ID);
  3417  
> 3418  for (match = sja1105_dt_ids; match->compatible; match++) {
  3419  const struct sja1105_info *info = match->data;
  3420  
  3421  /* Is what's been probed in our match table at all? */
  3422  if (info->device_id != device_id || info->part_no != 
part_no)
  3423  continue;
  3424  
  3425  /* But is it what's in the device tree? */
  3426  if (priv->info->device_id != device_id ||
  3427  priv->info->part_no != part_no) {
  3428  dev_warn(dev, "Device tree specifies chip %s 
but found %s, please fix it!\n",
  3429   priv->info->name, info->name);
  3430  /* It isn't. No problem, pick that up. */
  3431  priv->info = info;
  3432  }
  3433  
  3434  return 0;
  3435  }
  3436  
  3437  dev_err(dev, "Unexpected {device ID, part number}: 0x%x 
0x%llx\n",
  3438  device_id, part_no);
  3439  
  3440  return -ENODEV;
  3441  }
  3442  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip


Re: [PATCH] perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set

2020-08-07 Thread Arnaldo Carvalho de Melo
Em Fri, Aug 07, 2020 at 09:09:56AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Fri, Aug 07, 2020 at 09:08:24AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Thu, Aug 06, 2020 at 09:43:57PM +0200, Jiri Olsa escreveu:
> > > On Wed, Aug 05, 2020 at 10:29:37AM +0800, Jin Yao wrote:
> > > > We received an error report that perf-record caused 'Segmentation fault'
> > > > on a newly installed system (e.g. on a newly installed Ubuntu).
> > > > 
> > > >  (gdb) backtrace
> > > >  #0  __read_once_size (size=4, res=, p=0x14) at 
> > > > /root/0-jinyao/acme/tools/include/linux/compiler.h:139
> > > >  #1  atomic_read (v=0x14) at 
> > > > /root/0-jinyao/acme/tools/include/asm/../../arch/x86/include/asm/atomic.h:28
> > > >  #2  refcount_read (r=0x14) at 
> > > > /root/0-jinyao/acme/tools/include/linux/refcount.h:65
> > > >  #3  perf_mmap__read_init (map=map@entry=0x0) at mmap.c:177
> > > >  #4  0x561ce5c0de39 in perf_evlist__poll_thread 
> > > > (arg=0x561ce68584d0) at util/sideband_evlist.c:62
> > > >  #5  0x7fad78491609 in start_thread (arg=) at 
> > > > pthread_create.c:477
> > > >  #6  0x7fad7823c103 in clone () at 
> > > > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> > > 
> > > hum, I recall discussing the same issue,
> > > I thought it was already fixed :-\ in any case:
> > > 
> > > Acked-by: Jiri Olsa 
> > 
> > I added this:
> > 
> > Fixes: 23cbb41c939a ("perf record: Move side band evlist setup to separate 
> > routine")
> > 
> > To help this fix go back to a few stable kernels. Technically the
> > problem was introduced much earlier, when the side band thread was
> > added, but then this would require more cherry-picking of patches and
> > since we have a workaround, i.e. build with libbpf, I think this is
> > enough.
> 
> I backtrack on that, as the sideband event is not just for BPF... The
> switch-output-event code uses it as well, so it can't be dependent on
> LIBBPF being built...
> 
> I'll see what I can do

So this is the change I made, the side band thread may have been
already created by 

[acme@quaco ~]$ perf record -h switch-output-event

 Usage: perf record [] []
or: perf record [] --  []

--switch-output-event 
  switch output event selector. use 'perf list' to list 
available events

[acme@quaco ~]$

I'm doing some extra checking now on your report, and the patch below
has skews because it clashed with the clockid patches by Jiri so I had
to resolve its merge.

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7d97c8e9f7f9..c12b5b072519 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1511,6 +1511,7 @@ static int record__synthesize(struct record *rec, bool 
tail)
return err;
 }
 
+#ifdef HAVE_LIBBPF_SUPPORT
 static int record__process_signal_event(union perf_event *event 
__maybe_unused, void *data)
 {
struct record *rec = data;
@@ -1592,6 +1593,12 @@ static int record__init_clock(struct record *rec)
session->header.env.clock.clockid_ns = ref;
return 0;
 }
+#else
+static int record__setup_sb_evlist(struct record *rec __maybe_unused)
+{
+   return 0;
+}
+#endif
 
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {


Re: [PATCH v3 3/7] x86/xen: drop tests for highmem in pv code

2020-08-07 Thread kernel test robot
Hi Juergen,

I love your patch! Perhaps something to improve:

[auto build test WARNING on tip/x86/core]
[also build test WARNING on tip/x86/asm v5.8 next-20200806]
[cannot apply to xen-tip/linux-next tip/x86/vdso]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Juergen-Gross/Remove-32-bit-Xen-PV-guest-support/20200807-164058
base:   https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
ef2ff0f5d6008d325c9a068e20981c0d0acc4d6b
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
# save the attached .config to linux build tree
make W=1 ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   arch/x86/xen/enlighten_pv.c: In function 'set_aliased_prot':
>> arch/x86/xen/enlighten_pv.c:348:15: warning: variable 'page' set but not 
>> used [-Wunused-but-set-variable]
 348 |  struct page *page;
 |   ^~~~
   arch/x86/xen/enlighten_pv.c: At top level:
   arch/x86/xen/enlighten_pv.c:1198:34: warning: no previous prototype for 
'xen_start_kernel' [-Wmissing-prototypes]
1198 | asmlinkage __visible void __init xen_start_kernel(void)
 |  ^~~~

vim +/page +348 arch/x86/xen/enlighten_pv.c

e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  335  
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  336  /*
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  337   * Set the page permissions 
for a particular virtual address.  If the
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  338   * address is a vmalloc 
mapping (or other non-linear mapping), then
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  339   * find the linear mapping 
of the page and also set its protections to
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  340   * match.
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  341   */
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  342  static void 
set_aliased_prot(void *v, pgprot_t prot)
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  343  {
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  344   int level;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  345   pte_t *ptep;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  346   pte_t pte;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  347   unsigned long pfn;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14 @348   struct page *page;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  349   unsigned char dummy;
64aef3716eab524 Juergen Gross 2020-08-07  350   void *av;
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  351  
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  352   ptep = 
lookup_address((unsigned long)v, &level);
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  353   BUG_ON(ptep == NULL);
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  354  
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  355   pfn = pte_pfn(*ptep);
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  356   page = pfn_to_page(pfn);
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  357  
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  358   pte = pfn_pte(pfn, 
prot);
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  359  
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  360   /*
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  361* Careful: 
update_va_mapping() will fail if the virtual address
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  362* we're poking isn't 
populated in the page tables.  We don't
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  363* need to worry about 
the direct map (that's always in the page
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  364* tables), but we need 
to be careful about vmap space.  In
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  365* particular, the top 
level page table can lazily propagate
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  366* entries between 
processes, so if we've switched mms since we
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  367* vmapped the target 
in the first place, we might not have the
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  368* top-level page table 
entry populated.
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  369*
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  370* We disable 
preemption because we want the same mm active when
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  371* we probe the target 
and when we issue the hypercall.  We'll
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  372* have the same 
nominal mm, but if we're a kernel thread, lazy
e1dab14cf68d1e0 Vitaly Kuznetsov  2017-03-14  373* mm dropping could 
change our pgd.
e1dab14cf68d1e0 Vitaly Kuznetso

[PATCH] dt-bindings: lpspi: Add missing boolean type for fsl,spi-only-use-cs1-sel

2020-08-07 Thread Geert Uytterhoeven
When running "make dt_binding_check" (even if restricted to an unrelated
binding document using DT_SCHEMA_FILES=...):

Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml: ignoring, error 
in schema: properties: fsl,spi-only-use-cs1-sel
warning: no schema found in file: 
Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml

Fix this by adding a proper type definition for the vendor-specific
fsl,spi-only-use-cs1-sel property.

Fixes: 7ac9bbf6ab3085c2 ("dt-bindings: lpspi: New property in document DT 
bindings for LPSPI")
Suggested-by: Rob Herring 
Signed-off-by: Geert Uytterhoeven 
---
 Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml 
b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
index 22882e769e260f76..312d8fee9dbb8199 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.yaml
@@ -39,6 +39,7 @@ properties:
   spi common code does not support use of CS signals discontinuously.
   i.MX8DXL-EVK board only uses CS1 without using CS0. Therefore, add
   this property to re-config the chipselect value in the LPSPI driver.
+type: boolean
 
 required:
   - compatible
-- 
2.17.1



fs/erofs/zdata.c:198:22: sparse: sparse: non size-preserving integer to pointer cast

2020-08-07 Thread kernel test robot
Hi Gao,

First bad commit (maybe != root cause):

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   86cfccb66937dd6cbf26ed619958b9e587e6a115
commit: 47e4937a4a7ca4184fd282791dfee76c6799966a erofs: move erofs out of 
staging
date:   12 months ago
config: s390-randconfig-s032-20200807 (attached as .config)
compiler: s390-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.2-118-ge1578773-dirty
git checkout 47e4937a4a7ca4184fd282791dfee76c6799966a
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=s390 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)

>> fs/erofs/zdata.c:198:22: sparse: sparse: non size-preserving integer to 
>> pointer cast
   fs/erofs/zdata.c:282:22: sparse: sparse: non size-preserving integer to 
pointer cast
   fs/erofs/zdata.c:1094:24: sparse: sparse: non size-preserving integer to 
pointer cast

vim +198 fs/erofs/zdata.c

3883a79abd0227 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-07-26  162  
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  163  
static void preload_compressed_pages(struct z_erofs_collector *clt,
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  164  
 struct address_space *mc,
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  165  
 enum z_erofs_cache_alloctype type,
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  166  
 struct list_head *pagepool)
105d4ad857dcbf drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-07-26  167  {
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  168  
const struct z_erofs_pcluster *pcl = clt->pcl;
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  169  
const unsigned int clusterpages = BIT(pcl->clusterbits);
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  170  
struct page **pages = clt->compressedpages;
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  171  
pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  172  
bool standalone = true;
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  173  
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  174  
if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  175  
return;
105d4ad857dcbf drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-07-26  176  
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  177  
for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  178  
struct page *page;
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  179  
compressed_page_t t;
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  180  
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  181  
/* the compressed page was loaded before */
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  182  
if (READ_ONCE(*pages))
105d4ad857dcbf drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-07-26  183  
continue;
105d4ad857dcbf drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-07-26  184  
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  185  
page = find_get_page(mc, index);
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  186  
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  187  
if (page) {
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  188  
t = tag_compressed_page_justfound(page);
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  189  
} else if (type == DELAYEDALLOC) {
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  190  
t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  191  
} else {/* DONTALLOC */
92e6efd566c4a1 drivers/staging/erofs/unzip_vle.c Gao Xiang 2018-12-08  192  
if (standalone)
97e86a858bc360 drivers/staging/erofs/zdata.c Gao Xiang 2019-07-31  193  
   

Re: [PATCH] perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set

2020-08-07 Thread Arnaldo Carvalho de Melo
Em Fri, Aug 07, 2020 at 09:08:24AM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Aug 06, 2020 at 09:43:57PM +0200, Jiri Olsa escreveu:
> > On Wed, Aug 05, 2020 at 10:29:37AM +0800, Jin Yao wrote:
> > > We received an error report that perf-record caused 'Segmentation fault'
> > > on a newly installed system (e.g. on a newly installed Ubuntu).
> > > 
> > >  (gdb) backtrace
> > >  #0  __read_once_size (size=4, res=, p=0x14) at 
> > > /root/0-jinyao/acme/tools/include/linux/compiler.h:139
> > >  #1  atomic_read (v=0x14) at 
> > > /root/0-jinyao/acme/tools/include/asm/../../arch/x86/include/asm/atomic.h:28
> > >  #2  refcount_read (r=0x14) at 
> > > /root/0-jinyao/acme/tools/include/linux/refcount.h:65
> > >  #3  perf_mmap__read_init (map=map@entry=0x0) at mmap.c:177
> > >  #4  0x561ce5c0de39 in perf_evlist__poll_thread (arg=0x561ce68584d0) 
> > > at util/sideband_evlist.c:62
> > >  #5  0x7fad78491609 in start_thread (arg=) at 
> > > pthread_create.c:477
> > >  #6  0x7fad7823c103 in clone () at 
> > > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> > 
> > hum, I recall discussing the same issue,
> > I thought it was already fixed :-\ in any case:
> > 
> > Acked-by: Jiri Olsa 
> 
> I added this:
> 
> Fixes: 23cbb41c939a ("perf record: Move side band evlist setup to separate 
> routine")
> 
> To help this fix go back to a few stable kernels. Technically the
> problem was introduced much earlier, when the side band thread was
> added, but then this would require more cherry-picking of patches and
> since we have a workaround, i.e. build with libbpf, I think this is
> enough.

I backtrack on that, as the sideband event is not just for BPF... The
switch-output-event code uses it as well, so it can't be dependent on
LIBBPF being built...

I'll see what I can do

- Arnaldo


Re: [PATCH] x86/paravirt: Add missing noinstr to arch_local*() helpers

2020-08-07 Thread Marco Elver
On Fri, 7 Aug 2020 at 14:04, Jürgen Groß  wrote:
>
> On 07.08.20 13:38, Marco Elver wrote:
> > On Fri, Aug 07, 2020 at 12:35PM +0200, Jürgen Groß wrote:
> >> On 07.08.20 11:50, Marco Elver wrote:
> >>> On Fri, Aug 07, 2020 at 11:24AM +0200, Jürgen Groß wrote:
>  On 07.08.20 11:01, Marco Elver wrote:
> > On Thu, 6 Aug 2020 at 18:06, Marco Elver  wrote:
> >> On Thu, 6 Aug 2020 at 15:17, Marco Elver  wrote:
> >>> On Thu, Aug 06, 2020 at 01:32PM +0200, pet...@infradead.org wrote:
>  On Thu, Aug 06, 2020 at 09:47:23AM +0200, Marco Elver wrote:
> > Testing my hypothesis that raw then nested non-raw
> > local_irq_save/restore() breaks IRQ state tracking -- see the 
> > reproducer
> > below. This is at least 1 case I can think of that we're bound to 
> > hit.
> >>> ...
> 
>  /me goes ponder things...
> 
>  How's something like this then?
> 
>  ---
>  include/linux/sched.h |  3 ---
>  kernel/kcsan/core.c   | 62 
>  ---
>  2 files changed, 44 insertions(+), 21 deletions(-)
> >>>
> >>> Thank you! That approach seems to pass syzbot (also with
> >>> CONFIG_PARAVIRT) and kcsan-test tests.
> >>>
> >>> I had to modify it some, so that report.c's use of the restore logic
> >>> works and not mess up the IRQ trace printed on KCSAN reports (with
> >>> CONFIG_KCSAN_VERBOSE).
> >>>
> >>> I still need to fully convince myself all is well now and we don't end
> >>> up with more fixes. :-) If it passes further testing, I'll send it as 
> >>> a
> >>> real patch (I want to add you as Co-developed-by, but would need your
> >>> Signed-off-by for the code you pasted, I think.)
> >
> > I let it run on syzbot through the night, and it's fine without
> > PARAVIRT (see below). I have sent the patch (need your Signed-off-by
> > as it's based on your code, thank you!):
> > https://lkml.kernel.org/r/20200807090031.3506555-1-el...@google.com
> >
> >> With CONFIG_PARAVIRT=y (without the notrace->noinstr patch), I still
> >> get lockdep DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled()), although
> >> it takes longer for syzbot to hit them. But I think that's expected
> >> because we can still get the recursion that I pointed out, and will
> >> need that patch.
> >
> > Never mind, I get these warnings even if I don't turn on KCSAN
> > (CONFIG_KCSAN=n). Something else is going on with PARAVIRT=y that
> > throws off IRQ state tracking. :-/
> 
>  What are the settings of CONFIG_PARAVIRT_XXL and
>  CONFIG_PARAVIRT_SPINLOCKS in this case?
> >>>
> >>> I attached a config.
> >>>
> >>> $> grep PARAVIRT .config
> >>> CONFIG_PARAVIRT=y
> >>> CONFIG_PARAVIRT_XXL=y
> >>> # CONFIG_PARAVIRT_DEBUG is not set
> >>> CONFIG_PARAVIRT_SPINLOCKS=y
> >>> # CONFIG_PARAVIRT_TIME_ACCOUNTING is not set
> >>> CONFIG_PARAVIRT_CLOCK=y
> >>
> >> Anything special I need to do to reproduce the problem? Or would you be
> >> willing to do some more rounds with different config settings?
> >
> > I can only test it with syzkaller, but that probably doesn't help if you
> > don't already have it set up. It can't seem to find a C reproducer.
> >
> > I did some more rounds with different configs.
> >
> >> I think CONFIG_PARAVIRT_XXL shouldn't matter, but I'm not completely
> >> sure about that. CONFIG_PARAVIRT_SPINLOCKS would be my primary suspect.
> >
> > Yes, PARAVIRT_XXL doesn't make a different. When disabling
> > PARAVIRT_SPINLOCKS, however, the warnings go away.
>
> Thanks for testing!
>
> I take it you are doing the tests in a KVM guest?

Yes, correct.

> If so I have a gut feeling that the use of local_irq_save() and
> local_irq_restore() in kvm_wait() might be fishy. I might be completely
> wrong here, though.

Happy to help debug more, although I might need patches or pointers
what to play with.

> BTW, I think Xen's variant of pv spinlocks is fine (no playing with IRQ
> on/off).
>
> Hyper-V seems to do the same as KVM, and kicking another vcpu could be
> problematic as well, as it is just using IPI.
>
>
> Juergen


Re: [PATCH] perf record: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set

2020-08-07 Thread Arnaldo Carvalho de Melo
Em Thu, Aug 06, 2020 at 09:43:57PM +0200, Jiri Olsa escreveu:
> On Wed, Aug 05, 2020 at 10:29:37AM +0800, Jin Yao wrote:
> > We received an error report that perf-record caused 'Segmentation fault'
> > on a newly installed system (e.g. on a newly installed Ubuntu).
> > 
> >  (gdb) backtrace
> >  #0  __read_once_size (size=4, res=, p=0x14) at 
> > /root/0-jinyao/acme/tools/include/linux/compiler.h:139
> >  #1  atomic_read (v=0x14) at 
> > /root/0-jinyao/acme/tools/include/asm/../../arch/x86/include/asm/atomic.h:28
> >  #2  refcount_read (r=0x14) at 
> > /root/0-jinyao/acme/tools/include/linux/refcount.h:65
> >  #3  perf_mmap__read_init (map=map@entry=0x0) at mmap.c:177
> >  #4  0x561ce5c0de39 in perf_evlist__poll_thread (arg=0x561ce68584d0) at 
> > util/sideband_evlist.c:62
> >  #5  0x7fad78491609 in start_thread (arg=) at 
> > pthread_create.c:477
> >  #6  0x7fad7823c103 in clone () at 
> > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> 
> hum, I recall discussing the same issue,
> I thought it was already fixed :-\ in any case:
> 
> Acked-by: Jiri Olsa 

I added this:

Fixes: 23cbb41c939a ("perf record: Move side band evlist setup to separate 
routine")

To help this fix go back to a few stable kernels. Technically the
problem was introduced much earlier, when the side band thread was
added, but then this would require more cherry-picking of patches and
since we have a workaround, i.e. build with libbpf, I think this is
enough.

Applied,

- Arnaldo
 
> thanks,
> jirka
> 
> > 
> > The root cause is, evlist__add_bpf_sb_event() just returns 0 if
> > HAVE_LIBBPF_SUPPORT is not defined (inline function path). So it will
> > not create a valid evsel for side-band event.
> > 
> > But perf-record still creates BPF side band thread to process the
> > side-band event, then the error happens.
> > 
> > We can reproduce this issue by removing the libelf-dev. e.g.
> > 1. apt-get remove libelf-dev
> > 2. perf record -a -- sleep 1
> > 
> > root@test:~# ./perf record -a -- sleep 1
> > perf: Segmentation fault
> > Obtained 6 stack frames.
> > ./perf(+0x28eee8) [0x5562d6ef6ee8]
> > /lib/x86_64-linux-gnu/libc.so.6(+0x46210) [0x7fbfdc65f210]
> > ./perf(+0x342e74) [0x5562d6faae74]
> > ./perf(+0x257e39) [0x5562d6ebfe39]
> > /lib/x86_64-linux-gnu/libpthread.so.0(+0x9609) [0x7fbfdc990609]
> > /lib/x86_64-linux-gnu/libc.so.6(clone+0x43) [0x7fbfdc73b103]
> > Segmentation fault (core dumped)
> > 
> > To fix this issue,
> > 
> > 1. We either install the missing libraries to let HAVE_LIBBPF_SUPPORT
> >be defined.
> >e.g. apt-get install libelf-dev and install other related libraries.
> > 
> > 2. Use this patch to skip the side-band event setup if HAVE_LIBBPF_SUPPORT
> >is not set.
> > 
> > Signed-off-by: Jin Yao 
> > ---
> >  tools/perf/builtin-record.c | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index b6bdccd875bc..ae97f98e2753 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -1506,6 +1506,7 @@ static int record__synthesize(struct record *rec, 
> > bool tail)
> > return err;
> >  }
> >  
> > +#ifdef HAVE_LIBBPF_SUPPORT
> >  static int record__process_signal_event(union perf_event *event 
> > __maybe_unused, void *data)
> >  {
> > struct record *rec = data;
> > @@ -1550,6 +1551,12 @@ static int record__setup_sb_evlist(struct record 
> > *rec)
> >  
> > return 0;
> >  }
> > +#else
> > +static int record__setup_sb_evlist(struct record *rec __maybe_unused)
> > +{
> > +   return 0;
> > +}
> > +#endif
> >  
> >  static int __cmd_record(struct record *rec, int argc, const char **argv)
> >  {
> > -- 
> > 2.17.1
> > 
> 

-- 

- Arnaldo


Re: [PATCH RFC v2 02/18] irq/dev-msi: Add support for a new DEV_MSI irq domain

2020-08-07 Thread Jason Gunthorpe
On Thu, Aug 06, 2020 at 10:21:11PM +0200, Thomas Gleixner wrote:

> Optionally? Please tell the hardware folks to make this mandatory. We
> have enough pain with non maskable MSI interrupts already so introducing
> yet another non maskable interrupt trainwreck is not an option.

Can you elaborate on the flows where Linux will need to trigger
masking?

I expect that masking will be available in our NIC HW too - but it
will require a spin loop if masking has to be done in an atomic
context.

> It's more than a decade now that I tell HW people not to repeat the
> non-maskable MSI failure, but obviously they still think that
> non-maskable interrupts are a brilliant idea. I know that HW folks
> believe that everything they omit can be fixed in software, but they
> have to finally understand that this particular issue _cannot_ be fixed
> at all.

Sure, the CPU should always be able to shut off an interrupt!

Maybe explaining the goals would help understand the HW perspective.

Today HW can process > 100k queues of work at once. Interrupt delivery
works by having a MSI index in each queue's metadata and the interrupt
indirects through a MSI-X table on-chip which has the
addr/data/mask/etc.

What IMS proposes is that the interrupt data can move into the queue
meta data (which is not required to be on-chip), eg along side the
producer/consumer pointers, and the central MSI-X table is not
needed. This is necessary because the PCI spec has very harsh design
requirements for a MSI-X table that make scaling it prohibitive.

So an IRQ can be silenced by deleting or stopping the queue(s)
triggering it. It can be masked by including masking in the queue
metadata. We can detect pending by checking the producer/consumer
values.

However synchronizing all the HW and all the state is now more
complicated than just writing a mask bit via MMIO to an on-die memory.

Jason


Re: [PATCH] x86/paravirt: Add missing noinstr to arch_local*() helpers

2020-08-07 Thread Jürgen Groß

On 07.08.20 13:38, Marco Elver wrote:

On Fri, Aug 07, 2020 at 12:35PM +0200, Jürgen Groß wrote:

On 07.08.20 11:50, Marco Elver wrote:

On Fri, Aug 07, 2020 at 11:24AM +0200, Jürgen Groß wrote:

On 07.08.20 11:01, Marco Elver wrote:

On Thu, 6 Aug 2020 at 18:06, Marco Elver  wrote:

On Thu, 6 Aug 2020 at 15:17, Marco Elver  wrote:

On Thu, Aug 06, 2020 at 01:32PM +0200, pet...@infradead.org wrote:

On Thu, Aug 06, 2020 at 09:47:23AM +0200, Marco Elver wrote:

Testing my hypothesis that raw then nested non-raw
local_irq_save/restore() breaks IRQ state tracking -- see the reproducer
below. This is at least 1 case I can think of that we're bound to hit.

...


/me goes ponder things...

How's something like this then?

---
include/linux/sched.h |  3 ---
kernel/kcsan/core.c   | 62 
---
2 files changed, 44 insertions(+), 21 deletions(-)


Thank you! That approach seems to pass syzbot (also with
CONFIG_PARAVIRT) and kcsan-test tests.

I had to modify it some, so that report.c's use of the restore logic
works and not mess up the IRQ trace printed on KCSAN reports (with
CONFIG_KCSAN_VERBOSE).

I still need to fully convince myself all is well now and we don't end
up with more fixes. :-) If it passes further testing, I'll send it as a
real patch (I want to add you as Co-developed-by, but would need your
Signed-off-by for the code you pasted, I think.)


I let it run on syzbot through the night, and it's fine without
PARAVIRT (see below). I have sent the patch (need your Signed-off-by
as it's based on your code, thank you!):
https://lkml.kernel.org/r/20200807090031.3506555-1-el...@google.com


With CONFIG_PARAVIRT=y (without the notrace->noinstr patch), I still
get lockdep DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled()), although
it takes longer for syzbot to hit them. But I think that's expected
because we can still get the recursion that I pointed out, and will
need that patch.


Never mind, I get these warnings even if I don't turn on KCSAN
(CONFIG_KCSAN=n). Something else is going on with PARAVIRT=y that
throws off IRQ state tracking. :-/


What are the settings of CONFIG_PARAVIRT_XXL and
CONFIG_PARAVIRT_SPINLOCKS in this case?


I attached a config.

$> grep PARAVIRT .config
CONFIG_PARAVIRT=y
CONFIG_PARAVIRT_XXL=y
# CONFIG_PARAVIRT_DEBUG is not set
CONFIG_PARAVIRT_SPINLOCKS=y
# CONFIG_PARAVIRT_TIME_ACCOUNTING is not set
CONFIG_PARAVIRT_CLOCK=y


Anything special I need to do to reproduce the problem? Or would you be
willing to do some more rounds with different config settings?


I can only test it with syzkaller, but that probably doesn't help if you
don't already have it set up. It can't seem to find a C reproducer.

I did some more rounds with different configs.


I think CONFIG_PARAVIRT_XXL shouldn't matter, but I'm not completely
sure about that. CONFIG_PARAVIRT_SPINLOCKS would be my primary suspect.


Yes, PARAVIRT_XXL doesn't make a difference. When disabling
PARAVIRT_SPINLOCKS, however, the warnings go away.


Thanks for testing!

I take it you are doing the tests in a KVM guest?

If so I have a gut feeling that the use of local_irq_save() and
local_irq_restore() in kvm_wait() might be fishy. I might be completely
wrong here, though.

BTW, I think Xen's variant of pv spinlocks is fine (no playing with IRQ
on/off).

Hyper-V seems to do the same as KVM, and kicking another vcpu could be
problematic as well, as it is just using IPI.


Juergen


Re: [PATCH v3] drm/virtio: fix missing dma_fence_put() in virtio_gpu_execbuffer_ioctl()

2020-08-07 Thread Gerd Hoffmann
On Tue, Jul 21, 2020 at 06:16:47PM +0800, Xin He wrote:
> From: Qi Liu 
> 
> We should put the reference count of the fence after calling
> virtio_gpu_cmd_submit(). So add the missing dma_fence_put().

>   virtio_gpu_cmd_submit(vgdev, buf, exbuf->size,
> vfpriv->ctx_id, buflist, out_fence);
> + dma_fence_put(_fence->f);
>   virtio_gpu_notify(vgdev);

Pushed to drm-misc-fixes.

thanks,
  Gerd



Re: [PATCH] ASoC: amd: Replacing component->name with codec_dai->name

2020-08-07 Thread Mark Brown
On Fri, Aug 07, 2020 at 08:36:25AM +, RAVULAPATI, VISHNU VARDHAN RAO wrote:
> On Thu, Aug 06, 2020 at 03:44:12PM +0530, Ravulapati Vishnu vardhan rao wrote:

> > Replacing string compare with codec_dai->name instead of comparing 
> > with codec_dai->component->name in hw_params.

> >Why?

> Here the component name for codec RT1015 is "i2c-10EC1015:00" and will never 
> be "rt1015-aif1"
> As it is codec-dai->name so the strcmp always compares and fails to set the 
> sysclk,pll,bratio of expected codec-dai

This should be in the changelog so people can understand why the change
was made.

Please fix your mail client to word wrap within paragraphs at something
substantially less than 80 columns.  Doing this makes your messages much
easier to read and reply to.


signature.asc
Description: PGP signature


Re: drivers/video/fbdev/pxafb.c:916:24: sparse: sparse: incorrect type in assignment (different address spaces)

2020-08-07 Thread Luc Van Oostenryck
On Fri, Aug 07, 2020 at 06:37:36PM +0800, kernel test robot wrote:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
> master
> head:   86cfccb66937dd6cbf26ed619958b9e587e6a115
> commit: 670d0a4b10704667765f7d18f7592993d02783aa sparse: use identifiers to 
> define address spaces
> date:   7 weeks ago
> config: arm-randconfig-s031-20200807 (attached as .config)
> compiler: arm-linux-gnueabi-gcc (GCC) 9.3.0
> reproduce:
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # apt-get install sparse
> # sparse version: v0.6.2-118-ge1578773-dirty
> git checkout 670d0a4b10704667765f7d18f7592993d02783aa
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 
> CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=arm 
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
> 
> 
> sparse warnings: (new ones prefixed by >>)
> 
> >> drivers/video/fbdev/pxafb.c:916:24: sparse: sparse: incorrect type in 
> >> assignment (different address spaces) @@ expected void [noderef] 
> >> __iomem *video_mem @@ got void * @@
> >> drivers/video/fbdev/pxafb.c:916:24: sparse: expected void [noderef] 
> >> __iomem *video_mem

Hi,

since late June I receive several mails per day about this commit but
they are all false-positive.
Commit 670d0a4b10704667765f7d18f7592993d02783aa can't introduce *new*
warnings, it only changes how address-spaces are displayed in sparse's
warnings (for example, the address space for __user pointers was
displayed as '', now it's nicely displayed as '__user', same
for '__iomem', '__percpu' and '__rcu').

Isn't it possible to ignore some commits like this one?
Or, even better, should it be possible to only report when a new
warning is effectively added, not when its content is simply modified?
If not it would be nice to be able to see the difference in a diff-like
format.

Best regards,
-- Luc


Re: [PATCH v2 31/41] ARM: s3c24xx: spi: avoid hardcoding fiq number in driver

2020-08-07 Thread Mark Brown
On Thu, Aug 06, 2020 at 08:20:48PM +0200, Krzysztof Kozlowski wrote:
> From: Arnd Bergmann 
> 
> The IRQ_EINT0 constant is a platform detail that is
> defined in mach/irqs.h and not visible to drivers once
> that header is made private.

Acked-by: Mark Brown 


signature.asc
Description: PGP signature


Re: [PATCH v2 28/41] ARM: s3c24xx: move iis pinctrl config into boards

2020-08-07 Thread Mark Brown
On Thu, Aug 06, 2020 at 08:20:45PM +0200, Krzysztof Kozlowski wrote:
> From: Arnd Bergmann 
> 
> The s3c_gpio_cfgall_range() function is an internal interface of the
> samsung gpio driver and should not be called directly by drivers, so
> move the iis pin initialization into the boards.

Well, it is now - that wasn't the case at the time.

Acked-by: Mark Brown 


signature.asc
Description: PGP signature


[PATCH bpf] bpf: doc: remove references to warning message when using bpf_trace_printk()

2020-08-07 Thread Alan Maguire
The BPF helper bpf_trace_printk() no longer uses trace_printk();
it now triggers a dedicated trace event.  Hence the described
warning is no longer present, so remove the discussion of it as
it may confuse people.

Fixes: ac5a72ea5c89 ("bpf: Use dedicated bpf_trace_printk event instead of 
trace_printk()")
Signed-off-by: Alan Maguire 
---
 Documentation/bpf/bpf_design_QA.rst | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/Documentation/bpf/bpf_design_QA.rst 
b/Documentation/bpf/bpf_design_QA.rst
index 12a246f..2df7b06 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -246,17 +246,6 @@ program is loaded the kernel will print warning message, so
 this helper is only useful for experiments and prototypes.
 Tracing BPF programs are root only.
 
-Q: bpf_trace_printk() helper warning
-
-Q: When bpf_trace_printk() helper is used the kernel prints nasty
-warning message. Why is that?
-
-A: This is done to nudge program authors into better interfaces when
-programs need to pass data to user space. Like bpf_perf_event_output()
-can be used to efficiently stream data via perf ring buffer.
-BPF maps can be used for asynchronous data sharing between kernel
-and user space. bpf_trace_printk() should only be used for debugging.
-
 Q: New functionality via kernel modules?
 
 Q: Can BPF functionality such as new program or map types, new
-- 
1.8.3.1



Re: [PATCH v2 23/41] ARM: s3c24xx: move spi fiq handler into platform

2020-08-07 Thread Mark Brown
On Thu, Aug 06, 2020 at 08:20:40PM +0200, Krzysztof Kozlowski wrote:
> From: Arnd Bergmann 
> 
> The fiq handler needs access to some register definitions that
> should not be used directly by device drivers.

Acked-by: Mark Brown 


signature.asc
Description: PGP signature


Re: linux-next: Signed-off-by missing for commit in the cifs tree

2020-08-07 Thread Dan Carpenter
On Fri, Aug 07, 2020 at 08:33:42AM +1000, Stephen Rothwell wrote:
> Hi Steve,
> 
> Thanks for fixing this up.
> 
> On Thu, 6 Aug 2020 10:31:33 -0500 Steve French  wrote:
> >
> > I just fixed the Author tag in this patch to match your email address
> > but seems like the author email address gets mangled when sent through
> > some mailing lists.  Any ideas how to avoid this.
> 
> You may need to ask people to add an explicit From: line at the start
> of the body for patches sent via the samba.org mailing lists (since
> they mangle addresses to get around DKIM checks, I assume).
> 

I wonder why it affects me in particular and only now...

Steve, could you send me a copy of the patch that you received or
something?

regards,
dan carpenter




[PATCH v1 05/13] phy: cadence-torrent: Add support for PHY multilink configuration

2020-08-07 Thread Swapnil Jakhade
Added support for multilink configuration of Torrent PHY. Currently,
maximum two links are supported. In case of multilink configuration,
PHY needs to be configured for both the protocols simultaneously at
the beginning as per the requirement of Torrent PHY.
Also, register sequences for PCIe + SGMII/QSGMII Unique SSC PHY multilink
configurations are added.

Signed-off-by: Swapnil Jakhade 
---
 drivers/phy/cadence/phy-cadence-torrent.c | 783 +-
 1 file changed, 757 insertions(+), 26 deletions(-)

diff --git a/drivers/phy/cadence/phy-cadence-torrent.c 
b/drivers/phy/cadence/phy-cadence-torrent.c
index 7ccdb3105783..c55f8bde52f1 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -29,7 +29,7 @@
 #define DEFAULT_MAX_BIT_RATE   8100 /* in Mbps */
 
 #define NUM_SSC_MODE   3
-#define NUM_PHY_TYPE   2
+#define NUM_PHY_TYPE   5
 
 #define POLL_TIMEOUT_US5000
 #define PLL_LOCK_TIMEOUT   10
@@ -127,8 +127,10 @@
 #define CMN_PLL1_LOCK_REFCNT_START  0x00DCU
 #define CMN_PLL1_LOCK_PLLCNT_START 0x00DEU
 #define CMN_PLL1_LOCK_PLLCNT_THR0x00DFU
+#define CMN_TXPUCAL_TUNE   0x0103U
 #define CMN_TXPUCAL_INIT_TMR   0x0104U
 #define CMN_TXPUCAL_ITER_TMR   0x0105U
+#define CMN_TXPDCAL_TUNE   0x010BU
 #define CMN_TXPDCAL_INIT_TMR   0x010CU
 #define CMN_TXPDCAL_ITER_TMR   0x010DU
 #define CMN_RXCAL_INIT_TMR 0x0114U
@@ -143,6 +145,7 @@
 #define CMN_PDIAG_PLL0_CP_IADJ_M0  0x01A5U
 #define CMN_PDIAG_PLL0_FILT_PADJ_M00x01A6U
 #define CMN_PDIAG_PLL0_CTRL_M1 0x01B0U
+#define CMN_PDIAG_PLL0_CLK_SEL_M1  0x01B1U
 #define CMN_PDIAG_PLL0_CP_PADJ_M1  0x01B4U
 #define CMN_PDIAG_PLL0_CP_IADJ_M1  0x01B5U
 #define CMN_PDIAG_PLL0_FILT_PADJ_M10x01B6U
@@ -151,6 +154,7 @@
 #define CMN_PDIAG_PLL1_CP_PADJ_M0  0x01C4U
 #define CMN_PDIAG_PLL1_CP_IADJ_M0  0x01C5U
 #define CMN_PDIAG_PLL1_FILT_PADJ_M00x01C6U
+#define CMN_DIAG_BIAS_OVRD10x01E1U
 
 /* PMA TX Lane registers */
 #define TX_TXCC_CTRL   0x0040U
@@ -173,11 +177,20 @@
 #define RX_PSC_A2  0x0002U
 #define RX_PSC_A3  0x0003U
 #define RX_PSC_CAL 0x0006U
+#define RX_CDRLF_CNFG  0x0080U
 #define RX_REE_GCSM1_CTRL  0x0108U
+#define RX_REE_GCSM1_EQENM_PH1 0x0109U
+#define RX_REE_GCSM1_EQENM_PH2 0x010AU
 #define RX_REE_GCSM2_CTRL  0x0110U
 #define RX_REE_PERGCSM_CTRL0x0118U
 #define RX_REE_TAP1_CLIP   0x0171U
 #define RX_REE_TAP2TON_CLIP0x0172U
+#define RX_DIAG_DFE_CTRL   0x01E0U
+#define RX_DIAG_DFE_AMP_TUNE_2 0x01E2U
+#define RX_DIAG_DFE_AMP_TUNE_3 0x01E3U
+#define RX_DIAG_NQST_CTRL  0x01E5U
+#define RX_DIAG_PI_RATE0x01F4U
+#define RX_DIAG_PI_CAP 0x01F5U
 #define RX_DIAG_ACYA   0x01FFU
 
 /* PHY PCS common registers */
@@ -204,8 +217,11 @@ static const struct reg_field phy_reset_ctrl =
REG_FIELD(PHY_RESET, 8, 8);
 
 enum cdns_torrent_phy_type {
+   TYPE_NONE,
TYPE_DP,
-   TYPE_PCIE
+   TYPE_PCIE,
+   TYPE_SGMII,
+   TYPE_QSGMII,
 };
 
 enum cdns_torrent_ssc_mode {
@@ -309,9 +325,16 @@ struct cdns_torrent_vals {
 struct cdns_torrent_data {
u8 block_offset_shift;
u8 reg_offset_shift;
-   struct cdns_torrent_vals *cmn_vals[NUM_PHY_TYPE][NUM_SSC_MODE];
-   struct cdns_torrent_vals *tx_ln_vals[NUM_PHY_TYPE][NUM_SSC_MODE];
-   struct cdns_torrent_vals *rx_ln_vals[NUM_PHY_TYPE][NUM_SSC_MODE];
+   struct cdns_torrent_vals *link_cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+  [NUM_SSC_MODE];
+   struct cdns_torrent_vals *xcvr_diag_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+   [NUM_SSC_MODE];
+   struct cdns_torrent_vals *cmn_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+ [NUM_SSC_MODE];
+   struct cdns_torrent_vals *tx_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+   [NUM_SSC_MODE];
+   struct cdns_torrent_vals *rx_ln_vals[NUM_PHY_TYPE][NUM_PHY_TYPE]
+   [NUM_SSC_MODE];
 };
 
 struct cdns_regmap_cdb_context {
@@ -1787,6 +1810,7 @@ static int cdns_torrent_regmap_init(struct 
cdns_torrent_phy *cdns_phy)
 static int cdns_torrent_phy_init(struct phy *phy)
 {
struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
+   const struct cdns_torrent_data *init_data = cdns_phy->init_data;
struct cdns_torrent_vals *cmn_vals, *tx_ln_vals, *rx_ln_vals;
struct cdns_torrent_inst *inst = phy_get_drvdata(phy);
enum cdns_torrent_phy_type phy_type = inst->phy_type;
@@ -1809,11 +1833,14 @@ 

[PATCH v1 07/13] phy: cadence-torrent: Add clk changes for multilink configuration

2020-08-07 Thread Swapnil Jakhade
Prepare and enable clock in probe instead of phy_init.
Also, remove phy_exit callback.

Signed-off-by: Swapnil Jakhade 
---
 drivers/phy/cadence/phy-cadence-torrent.c | 41 ++-
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/cadence/phy-cadence-torrent.c 
b/drivers/phy/cadence/phy-cadence-torrent.c
index fc5720845ec7..691d4aa5b2ed 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -273,7 +273,6 @@ enum phy_powerstate {
 };
 
 static int cdns_torrent_phy_init(struct phy *phy);
-static int cdns_torrent_phy_exit(struct phy *phy);
 static int cdns_torrent_dp_init(struct phy *phy);
 static int cdns_torrent_dp_run(struct cdns_torrent_phy *cdns_phy,
   u32 num_lanes);
@@ -305,7 +304,6 @@ static int cdns_torrent_phy_off(struct phy *phy);
 
 static const struct phy_ops cdns_torrent_phy_ops = {
.init   = cdns_torrent_phy_init,
-   .exit   = cdns_torrent_phy_exit,
.configure  = cdns_torrent_dp_configure,
.power_on   = cdns_torrent_phy_on,
.power_off  = cdns_torrent_phy_off,
@@ -977,14 +975,6 @@ static int cdns_torrent_dp_init(struct phy *phy)
return ret;
 }
 
-static int cdns_torrent_phy_exit(struct phy *phy)
-{
-   struct cdns_torrent_phy *cdns_phy = dev_get_drvdata(phy->dev.parent);
-
-   clk_disable_unprepare(cdns_phy->clk);
-   return 0;
-}
-
 static
 int cdns_torrent_dp_wait_pma_cmn_ready(struct cdns_torrent_phy *cdns_phy)
 {
@@ -1825,20 +1815,7 @@ static int cdns_torrent_phy_init(struct phy *phy)
struct cdns_reg_pairs *reg_pairs;
struct regmap *regmap;
u32 num_regs;
-   int ret, i, j;
-
-   ret = clk_prepare_enable(cdns_phy->clk);
-   if (ret) {
-   dev_err(cdns_phy->dev, "Failed to prepare ref clock\n");
-   return ret;
-   }
-
-   cdns_phy->ref_clk_rate = clk_get_rate(cdns_phy->clk);
-   if (!(cdns_phy->ref_clk_rate)) {
-   dev_err(cdns_phy->dev, "Failed to get ref clock rate\n");
-   clk_disable_unprepare(cdns_phy->clk);
-   return -EINVAL;
-   }
+   int i, j;
 
if (cdns_phy->nsubnodes > 1)
return 0;
@@ -2072,6 +2049,19 @@ static int cdns_torrent_phy_probe(struct platform_device 
*pdev)
if (ret)
return ret;
 
+   ret = clk_prepare_enable(cdns_phy->clk);
+   if (ret) {
+   dev_err(cdns_phy->dev, "Failed to prepare ref clock\n");
+   return ret;
+   }
+
+   cdns_phy->ref_clk_rate = clk_get_rate(cdns_phy->clk);
+   if (!(cdns_phy->ref_clk_rate)) {
+   dev_err(cdns_phy->dev, "Failed to get ref clock rate\n");
+   clk_disable_unprepare(cdns_phy->clk);
+   return -EINVAL;
+   }
+
/* Enable APB */
reset_control_deassert(cdns_phy->apb_rst);
 
@@ -2246,6 +2236,7 @@ static int cdns_torrent_phy_probe(struct platform_device 
*pdev)
reset_control_put(cdns_phy->phys[i].lnk_rst);
of_node_put(child);
reset_control_assert(cdns_phy->apb_rst);
+   clk_disable_unprepare(cdns_phy->clk);
return ret;
 }
 
@@ -2261,6 +2252,8 @@ static int cdns_torrent_phy_remove(struct platform_device 
*pdev)
reset_control_put(cdns_phy->phys[i].lnk_rst);
}
 
+   clk_disable_unprepare(cdns_phy->clk);
+
return 0;
 }
 
-- 
2.26.1



[PATCH v1 09/13] phy: cadence-torrent: Configure PHY_PLL_CFG as part of link_cmn_vals

2020-08-07 Thread Swapnil Jakhade
Include PHY_PLL_CFG as a first register value to configure in
link_cmn_vals array values.

Signed-off-by: Swapnil Jakhade 
---
 drivers/phy/cadence/phy-cadence-torrent.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/cadence/phy-cadence-torrent.c 
b/drivers/phy/cadence/phy-cadence-torrent.c
index d01a44b93e99..216b25512a08 100644
--- a/drivers/phy/cadence/phy-cadence-torrent.c
+++ b/drivers/phy/cadence/phy-cadence-torrent.c
@@ -1830,7 +1830,14 @@ static int cdns_torrent_phy_init(struct phy *phy)
reg_pairs = link_cmn_vals->reg_pairs;
num_regs = link_cmn_vals->num_regs;
regmap = cdns_phy->regmap_common_cdb;
-   for (i = 0; i < num_regs; i++)
+
+   /**
+* First array value in link_cmn_vals must be of
+* PHY_PLL_CFG register
+*/
+   regmap_field_write(cdns_phy->phy_pll_cfg, reg_pairs[0].val);
+
+   for (i = 1; i < num_regs; i++)
regmap_write(regmap, reg_pairs[i].off,
 reg_pairs[i].val);
}
@@ -1907,8 +1914,6 @@ int cdns_torrent_phy_configure_multilink(struct 
cdns_torrent_phy *cdns_phy)
phy_t1 = cdns_phy->phys[0].phy_type;
phy_t2 = cdns_phy->phys[1].phy_type;
 
-   regmap_field_write(cdns_phy->phy_pll_cfg, 0x0003);
-
/**
 * First configure the PHY for first link with phy_t1. Get the array
 * values as [phy_t1][phy_t2][ssc].
@@ -1944,7 +1949,15 @@ int cdns_torrent_phy_configure_multilink(struct 
cdns_torrent_phy *cdns_phy)
reg_pairs = link_cmn_vals->reg_pairs;
num_regs = link_cmn_vals->num_regs;
regmap = cdns_phy->regmap_common_cdb;
-   for (i = 0; i < num_regs; i++)
+
+   /**
+* First array value in link_cmn_vals must be of
+* PHY_PLL_CFG register
+*/
+   regmap_field_write(cdns_phy->phy_pll_cfg,
+  reg_pairs[0].val);
+
+   for (i = 1; i < num_regs; i++)
regmap_write(regmap, reg_pairs[i].off,
 reg_pairs[i].val);
}
@@ -2283,6 +2296,7 @@ static int cdns_torrent_phy_remove(struct platform_device 
*pdev)
 
 /* PCIe and SGMII/QSGMII Unique SSC link configuration */
 static struct cdns_reg_pairs pcie_sgmii_link_cmn_regs[] = {
+   {0x0003, PHY_PLL_CFG},
{0x0601, CMN_PDIAG_PLL0_CLK_SEL_M0},
{0x0400, CMN_PDIAG_PLL0_CLK_SEL_M1},
{0x0601, CMN_PDIAG_PLL1_CLK_SEL_M0}
-- 
2.26.1



<    1   2   3   4   5   6   7   8   9   10   >