date:20210315

Re: [PATCH] smp: kernel/panic.c - silence warnings

2021-03-15 Thread Christophe Leroy





Le 16/03/2021 à 07:35, heying (H) a écrit :

Thanks for your reply.


在 2021/3/16 13:15, Randy Dunlap 写道:

On 3/15/21 9:08 PM, He Ying wrote:

We found these warnings in kernel/panic.c by using sparse tool:
warning: symbol 'panic_smp_self_stop' was not declared.
warning: symbol 'nmi_panic_self_stop' was not declared.
warning: symbol 'crash_smp_send_stop' was not declared.

To avoid them, add declarations for these three functions in
include/linux/smp.h.

Reported-by: Hulk Robot 
Signed-off-by: He Ying 
---
  include/linux/smp.h | 8 
  1 file changed, 8 insertions(+)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 70c6f6284dcf..861a253cc179 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -50,6 +50,14 @@ extern unsigned int total_cpus;
  int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
   int wait);
+/*
+ * Cpus stopping functions in panic. All have default weak definations.

   definitions.

Sorry for my typo.



+ * Architecure dependent code may override them.

   Architecture-dependent

Is that necessary?


Sure you are missing a 't'

Architecure ==> Architecture





+ */
+void panic_smp_self_stop(void);
+void nmi_panic_self_stop(struct pt_regs *regs);
+void crash_smp_send_stop(void);
+
  /*
   * Call a function on all processors
   */

[PATCH v7 4/6] selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available

2021-03-15 Thread Chang S. Bae

The SIGSTKSZ constant may not represent enough stack size in some
architectures as the hardware state size grows.

Use getauxval(AT_MINSIGSTKSZ) to increase the stack size.

Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Cc: linux-kselft...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v5:
* Added as a new patch.
---
 tools/testing/selftests/sigaltstack/sas.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/sigaltstack/sas.c 
b/tools/testing/selftests/sigaltstack/sas.c
index 8934a3766d20..c53b070755b6 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "../kselftest.h"
 
@@ -24,6 +25,11 @@
 #define SS_AUTODISARM  (1U << 31)
 #endif
 
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51
+#endif
+
+static unsigned int stack_size;
 static void *sstack, *ustack;
 static ucontext_t uc, sc;
 static const char *msg = "[OK]\tStack preserved";
@@ -47,7 +53,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 #endif
 
if (sp < (unsigned long)sstack ||
-   sp >= (unsigned long)sstack + SIGSTKSZ) {
+   sp >= (unsigned long)sstack + stack_size) {
ksft_exit_fail_msg("SP is not on sigaltstack\n");
}
/* put some data on stack. other sighandler will try to overwrite it */
@@ -108,6 +114,10 @@ int main(void)
stack_t stk;
int err;
 
+   /* Make sure more than the required minimum. */
+   stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+   ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
+
ksft_print_header();
ksft_set_plan(3);
 
@@ -117,7 +127,7 @@ int main(void)
sigaction(SIGUSR1, &act, NULL);
act.sa_sigaction = my_usr2;
sigaction(SIGUSR2, &act, NULL);
-   sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+   sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
  MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (sstack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -139,7 +149,7 @@ int main(void)
}
 
stk.ss_sp = sstack;
-   stk.ss_size = SIGSTKSZ;
+   stk.ss_size = stack_size;
stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
err = sigaltstack(&stk, NULL);
if (err) {
@@ -161,7 +171,7 @@ int main(void)
}
}
 
-   ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+   ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
  MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (ustack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -170,7 +180,7 @@ int main(void)
getcontext(&uc);
uc.uc_link = NULL;
uc.uc_stack.ss_sp = ustack;
-   uc.uc_stack.ss_size = SIGSTKSZ;
+   uc.uc_stack.ss_size = stack_size;
makecontext(&uc, switch_fn, 0);
raise(SIGUSR1);
 
-- 
2.17.1

[PATCH v7 6/6] selftest/x86/signal: Include test cases for validating sigaltstack

2021-03-15 Thread Chang S. Bae

The test measures the kernel's signal delivery with different (enough vs.
insufficient) stack sizes.

Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Cc: x...@kernel.org
Cc: linux-kselft...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v3:
* Revised test messages again (Borislav Petkov)

Changes from v2:
* Revised test messages (Borislav Petkov)
---
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/sigaltstack.c | 128 ++
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/sigaltstack.c

diff --git a/tools/testing/selftests/x86/Makefile 
b/tools/testing/selftests/x86/Makefile
index 333980375bc7..65bba2ae86ee 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) 
trivial_program.c -no-pie)
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
-   syscall_arg_fault fsgsbase_restore
+   syscall_arg_fault fsgsbase_restore sigaltstack
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/sigaltstack.c 
b/tools/testing/selftests/x86/sigaltstack.c
new file mode 100644
index ..f689af75e979
--- /dev/null
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* sigaltstack()-enforced minimum stack */
+#define ENFORCED_MINSIGSTKSZ   2048
+
+#ifndef AT_MINSIGSTKSZ
+#  define AT_MINSIGSTKSZ   51
+#endif
+
+static int nerrs;
+
+static bool sigalrm_expected;
+
+static unsigned long at_minstack_size;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+  int flags)
+{
+   struct sigaction sa;
+
+   memset(&sa, 0, sizeof(sa));
+   sa.sa_sigaction = handler;
+   sa.sa_flags = SA_SIGINFO | flags;
+   sigemptyset(&sa.sa_mask);
+   if (sigaction(sig, &sa, 0))
+   err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+   struct sigaction sa;
+
+   memset(&sa, 0, sizeof(sa));
+   sa.sa_handler = SIG_DFL;
+   sigemptyset(&sa.sa_mask);
+   if (sigaction(sig, &sa, 0))
+   err(1, "sigaction");
+}
+
+static int setup_altstack(void *start, unsigned long size)
+{
+   stack_t ss;
+
+   memset(&ss, 0, sizeof(ss));
+   ss.ss_size = size;
+   ss.ss_sp = start;
+
+   return sigaltstack(&ss, NULL);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+   if (sigalrm_expected) {
+   printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected 
SIGALRM).");
+   nerrs++;
+   } else {
+   printf("[OK]\tSIGSEGV signal delivered.\n");
+   }
+
+   siglongjmp(jmpbuf, 1);
+}
+
+static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
+{
+   if (!sigalrm_expected) {
+   printf("[FAIL]\tWrong signal delivered: SIGALRM (expected 
SIGSEGV).");
+   nerrs++;
+   } else {
+   printf("[OK]\tSIGALRM signal delivered.\n");
+   }
+}
+
+static void test_sigaltstack(void *altstack, unsigned long size)
+{
+   if (setup_altstack(altstack, size))
+   err(1, "sigaltstack()");
+
+   sigalrm_expected = (size > at_minstack_size) ? true : false;
+
+   sethandler(SIGSEGV, sigsegv, 0);
+   sethandler(SIGALRM, sigalrm, SA_ONSTACK);
+
+   if (!sigsetjmp(jmpbuf, 1)) {
+   printf("[RUN]\tTest an alternate signal stack of %ssufficient 
size.\n",
+  sigalrm_expected ? "" : "in");
+   printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
+  sigalrm_expected ? "It" : "SIGSEGV");
+   raise(SIGALRM);
+   }
+
+   clearhandler(SIGALRM);
+   clearhandler(SIGSEGV);
+}
+
+int main(void)
+{
+   void *altstack;
+
+   at_minstack_size = getauxval(AT_MINSIGSTKSZ);
+
+   altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | 
PROT_WRITE,
+   MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+   if (altstack == MAP_FAILED)
+   err(1, "mmap()");
+
+   if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
+   test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
+
+   test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
+
+   return nerrs == 0 ? 0 : 1;
+}
-- 
2.17.1

Re: [PATCH] powerpc: arch/powerpc/kernel/setup_64.c - cleanup warnings

2021-03-15 Thread Christophe Leroy





Le 16/03/2021 à 05:11, He Ying a écrit :

warning: symbol 'rfi_flush' was not declared.
warning: symbol 'entry_flush' was not declared.
warning: symbol 'uaccess_flush' was not declared.
We found warnings above in arch/powerpc/kernel/setup_64.c by using
sparse tool.

Define 'entry_flush' and 'uaccess_flush' as static because they are not
referenced outside the file. Include asm/security_features.h in which
'rfi_flush' is declared.

Reported-by: Hulk Robot 
Signed-off-by: He Ying 


Reviewed-by: Christophe Leroy 


---
  arch/powerpc/kernel/setup_64.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 560ed8b975e7..f92d72a7e7ce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,6 +68,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "setup.h"
  
@@ -949,8 +950,8 @@ static bool no_rfi_flush;

  static bool no_entry_flush;
  static bool no_uaccess_flush;
  bool rfi_flush;
-bool entry_flush;
-bool uaccess_flush;
+static bool entry_flush;
+static bool uaccess_flush;
  DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
  EXPORT_SYMBOL(uaccess_flush_key);

[PATCH v7 3/6] x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ

2021-03-15 Thread Chang S. Bae

Historically, signal.h defines MINSIGSTKSZ (2KB) and SIGSTKSZ (8KB), for
use by all architectures with sigaltstack(2). Over time, the hardware state
size grew, but these constants did not evolve. Today, literal use of these
constants on several architectures may result in signal stack overflow, and
thus user data corruption.

A few years ago, the ARM team addressed this issue by establishing
getauxval(AT_MINSIGSTKSZ). This enables the kernel to supply at runtime
value that is an appropriate replacement on the current and future
hardware.

Add getauxval(AT_MINSIGSTKSZ) support to x86, analogous to the support
added for ARM in commit 94b07c1f8c39 ("arm64: signal: Report signal frame
size to userspace via auxv").

Also, include a documentation to describe x86-specific auxiliary vectors.

Reported-by: Florian Weimer 
Fixes: c2bc11f10a39 ("x86, AVX-512: Enable AVX-512 States Context Switch")
Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Cc: H.J. Lu 
Cc: Fenghua Yu 
Cc: Dave Martin 
Cc: Michael Ellerman 
Cc: x...@kernel.org
Cc: libc-al...@sourceware.org
Cc: linux-a...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=153531
---
Changes from v6:
* Revised the documentation and fixed the build issue. (Borislav Petkov)
* Fixed the vertical alignment of '\'. (Borislav Petkov)

Changes from v5:
* Added a documentation.
---
 Documentation/x86/elf_auxvec.rst   | 53 ++
 Documentation/x86/index.rst|  1 +
 arch/x86/include/asm/elf.h |  4 +++
 arch/x86/include/uapi/asm/auxvec.h |  4 +--
 arch/x86/kernel/signal.c   |  5 +++
 5 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/x86/elf_auxvec.rst

diff --git a/Documentation/x86/elf_auxvec.rst b/Documentation/x86/elf_auxvec.rst
new file mode 100644
index ..6c75b26f5efb
--- /dev/null
+++ b/Documentation/x86/elf_auxvec.rst
@@ -0,0 +1,53 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+x86-specific ELF Auxiliary Vectors
+==
+
+This document describes the semantics of the x86 auxiliary vectors.
+
+Introduction
+
+
+ELF Auxiliary vectors enable the kernel to efficiently provide
+configuration specific parameters to userspace. In this example, a program
+allocates an alternate stack based on the kernel-provided size::
+
+   #include 
+   #include 
+   #include 
+   #include 
+   #include 
+   #include 
+
+   #ifndef AT_MINSIGSTKSZ
+   #define AT_MINSIGSTKSZ  51
+   #endif
+
+   
+   stack_t ss;
+
+   ss.ss_sp = malloc(ss.ss_size);
+   assert(ss.ss_sp);
+
+   ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+   ss.ss_flags = 0;
+
+   if (sigaltstack(&ss, NULL))
+err(1, "sigaltstack");
+
+
+The exposed auxiliary vectors
+=
+
+AT_SYSINFO is used for locating the vsyscall entry point.  It is not
+exported on 64-bit mode.
+
+AT_SYSINFO_EHDR is the start address of the page containing the vDSO.
+
+AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to
+deliver a signal to user-space.  AT_MINSIGSTKSZ comprehends the space
+consumed by the kernel to accommodate the user context for the current
+hardware configuration.  It does not comprehend subsequent user-space stack
+consumption, which must be added by the user.  (e.g. Above, user-space adds
+SIGSTKSZ to AT_MINSIGSTKSZ.)
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 4693e192b447..d58614d5cde6 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -35,3 +35,4 @@ x86-specific Documentation
sva
sgx
features
+   elf_auxvec
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9224d40cdefe..18d9b1117871 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -312,6 +312,7 @@ do {
\
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY);\
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\
}   \
+   NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size());   \
 } while (0)
 
 /*
@@ -328,6 +329,7 @@ extern unsigned long task_size_32bit(void);
 extern unsigned long task_size_64bit(int full_addr_space);
 extern unsigned long get_mmap_base(int is_legacy);
 extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
+extern unsigned long get_sigframe_size(void);
 
 #ifdef CONFIG_X86_32
 
@@ -349,6 +351,7 @@ do {
\
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR,\
(unsigned long __force)current->mm->context.v

Re: [PATCH v1 3/5] dt-bindings: arm: Add cpu-idle-states to Tegra194 CPU nodes

2021-03-15 Thread Sudeep Holla

On Fri, Mar 12, 2021 at 02:27:30PM -0800, Sowjanya Komatineni wrote:
> Hi Sudeep,
>
> To make our driver PSCI compliant below are few updates to be done
>
> 1. Standardize passing residency time run-time thru PSCI to ATF.
>

Yes that was my initial understanding, but your previous response was
confusing. I should have read this first.

>     From PSCI specification I only see PSCI_STAT_RESIDENCY and
> PSCI_STAT_COUNT optional functions for PSCI1.0/PSCI1.1
>

Indeed, we don't have any support to pass run-time residency hints in PSCI
today.

>    Can you please help add right people to explore on possible ways to add
> PSCI function for passing corresponding state residency time to ATF?
>

Before we jump to implementation in TF-A we need to get agreement to get this
added to the specification to support in OSPM/Linux. TF-A is just one
implementation of PSCI and may not be only one.

Formally you can raise any specification related queries to
https://developer.arm.com/support or supp...@arm.com. I will ask the author
of PSCI specification internally to take a look at this thread and chime in
if they can.

> 2. Tegra CPU Idle support definitely need to pass deepest cluster state and
> threshold to MCE firmware from DT and looks like we can move this
> implementation to ATF
>

Yes, I just asked the same question as response to your earlier email. Thanks
for confirming that it can be moved out of OSPM/Linux and DT

>  With both of the above implementation changes Tegra194 driver will be
> PSCI compliant.
>

We still need to get agreement on the specification front 😉.

--
Regards,
Sudeep

[PATCH v7 5/6] x86/signal: Detect and prevent an alternate signal stack overflow

2021-03-15 Thread Chang S. Bae

The kernel pushes context on to the userspace stack to prepare for the
user's signal handler. When the user has supplied an alternate signal
stack, via sigaltstack(2), it is easy for the kernel to verify that the
stack size is sufficient for the current hardware context.

Check if writing the hardware context to the alternate stack will exceed
it's size. If yes, then instead of corrupting user-data and proceeding with
the original signal handler, an immediate SIGSEGV signal is delivered.

Instead of calling on_sig_stack(), directly check the new stack pointer
whether in the bounds.

While the kernel allows new source code to discover and use a sufficient
alternate signal stack size, this check is still necessary to protect
binaries with insufficient alternate signal stack size from data
corruption.

Suggested-by: Jann Horn 
Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Reviewed-by: Jann Horn 
Cc: Andy Lutomirski 
Cc: Jann Horn 
Cc: x...@kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v5:
* Fixed the overflow check. (Andy Lutomirski)
* Updated the changelog.

Changes from v3:
* Updated the changelog (Borislav Petkov)

Changes from v2:
* Simplified the implementation (Jann Horn)
---
 arch/x86/kernel/signal.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 0d24f64d0145..9a62604fbf63 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -242,7 +242,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 
size_t frame_size,
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
-   int onsigstack = on_sig_stack(sp);
+   bool onsigstack = on_sig_stack(sp);
int ret;
 
/* redzone */
@@ -251,8 +251,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 
size_t frame_size,
 
/* This is the X/Open sanctioned signal stack switching.  */
if (ka->sa.sa_flags & SA_ONSTACK) {
-   if (sas_ss_flags(sp) == 0)
+   if (sas_ss_flags(sp) == 0) {
sp = current->sas_ss_sp + current->sas_ss_size;
+   /* On the alternate signal stack */
+   onsigstack = true;
+   }
} else if (IS_ENABLED(CONFIG_X86_32) &&
   !onsigstack &&
   regs->ss != __USER_DS &&
@@ -272,7 +275,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 
size_t frame_size,
 * If we are on the alternate signal stack and would overflow it, don't.
 * Return an always-bogus address instead so we will die with SIGSEGV.
 */
-   if (onsigstack && !likely(on_sig_stack(sp)))
+   if (onsigstack && unlikely(sp <= current->sas_ss_sp ||
+  sp - current->sas_ss_sp > 
current->sas_ss_size))
return (void __user *)-1L;
 
/* save i387 and extended state */
-- 
2.17.1

[PATCH v7 2/6] x86/signal: Introduce helpers to get the maximum signal frame size

2021-03-15 Thread Chang S. Bae

Signal frames do not have a fixed format and can vary in size when a number
of things change: support XSAVE features, 32 vs. 64-bit apps. Add the code
to support a runtime method for userspace to dynamically discover how large
a signal stack needs to be.

Introduce a new variable, max_frame_size, and helper functions for the
calculation to be used in a new user interface. Set max_frame_size to a
system-wide worst-case value, instead of storing multiple app-specific
values.

Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Acked-by: H.J. Lu 
Cc: x...@kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from v2:
* Renamed the fpstate size helper with cleanup (Borislav Petkov)
* Moved the sigframe struct size defines to where used (Borislav Petkov)
* Removed unneeded sentence in the changelog (Borislav Petkov)

Change from v1:
* Took stack alignment into account for sigframe size (Dave Martin)
---
 arch/x86/include/asm/fpu/signal.h |  2 ++
 arch/x86/include/asm/sigframe.h   |  2 ++
 arch/x86/kernel/cpu/common.c  |  3 ++
 arch/x86/kernel/fpu/signal.c  | 19 +++
 arch/x86/kernel/signal.c  | 57 +--
 5 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/fpu/signal.h 
b/arch/x86/include/asm/fpu/signal.h
index 7fb516b6893a..8b6631dffefd 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -29,6 +29,8 @@ unsigned long
 fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
 unsigned long *buf_fx, unsigned long *size);
 
+unsigned long fpu__get_fpstate_size(void);
+
 extern void fpu__init_prepare_fx_sw_frame(void);
 
 #endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h
index 84eab2724875..5b1ed650b124 100644
--- a/arch/x86/include/asm/sigframe.h
+++ b/arch/x86/include/asm/sigframe.h
@@ -85,4 +85,6 @@ struct rt_sigframe_x32 {
 
 #endif /* CONFIG_X86_64 */
 
+void __init init_sigframe_size(void);
+
 #endif /* _ASM_X86_SIGFRAME_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ab640abe26b6..c49ef3ad34dc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -58,6 +58,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "cpu.h"
 
@@ -1334,6 +1335,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 
*c)
 
fpu__init_system(c);
 
+   init_sigframe_size();
+
 #ifdef CONFIG_X86_32
/*
 * Regardless of whether PCID is enumerated, the SDM says
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a4ec65317a7f..dbb304e48f16 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -507,6 +507,25 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
 
return sp;
 }
+
+unsigned long fpu__get_fpstate_size(void)
+{
+   unsigned long ret = xstate_sigframe_size();
+
+   /*
+* This space is needed on (most) 32-bit kernels, or when a 32-bit
+* app is running on a 64-bit kernel. To keep things simple, just
+* assume the worst case and always include space for 'freg_state',
+* even for 64-bit apps on 64-bit kernels. This wastes a bit of
+* space, but keeps the code simple.
+*/
+   if ((IS_ENABLED(CONFIG_IA32_EMULATION) ||
+IS_ENABLED(CONFIG_X86_32)) && use_fxsr())
+   ret += sizeof(struct fregs_state);
+
+   return ret;
+}
+
 /*
  * Prepare the SW reserved portion of the fxsave memory layout, indicating
  * the presence of the extended state information in the memory layout
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index ea794a083c44..800243afd1ef 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -212,6 +212,11 @@ do {   
\
  * Set up a signal frame.
  */
 
+/* x86 ABI requires 16-byte alignment */
+#define FRAME_ALIGNMENT16UL
+
+#define MAX_FRAME_PADDING  (FRAME_ALIGNMENT - 1)
+
 /*
  * Determine which stack to use..
  */
@@ -222,9 +227,9 @@ static unsigned long align_sigframe(unsigned long sp)
 * Align the stack pointer according to the i386 ABI,
 * i.e. so that on function entry ((sp + 4) & 15) == 0.
 */
-   sp = ((sp + 4) & -16ul) - 4;
+   sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
 #else /* !CONFIG_X86_32 */
-   sp = round_down(sp, 16) - 8;
+   sp = round_down(sp, FRAME_ALIGNMENT) - 8;
 #endif
return sp;
 }
@@ -663,6 +668,54 @@ SYSCALL_DEFINE0(rt_sigreturn)
return 0;
 }
 
+/*
+ * There are four different struct types for signal frame: sigframe_ia32,
+ * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case
+ * -- the largest size. It means the size for 64-bit apps is a bit more
+ * than needed, but this keeps the code simple.
+ */
+#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)

Re: [PATCH] Insert SFENCE.VMA in function set_pte_at for RISCV

2021-03-15 Thread Jiuyang Liu

> As per my understanding, we don't need to explicitly invalidate local TLB
> in set_pte() or set_pet_at() because generic Linux page table management
> (/mm/*) will call the appropriate flush_tlb_xyz() function after page
> table updates.

I witnessed this bug in our micro-architecture: set_pte instruction is
still in the store buffer, no functions are inserting SFENCE.VMA in
the stack below, so TLB cannot witness this modification.
Here is my call stack:
set_pte
set_pte_at
map_vm_area
__vmalloc_area_node
__vmalloc_node_range
__vmalloc_node
__vmalloc_node_flags
vzalloc
n_tty_open

I think this is an architecture specific code, so /mm/* should
not be modified.
And spec requires SFENCE.VMA to be inserted on each modification to
TLB. So I added code here.

> Also, just local TLB flush is generally not sufficient because
> a lot of page tables will be used across on multiple HARTs.

Yes, this is the biggest issue, in RISC-V Volume 2, Privileged Spec v.
20190608 page 67 gave a solution:
Consequently, other harts must be notified separately when the
memory-management data structures have been modified. One approach is
to use
1) a local data fence to ensure local writes are visible globally,
then 2) an interprocessor interrupt to the other thread,
then 3) a local SFENCE.VMA in the interrupt handler of the remote thread,
and finally 4) signal back to originating thread that operation is
complete. This is, of course, the RISC-V analog to a TLB shootdown.

In general, this patch didn't handle the G bit in PTE, kernel trap it
to sbi_remote_sfence_vma. do you think I should use flush_tlb_all?

Jiuyang




arch/arm/mm/mmu.c
void set_pte_at(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep, pte_t pteval)
{
unsigned long ext = 0;

if (addr < TASK_SIZE && pte_valid_user(pteval)) {
if (!pte_special(pteval))
__sync_icache_dcache(pteval);
ext |= PTE_EXT_NG;
}

set_pte_ext(ptep, pteval, ext);
}

arch/mips/include/asm/pgtable.h
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
  pte_t *ptep, pte_t pteval)
{

if (!pte_present(pteval))
goto cache_sync_done;

if (pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval)))
goto cache_sync_done;

__update_cache(addr, pteval);
cache_sync_done:
set_pte(ptep, pteval);
}


Also, just local TLB flush is generally not sufficient because
> a lot of page tables will be used accross on multiple HARTs.


On Tue, Mar 16, 2021 at 5:05 AM Anup Patel  wrote:
>
> +Alex
>
> On Tue, Mar 16, 2021 at 9:20 AM Jiuyang Liu  wrote:
> >
> > This patch inserts SFENCE.VMA after modifying PTE based on RISC-V
> > specification.
> >
> > arch/riscv/include/asm/pgtable.h:
> > 1. implement pte_user, pte_global and pte_leaf to check correspond
> > attribute of a pte_t.
>
> Adding pte_user(), pte_global(), and pte_leaf() is fine.
>
> >
> > 2. insert SFENCE.VMA in set_pte_at based on RISC-V Volume 2, Privileged
> > Spec v. 20190608 page 66 and 67:
> > If software modifies a non-leaf PTE, it should execute SFENCE.VMA with
> > rs1=x0. If any PTE along the traversal path had its G bit set, rs2 must
> > be x0; otherwise, rs2 should be set to the ASID for which the
> > translation is being modified.
> > If software modifies a leaf PTE, it should execute SFENCE.VMA with rs1
> > set to a virtual address within the page. If any PTE along the traversal
> > path had its G bit set, rs2 must be x0; otherwise, rs2 should be set to
> > the ASID for which the translation is being modified.
> >
> > arch/riscv/include/asm/tlbflush.h:
> > 1. implement get_current_asid to get current program asid.
> > 2. implement local_flush_tlb_asid to flush tlb with asid.
>
> As per my understanding, we don't need to explicitly invalidate local TLB
> in set_pte() or set_pet_at() because generic Linux page table management
> (/mm/*) will call the appropriate flush_tlb_xyz() function after page
> table updates. Also, just local TLB flush is generally not sufficient because
> a lot of page tables will be used accross on multiple HARTs.
>
> >
> > Signed-off-by: Jiuyang Liu 
> > ---
> >  arch/riscv/include/asm/pgtable.h  | 27 +++
> >  arch/riscv/include/asm/tlbflush.h | 12 
> >  2 files changed, 39 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/pgtable.h 
> > b/arch/riscv/include/asm/pgtable.h
> > index ebf817c1bdf4..5a47c60372c1 100644
> > --- a/arch/riscv/include/asm/pgtable.h
> > +++ b/arch/riscv/include/asm/pgtable.h
> > @@ -222,6 +222,16 @@ static inline int pte_write(pte_t pte)
> > return pte_val(pte) & _PAGE_WRITE;
> >  }
> >
> > +static inline int pte_user(pte_t pte)
> > +{
> > +   return pte_val(pte) & _PAGE_USER;
> > +}
> > +
> > +static inline int pte_global(pte_t pte)
> > +{
> > +   return pte_val(pte) & _PAGE_GLOBAL;
> > +}
> > +
> >  static inlin

[PATCH v7 1/6] uapi: Define the aux vector AT_MINSIGSTKSZ

2021-03-15 Thread Chang S. Bae

Define the AT_MINSIGSTKSZ in generic Linux. It is already used as generic
ABI in glibc's generic elf.h, and this define will prevent future namespace
conflicts. In particular, x86 is also using this generic definition.

Signed-off-by: Chang S. Bae 
Reviewed-by: Len Brown 
Cc: Carlos O'Donell 
Cc: Dave Martin 
Cc: libc-al...@sourceware.org
Cc: linux-a...@vger.kernel.org
Cc: linux-...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
---
Change from v6:
* Revised the comment. (Borislav Petkov)

Change from v5:
* Reverted the arm64 change. (Dave Martin and Will Deacon)
* Massaged the changelog.

Change from v4:
* Added as a new patch (Carlos O'Donell)
---
 include/uapi/linux/auxvec.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h
index abe5f2b6581b..c7e502bf5a6f 100644
--- a/include/uapi/linux/auxvec.h
+++ b/include/uapi/linux/auxvec.h
@@ -33,5 +33,8 @@
 
 #define AT_EXECFN  31  /* filename of program */
 
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51  /* minimal stack size for signal delivery */
+#endif
 
 #endif /* _UAPI_LINUX_AUXVEC_H */
-- 
2.17.1

[PATCH v7 0/6] x86: Improve Minimum Alternate Stack Size

2021-03-15 Thread Chang S. Bae

During signal entry, the kernel pushes data onto the normal userspace
stack. On x86, the data pushed onto the user stack includes XSAVE state,
which has grown over time as new features and larger registers have been
added to the architecture.

MINSIGSTKSZ is a constant provided in the kernel signal.h headers and
typically distributed in lib-dev(el) packages, e.g. [1]. Its value is
compiled into programs and is part of the user/kernel ABI. The MINSIGSTKSZ
constant indicates to userspace how much data the kernel expects to push on
the user stack, [2][3].

However, this constant is much too small and does not reflect recent
additions to the architecture. For instance, when AVX-512 states are in
use, the signal frame size can be 3.5KB while MINSIGSTKSZ remains 2KB.

The bug report [4] explains this as an ABI issue. The small MINSIGSTKSZ can
cause user stack overflow when delivering a signal.

In this series, we suggest a couple of things:
1. Provide a variable minimum stack size to userspace, as a similar
   approach to [5].
2. Avoid using a too-small alternate stack.

Changes from v6 [11]:
* Updated and fixed the documentation. (Borislav Petkov)
* Revised the AT_MINSIGSTKSZ comment. (Borislav Petkov)

Changes form v5 [10]:
* Fixed the overflow detection. (Andy Lutomirski)
* Reverted the AT_MINSIGSTKSZ removal on arm64. (Dave Martin)
* Added a documentation about the x86 AT_MINSIGSTKSZ.
* Supported the existing sigaltstack test to use the new aux vector.

Changes from v4 [9]:
* Moved the aux vector define to the generic header. (Carlos O'Donell)

Changes from v3 [8]:
* Updated the changelog. (Borislav Petkov)
* Revised the test messages again. (Borislav Petkov)

Changes from v2 [7]:
* Simplified the sigaltstack overflow prevention. (Jann Horn)
* Renamed fpstate size helper with cleanup. (Borislav Petkov)
* Cleaned up the signframe struct size defines. (Borislav Petkov)
* Revised the selftest messages. (Borislav Petkov)
* Revised a changelog. (Borislav Petkov)

Changes from v1 [6]:
* Took stack alignment into account for sigframe size. (Dave Martin)

[1]: 
https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/bits/sigstack.h;h=b9dca794da093dc4d41d39db9851d444e1b54d9b;hb=HEAD
[2]: https://www.gnu.org/software/libc/manual/html_node/Signal-Stack.html
[3]: https://man7.org/linux/man-pages/man2/sigaltstack.2.html
[4]: https://bugzilla.kernel.org/show_bug.cgi?id=153531
[5]: 
https://blog.linuxplumbersconf.org/2017/ocw/system/presentations/4671/original/plumbers-dm-2017.pdf
[6]: 
https://lore.kernel.org/lkml/20200929205746.6763-1-chang.seok@intel.com/
[7]: https://lore.kernel.org/lkml/20201119190237.626-1-chang.seok@intel.com/
[8]: 
https://lore.kernel.org/lkml/20201223015312.4882-1-chang.seok@intel.com/
[9]: 
https://lore.kernel.org/lkml/20210115211038.2072-1-chang.seok@intel.com/
[10]: 
https://lore.kernel.org/lkml/20210203172242.29644-1-chang.seok@intel.com/
[11]: 
https://lore.kernel.org/lkml/20210227165911.32757-1-chang.seok@intel.com/

Chang S. Bae (6):
  uapi: Define the aux vector AT_MINSIGSTKSZ
  x86/signal: Introduce helpers to get the maximum signal frame size
  x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ
  selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available
  x86/signal: Detect and prevent an alternate signal stack overflow
  selftest/x86/signal: Include test cases for validating sigaltstack

 Documentation/x86/elf_auxvec.rst  |  53 +
 Documentation/x86/index.rst   |   1 +
 arch/x86/include/asm/elf.h|   4 +
 arch/x86/include/asm/fpu/signal.h |   2 +
 arch/x86/include/asm/sigframe.h   |   2 +
 arch/x86/include/uapi/asm/auxvec.h|   4 +-
 arch/x86/kernel/cpu/common.c  |   3 +
 arch/x86/kernel/fpu/signal.c  |  19 
 arch/x86/kernel/signal.c  |  72 +++-
 include/uapi/linux/auxvec.h   |   3 +
 tools/testing/selftests/sigaltstack/sas.c |  20 +++-
 tools/testing/selftests/x86/Makefile  |   2 +-
 tools/testing/selftests/x86/sigaltstack.c | 128 ++
 13 files changed, 300 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/x86/elf_auxvec.rst
 create mode 100644 tools/testing/selftests/x86/sigaltstack.c


base-commit: 1e28eed17697bcf343c6743f0028cc3b5dd88bf0
-- 
2.17.1

[PATCH] ALSA: hda/realtek: fix mute/micmute LEDs for HP 840 G8

2021-03-15 Thread Jeremy Szu

 * The HP EliteBook 840 G8 Notebook PC is using ALC285 codec which is
   using 0x04 to control mute LED and 0x01 to control micmute LED.
   Therefore, add a quirk to make it works.

Signed-off-by: Jeremy Szu 
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b47504fa8dfd..e568bf460bae 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8048,6 +8048,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", 
ALC245_FIXUP_HP_X360_AMP),
+   SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", 
ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", 
ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
-- 
2.30.1

[PATCH] power-supply: use kobj_to_dev()

2021-03-15 Thread Jian Dong

From: dongjian 

Use kobj_to_dev() instead of open-coding it

Signed-off-by: dongjian 
---
 drivers/power/supply/ds2781_battery.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/power/supply/ds2781_battery.c 
b/drivers/power/supply/ds2781_battery.c
index 3df3c82..05b859b 100644
--- a/drivers/power/supply/ds2781_battery.c
+++ b/drivers/power/supply/ds2781_battery.c
@@ -626,7 +626,7 @@ static ssize_t ds2781_read_param_eeprom_bin(struct file 
*filp,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct power_supply *psy = to_power_supply(dev);
struct ds2781_device_info *dev_info = to_ds2781_device_info(psy);
 
@@ -639,7 +639,7 @@ static ssize_t ds2781_write_param_eeprom_bin(struct file 
*filp,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct power_supply *psy = to_power_supply(dev);
struct ds2781_device_info *dev_info = to_ds2781_device_info(psy);
int ret;
@@ -671,7 +671,7 @@ static ssize_t ds2781_read_user_eeprom_bin(struct file 
*filp,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct power_supply *psy = to_power_supply(dev);
struct ds2781_device_info *dev_info = to_ds2781_device_info(psy);
 
@@ -685,7 +685,7 @@ static ssize_t ds2781_write_user_eeprom_bin(struct file 
*filp,
struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t count)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct power_supply *psy = to_power_supply(dev);
struct ds2781_device_info *dev_info = to_ds2781_device_info(psy);
int ret;
-- 
1.9.1

Re: [PATCH v1 10/14] mm: multigenerational lru: core

2021-03-15 Thread Huang, Ying

Yu Zhao  writes:

> On Tue, Mar 16, 2021 at 10:08:51AM +0800, Huang, Ying wrote:
>> Yu Zhao  writes:
>> [snip]
>> 
>> > +/* Main function used by foreground, background and user-triggered aging. 
>> > */
>> > +static bool walk_mm_list(struct lruvec *lruvec, unsigned long next_seq,
>> > +   struct scan_control *sc, int swappiness)
>> > +{
>> > +  bool last;
>> > +  struct mm_struct *mm = NULL;
>> > +  int nid = lruvec_pgdat(lruvec)->node_id;
>> > +  struct mem_cgroup *memcg = lruvec_memcg(lruvec);
>> > +  struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
>> > +
>> > +  VM_BUG_ON(next_seq > READ_ONCE(lruvec->evictable.max_seq));
>> > +
>> > +  /*
>> > +   * For each walk of the mm list of a memcg, we decrement the priority
>> > +   * of its lruvec. For each walk of memcgs in kswapd, we increment the
>> > +   * priorities of all lruvecs.
>> > +   *
>> > +   * So if this lruvec has a higher priority (smaller value), it means
>> > +   * other concurrent reclaimers (global or memcg reclaim) have walked
>> > +   * its mm list. Skip it for this priority to balance the pressure on
>> > +   * all memcgs.
>> > +   */
>> > +#ifdef CONFIG_MEMCG
>> > +  if (!mem_cgroup_disabled() && !cgroup_reclaim(sc) &&
>> > +  sc->priority > atomic_read(&lruvec->evictable.priority))
>> > +  return false;
>> > +#endif
>> > +
>> > +  do {
>> > +  last = get_next_mm(lruvec, next_seq, swappiness, &mm);
>> > +  if (mm)
>> > +  walk_mm(lruvec, mm, swappiness);
>> > +
>> > +  cond_resched();
>> > +  } while (mm);
>> 
>> It appears that we need to scan the whole address space of multiple
>> processes in this loop?
>> 
>> If so, I have some concerns about the duration of the function.  Do you
>> have some number of the distribution of the duration of the function?
>> And may be the number of mm_struct and the number of pages scanned.
>> 
>> In comparison, in the traditional LRU algorithm, for each round, only a
>> small subset of the whole physical memory is scanned.
>
> Reasonable concerns, and insightful too. We are sensitive to direct
> reclaim latency, and we tuned another path carefully so that direct
> reclaims virtually don't hit this path :)
>
> Some numbers from the cover letter first:
>   In addition, direct reclaim latency is reduced by 22% at 99th
>   percentile and the number of refaults is reduced 7%. These metrics are
>   important to phones and laptops as they are correlated to user
>   experience.
>
> And "another path" is the background aging in kswapd:
>   age_active_anon()
> age_lru_gens()
>   try_walk_mm_list()
> /* try to spread pages out across spread+1 generations */
> if (old_and_young[0] >= old_and_young[1] * spread &&
> min_nr_gens(max_seq, min_seq, swappiness) > max(spread, 
> MIN_NR_GENS))
> return;
>
> walk_mm_list(lruvec, max_seq, sc, swappiness);
>
> By default, spread = 2, which makes kswapd slight more aggressive
> than direct reclaim for our use cases. This can be entirely disabled
> by setting spread to 0, for worloads that don't care about direct
> reclaim latency, or larger values, they are more sensitive than
> ours.

OK, I see.  That can avoid the long latency in direct reclaim path.

> It's worth noting that walk_mm_list() is multithreaded -- reclaiming
> threads can work on different mm_structs on the same list
> concurrently. We do occasionally see this function in direct reclaims,
> on over-overcommitted systems, i.e., kswapd CPU usage is 100%. Under
> the same condition, we saw the current page reclaim live locked and
> triggered hardware watchdog timeouts (our hardware watchdog is set to
> 2 hours) many times.

Just to confirm, in the current page reclaim, kswapd will keep running
until watchdog?  This is avoided in your algorithm mainly via
multi-threading?  Or via direct vs. reversing page table scanning?

Best Regards,
Huang, Ying

Re: slow boot with 7fef431be9c9 ("mm/page_alloc: place pages to tail in __free_pages_core()")

2021-03-15 Thread Mike Rapoport

Hi Leo,

On Tue, Mar 16, 2021 at 12:36:29AM +, Liang, Liang (Leo) wrote:
> 
> Hi David,
> 
> Sorry for late. If revert 7fef431be9c9 (without 7fef431be9c9), the dmesg 
> attached. And looks the exception as below:
> [  +0.027833] [0x7800 - 0x783f] 20925 MB/s / 25405 
> MB/s
> [  +1.363596] [0x0001 - 0x0001003f] 222 MB/s / 222 MB/s
> [  +1.562192] [0x00010040 - 0x0001007f] 222 MB/s / 222 MB/s
> [  +1.881332] [0x00010080 - 0x000100bf] 195 MB/s / 159 MB/s
> [  +1.383388] [0x000100c0 - 0x000100ff] 219 MB/s / 221 MB/s
> [  +0.029342] [0x00010100 - 0x0001013f] 19807 MB/s / 24125 
> MB/s
> 
> What is the problem here? Do you want to check the acpi tables?

As it seems the first 16M at 0x0001 are two orders of magnitude
slower than the rest of the memory as if there is a different memory device
there.

This would explain why with 7fef431be9c9 everything gets slower as we
allocate the first (and probably quite critical) data from those 16M.

No idea how this could be related to ACPI and why ACPI initialization
causes the huge slowdown on its own.

Can you please try booting with 7fef431be9c9 still applied and with this
patch (not even compile tested):

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d883176ef2ce..780f11ca14c9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -778,6 +778,7 @@ void __init setup_arch(char **cmdline_p)
 * L1TF its contents can be leaked to user processes.
 */
memblock_reserve(0, PAGE_SIZE);
+   memblock_reserve(0x0001, SZ_16M);
 
early_reserve_initrd();
 
 
> BRs,
> Leo
> -Original Message-
> From: David Hildenbrand  
> Sent: Monday, March 15, 2021 9:04 PM
> To: Mike Rapoport 
> Cc: Liang, Liang (Leo) ; Deucher, Alexander 
> ; linux-kernel@vger.kernel.org; amd-gfx list 
> ; Andrew Morton ; 
> Huang, Ray ; Koenig, Christian ; 
> Rafael J. Wysocki ; George Kennedy 
> 
> Subject: Re: slow boot with 7fef431be9c9 ("mm/page_alloc: place pages to tail 
> in __free_pages_core()")
> 
> On 13.03.21 14:48, Mike Rapoport wrote:
> > Hi,
> > 
> > On Sat, Mar 13, 2021 at 10:05:23AM +0100, David Hildenbrand wrote:
> >>> Am 13.03.2021 um 05:04 schrieb Liang, Liang (Leo) :
> >>>
> >>> Hi David,
> >>>
> >>> Which benchmark tool you prefer? Memtest86+ or else?
> >>
> >> Hi Leo,
> >>
> >> I think you want something that runs under Linux natively.
> >>
> >> I'm planning on coding up a kernel module to walk all 4MB pages in 
> >> the freelists and perform a stream benchmark individually. Then we 
> >> might be able to identify the problematic range - if there is a 
> >> problematic range :)
> > 
> > My wild guess would be that the pages that are now at the head of free 
> > lists have wrong caching enabled. Might be worth checking in your test 
> > module.
> 
> I hacked something up real quick:
> 
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fdavidhildenbrand%2Fkstream&data=04%7C01%7Cliang.liang%40amd.com%7C61fb103eeb7647f5228408d8e7b2d7d3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637514102622932303%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ufUYQRtdSHvEkR61LiJZtsVdYZbtdGbKlzZHOQdct78%3D&reserved=0
> 
> Only briefly tested inside a VM. The output looks something like
> 
> [...]
> [ 8396.432225] [0x4580 - 0x45bf] 25322 MB/s /
> 38948 MB/s
> [ 8396.448749] [0x45c0 - 0x45ff] 24481 MB/s /
> 38946 MB/s
> [ 8396.465197] [0x4600 - 0x463f] 24892 MB/s /
> 39170 MB/s
> [ 8396.481552] [0x4640 - 0x467f] 25222 MB/s /
> 39156 MB/s
> [ 8396.498012] [0x4680 - 0x46bf] 24416 MB/s /
> 39159 MB/s
> [ 8396.514397] [0x46c0 - 0x46ff] 25469 MB/s /
> 38940 MB/s
> [ 8396.530849] [0x4700 - 0x473f] 24885 MB/s /
> 38734 MB/s
> [ 8396.547195] [0x4740 - 0x477f] 25458 MB/s /
> 38941 MB/s
> [...]
> 
> The benchmark allocates one 4 MiB chunk at a time and runs a simplified 
> STREAM benchmark a) without flushing caches b) flushing caches before every 
> memory access.
> 
> It would be great if you could run that with the *old behavior* kernel (IOW, 
> without 7fef431be9c9), so we might still be lucky to catch the problematic 
> area in the freelist.
> 
> Let's see if that will indicate anything.
> 
> --
> Thanks,
> 
> David / dhildenb



-- 
Sincerely yours,
Mike.

Re: [PATCH v2 5/5] mm/hugetlb: avoid calculating fault_mutex_hash in truncate_op case

2021-03-15 Thread Miaohe Lin

On 2021/3/16 11:07, Mike Kravetz wrote:
> On 3/15/21 7:27 PM, Miaohe Lin wrote:
>> The fault_mutex hashing overhead can be avoided in truncate_op case
>> because page faults can not race with truncation in this routine.  So
>> calculate hash for fault_mutex only in !truncate_op case to save some cpu
>> cycles.
>>
>> Reviewed-by: Mike Kravetz 
>> Signed-off-by: Miaohe Lin 
>> Cc: Mike Kravetz 
>> ---
>> v1->v2:
>> remove unnecessary initialization for variable hash
>> collect Reviewed-by tag from Mike Kravetz
> 
> My apologies for not replying sooner and any misunderstanding from my
> previous comments.
> 

That's all right.

> If the compiler is going to produce a warning because the variable is
> not initialized, then we will need to keep the initialization.
> Otherwise, this will show up as a build regression.  Ideally, there
> would be a modifier which could be used to tell the compiler the
> variable will used.  I do not know if such a modifier exists.
> 

I do not know if such a modifier exists too. But maybe not all compilers are 
intelligent
enough to not produce a warning. It would be safe to keep the initialization...

> The patch can not produce a new warning.  So, if you need to initialize

So just drop this version of the patch? Or should I send a new version with 
your Reviewed-by tag and
keep the initialization?

> the variable then do it.  My Reviewed-by still applies.
>

Re: [PATCH v2] staging: rtl8192u: remove extra lines

2021-03-15 Thread Dan Carpenter

On Tue, Mar 16, 2021 at 10:44:10AM +0800, zhaoxiao wrote:
> Remove extra lines in many functions in r8192U_wx.c.
> 
> Signed-off-by: zhaoxiao 
> ---
>  drivers/staging/rtl8192u/r8192U_wx.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
   
The commit message says you're removing lines but you're also adding
them.  :P

regards,
dan carpenter

[syzbot] general protection fault in scatterwalk_copychunks (4)

2021-03-15 Thread syzbot

Hello,

syzbot found the following issue on:

HEAD commit:47142ed6 net: dsa: bcm_sf2: Qualify phydev->dev_flags base..
git tree:   net
console output: https://syzkaller.appspot.com/x/log.txt?x=17fb9376d0
kernel config:  https://syzkaller.appspot.com/x/.config?x=eec733599e95cd87
dashboard link: https://syzkaller.appspot.com/bug?extid=66e3ea42c4b176748b9c

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+66e3ea42c4b176748...@syzkaller.appspotmail.com

general protection fault, probably for non-canonical address 
0xdc01:  [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0008-0x000f]
CPU: 1 PID: 25 Comm: kworker/u4:1 Not tainted 5.12.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Workqueue: pencrypt_parallel padata_parallel_worker
RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:68 [inline]
RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:93 [inline]
RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:77 [inline]
RIP: 0010:scatterwalk_copychunks+0x4db/0x6a0 crypto/scatterwalk.c:50
Code: ff df 80 3c 02 00 0f 85 b4 01 00 00 49 8d 44 24 08 4d 89 26 48 89 c2 48 
89 44 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 
3c 03 0f 8e 77 01 00 00 48 b8 00 00 00 00
RSP: 0018:c9dff620 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: 
RDX: 0001 RSI: 83c45a33 RDI: 0003
RBP:  R08:  R09: 88801ba09d1b
R10: 83c459e3 R11: d9e6 R12: 
R13: 0001 R14: c9dff880 R15: 
FS:  () GS:8880b9d0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 00540198 CR3: 18d08000 CR4: 001506e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 skcipher_next_slow crypto/skcipher.c:278 [inline]
 skcipher_walk_next+0x7af/0x1680 crypto/skcipher.c:363
 skcipher_walk_first+0xf8/0x3c0 crypto/skcipher.c:446
 skcipher_walk_aead_common+0x7a5/0xbc0 crypto/skcipher.c:539
 gcmaes_crypt_by_sg+0x323/0x8a0 arch/x86/crypto/aesni-intel_glue.c:658
Modules linked in:
---[ end trace 15593fd836276143 ]---
RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:68 [inline]
RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:93 [inline]
RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:77 [inline]
RIP: 0010:scatterwalk_copychunks+0x4db/0x6a0 crypto/scatterwalk.c:50
Code: ff df 80 3c 02 00 0f 85 b4 01 00 00 49 8d 44 24 08 4d 89 26 48 89 c2 48 
89 44 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 
3c 03 0f 8e 77 01 00 00 48 b8 00 00 00 00
RSP: 0018:c9dff620 EFLAGS: 00010202
RAX: dc00 RBX:  RCX: 
RDX: 0001 RSI: 83c45a33 RDI: 0003
RBP:  R08:  R09: 88801ba09d1b
R10: 83c459e3 R11: d9e6 R12: 
R13: 0001 R14: c9dff880 R15: 
FS:  () GS:8880b9d0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 00540198 CR3: 18d08000 CR4: 001506e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.

Re: [PATCH v1 09/14] mm: multigenerational lru: mm_struct list

2021-03-15 Thread Huang, Ying

Yu Zhao  writes:

> On Tue, Mar 16, 2021 at 10:07:36AM +0800, Huang, Ying wrote:
>> Rik van Riel  writes:
>> 
>> > On Sat, 2021-03-13 at 00:57 -0700, Yu Zhao wrote:
>> >
>> >> +/*
>> >> + * After pages are faulted in, they become the youngest generation.
>> >> They must
>> >> + * go through aging process twice before they can be evicted. After
>> >> first scan,
>> >> + * their accessed bit set during initial faults are cleared and they
>> >> become the
>> >> + * second youngest generation. And second scan makes sure they
>> >> haven't been used
>> >> + * since the first.
>> >> + */
>> >
>> > I have to wonder if the reductions in OOM kills and 
>> > low-memory tab discards is due to this aging policy
>> > change, rather than from the switch to virtual scanning.
>
> There are no policy changes per se. The current page reclaim also
> scans a faulted-in page at least twice before it can reclaim it.
> That said, the new aging yields a better overall result because it
> discovers every page that has been referenced since the last scan,
> in addition to what Ying has mentioned. The current page scan stops
> stops once it finds enough candidates, which may seem more
> efficiently, but actually pays the price for not finding the best.
>
>> If my understanding were correct, the temperature of the processes is
>> considered in addition to that of the individual pages.  That is, the
>> pages of the processes that haven't been scheduled after the previous
>> scanning will not be scanned.  I guess that this helps OOM kills?
>
> Yes, that's correct.
>
>> If so, how about just take advantage of that information for OOM killing
>> and page reclaiming?  For example, if a process hasn't been scheduled
>> for long time, just reclaim its private pages.
>
> This is how it works. Pages that haven't been scanned grow older
> automatically because those that have been scanned will be tagged with
> younger generation numbers. Eviction does bucket sort based on
> generation numbers and attacks the oldest.

Sorry, my original words are misleading.  What I wanted to say was that
is it good enough that

- Do not change the core algorithm of current page reclaiming.

- Add some new logic to reclaim the process private pages regardless of
  the Accessed bits if the processes are not scheduled for some long
  enough time.  This can be done before the normal page reclaiming.

So this is an one small step improvement to the current page reclaiming
algorithm via taking advantage of the scheduler information.  It's
clearly not sophisticated as your new algorithm, for example, the cold
pages in the hot processes will not be reclaimed in this stage.  But it
can reduce the overhead of scanning too.

All in all, some of your ideas may help the original LRU algorithm too.
Or some can be experimented without replacing the original algorithm.

But from another point of view, your solution can be seen as a kind of
improvement on top of the original LRU algorithm too.  It moves the
recently accessed pages to kind of multiple active lists based on
scanning page tables directly (instead of reversely).

Best Regards,
Huang, Ying

RE: [EXT] [PATCH 0/2] enable flexspi support on imx8mp

2021-03-15 Thread Kuldeep Singh

Hi Heiko,

> -Original Message-
> From: Heiko Schocher 
> Sent: Tuesday, March 16, 2021 10:34 AM
> To: linux-...@vger.kernel.org
> Cc: Heiko Schocher ; linux-arm-ker...@lists.infradead.org;
> Ashish Kumar ; Kuldeep Singh
> ; Mark Brown ; Rob Herring
> ; Shawn Guo ; Yogesh Gaur
> ; devicet...@vger.kernel.org; linux-
> ker...@vger.kernel.org
> Subject: [EXT] [PATCH 0/2] enable flexspi support on imx8mp
> 
> Caution: EXT Email
> 
> add compatible entry in nxp_fspi driver for imx8mp
> 
> new in v3:
> seperate spi changes from series:
> https://eur01.safelinks.protection.outlook.com/?url=http%3A%2F%2Flists.inf
> radead.org%2Fpipermail%2Flinux-arm-kernel%2F2021-
> March%2F643289.html&data=04%7C01%7Ckuldeep.singh%40nxp.com%
> 7C5da0c3da3dbe410baaf508d8e83903f4%7C686ea1d3bc2b4c6fa92cd99c5c3
> 01635%7C0%7C0%7C637514678868305498%7CUnknown%7CTWFpbGZsb3d8
> eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3
> D%7C1000&sdata=2uy0EKUh4Nt0BceSQbIkCZDfakid3wx5uwebw0DhEIQ
> %3D&reserved=0
> 
> into own series as Kuldeep suggested and rebased against
> git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next
> 144c79ef33536 ("Merge tag 'perf-tools-fixes-for-v5.12-2020-03-07' of
> git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux")

The changes are not on on top of spi tree
git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next
and therefore may not apply seamlessly.

I recently added driver support for imx8dxl which is accepted in spi tree
And these patches will cause conflict with it.

Kindly rebase these patches on top of the tree.

Regards
Kuldeep

Re: [PATCH] mm,hwpoison: return -EBUSY when page already poisoned

2021-03-15 Thread 堀口　直也

On Fri, Mar 12, 2021 at 11:48:31PM +, Luck, Tony wrote:
> >> will memory_failure() find it and unmap it? if succeed, then the current 
> >> will be
> >> signaled with correct vaddr and shift?
> >
> > That's a very good question.  I didn't see a SIGBUS when I first wrote this 
> > code,
> > hence all the p->mce_vaddr.  But now I'm
> > a) not sure why there wasn't a signal

We have a recent change around this behavior, which might be an answer for you.
See commit 30c9cf492704 ("mm,hwpoison: send SIGBUS to PF_MCE_EARLY processes on 
action required events").

> > b) if we are to fix the problems noted by AndyL, need to make sure that 
> > there isn't a SIGBUS
> 
> Tests on upstream kernel today show that memory_failure() is both unmapping 
> the page
> and sending a SIGBUS.

Sorry if I misunderstood the exact problem, but if the point is to allow
user processes to find poisoned virtual address without SIGBUS, one possible
way is to expose hwpoison entry via /proc/pid/pagemap (attached a draft patch 
below).
With this patch, processes easily find hwpoison entries without any new 
interface.

> 
> My biggest issue with the KERNEL_COPYIN recovery path is that we don't have 
> code
> to mark the page not present while we are still in do_machine_check(). 

It sounds to me that even if we have such code, it just narrows down the race 
window
between multiple MCEs on different CPUs. Or does it completely prevent the race?
(Another thread could touch the poisoned page just before the first thread marks
the page non-present?)

> That's resulted
> in recovery working for simple cases where there is a single get_user() call 
> followed by
> an error return if that failed. But more complex cases require more machine 
> checks and
> a touching faith that the kernel will eventually give up trying (spoiler: it 
> sometimes doesn't).
> 
> Thanks to the decode of the instruction we do have the virtual address. So we 
> just need
> a safe walk of pgd->p4d->pud->pmd->pte (truncated if we hit a huge page) with 
> a write
> of a "not-present" value. Maybe a different poison type from the one we get 
> from
> memory_failure() so that the #PF code can recognize this as a special case 
> and do any
> other work that we avoided because we were in #MC context.

As you answered in another email, p->mce_vaddr is set only on KERNEL_COPYIN 
case,
then if p->mce_vaddr is available also for generic SRAR MCE (I'm assuming that 
we are
talking about issues on race between generic SRAR MCE not only for 
KERNEL_COPYIN case),
that might be helpful, although it might be hard to implement.
And I'm afraid that walking page table could find the wrong virtual address if 
a process
have multiple entries to the single page. We could send multiple SIGBUSs for 
such case,
but maybe that's not an optimal solution.

Thanks,
Naoya Horiguchi


>From 147449a97e2ea3420ac3523f13523f5d30a13614 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi 
Date: Tue, 16 Mar 2021 14:22:21 +0900
Subject: [PATCH] pagemap: expose hwpoison entry

not-signed-off-by-yet: Naoya Horiguchi 
---
 fs/proc/task_mmu.c  |  6 ++
 include/linux/swapops.h | 12 
 tools/vm/page-types.c   |  5 -
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 602e3a52884d..08cea209bae7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1300,6 +1300,7 @@ struct pagemapread {
 #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
 #define PM_SOFT_DIRTY  BIT_ULL(55)
 #define PM_MMAP_EXCLUSIVE  BIT_ULL(56)
+#define PM_HWPOISONBIT_ULL(60)
 #define PM_FILEBIT_ULL(61)
 #define PM_SWAPBIT_ULL(62)
 #define PM_PRESENT BIT_ULL(63)
@@ -1385,6 +1386,11 @@ static pagemap_entry_t pte_to_pagemap_entry(struct 
pagemapread *pm,
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
 
+   if (is_hwpoison_entry(entry)) {
+   page = hwpoison_entry_to_page(entry);
+   flags |= PM_HWPOISON;
+   }
+
if (is_device_private_entry(entry))
page = device_private_entry_to_page(entry);
}
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d9b7c9132c2f..1b9dedbd06ab 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -323,6 +323,13 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
 }
 
+static inline struct page *hwpoison_entry_to_page(swp_entry_t entry)
+{
+   struct page *p = pfn_to_page(swp_offset(entry));
+   WARN_ON(!PageHWPoison(p));
+   return p;
+}
+
 static inline void num_poisoned_pages_inc(void)
 {
atomic_long_inc(&num_poisoned_pages);
@@ -345,6 +352,11 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
return 0;
 }
 
+static inline struct page *hwpoison_

Re: [PATCH] KCOV: Introduced tracing unique covered PCs

2021-03-15 Thread Dmitry Vyukov

On Mon, Mar 15, 2021 at 10:43 PM Alexander Lochmann
 wrote:
> On 15.03.21 09:02, Dmitry Vyukov wrote:
>   static notrace unsigned long canonicalize_ip(unsigned long ip)
>  @@ -191,18 +192,26 @@ void notrace __sanitizer_cov_trace_pc(void)
>  struct task_struct *t;
>  unsigned long *area;
>  unsigned long ip = canonicalize_ip(_RET_IP_);
>  -   unsigned long pos;
>  +   unsigned long pos, idx;
> 
>  t = current;
>  -   if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t))
>  +   if (!check_kcov_mode(KCOV_MODE_TRACE_PC | KCOV_MODE_UNIQUE_PC, 
>  t))
>  return;
> 
>  area = t->kcov_area;
>  -   /* The first 64-bit word is the number of subsequent PCs. */
>  -   pos = READ_ONCE(area[0]) + 1;
>  -   if (likely(pos < t->kcov_size)) {
>  -   area[pos] = ip;
>  -   WRITE_ONCE(area[0], pos);
>  +   if (likely(t->kcov_mode == KCOV_MODE_TRACE_PC)) {
> >>>
> >>> Does this introduce an additional real of t->kcov_mode?
> >>> If yes, please reuse the value read in check_kcov_mode.
> >> Okay. How do I get that value from check_kcov_mode() to the caller?
> >> Shall I add an additional parameter to check_kcov_mode()?
> >
> > Yes, I would try to add an additional pointer parameter for mode. I
> > think after inlining the compiler should be able to regestrize it.
> >
> Should kcov->mode be written directly to that ptr?
> Otherwise, it must be written to the already present variable mode, and
> than copied to the ptr (if not NULL).

I would expect that after inlining it won't make difference in
generated code. Is so, both options are fine. Whatever leads to a
cleaner code.

Re: [PATCH] smp: kernel/panic.c - silence warnings

2021-03-15 Thread heying (H)


Thanks for your reply.


在 2021/3/16 13:15, Randy Dunlap 写道:

On 3/15/21 9:08 PM, He Ying wrote:

We found these warnings in kernel/panic.c by using sparse tool:
warning: symbol 'panic_smp_self_stop' was not declared.
warning: symbol 'nmi_panic_self_stop' was not declared.
warning: symbol 'crash_smp_send_stop' was not declared.

To avoid them, add declarations for these three functions in
include/linux/smp.h.

Reported-by: Hulk Robot 
Signed-off-by: He Ying 
---
  include/linux/smp.h | 8 
  1 file changed, 8 insertions(+)

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 70c6f6284dcf..861a253cc179 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -50,6 +50,14 @@ extern unsigned int total_cpus;
  int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
 int wait);
  
+/*

+ * Cpus stopping functions in panic. All have default weak definations.

   definitions.

Sorry for my typo.



+ * Architecure dependent code may override them.

   Architecture-dependent

Is that necessary?




+ */
+void panic_smp_self_stop(void);
+void nmi_panic_self_stop(struct pt_regs *regs);
+void crash_smp_send_stop(void);
+
  /*
   * Call a function on all processors
   */

Re: [PATCH 4.14 00/95] 4.14.226-rc1 review

2021-03-15 Thread Samuel Zou





On 2021/3/15 21:56, gre...@linuxfoundation.org wrote:

From: Greg Kroah-Hartman 

This is the start of the stable review cycle for the 4.14.226 release.
There are 95 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Wed, 17 Mar 2021 13:57:24 +.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:

https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.226-rc1.gz
or in the git tree and branch at:

git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
linux-4.14.y
and the diffstat can be found below.

thanks,

greg k-h



Tested on x86 for 4.14.226-rc1,

Kernel repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Branch: linux-4.14.y
Version: 4.14.226-rc1
Commit: 57cc62fb2d2b8e81c02cb9197e303c7782dee4cd
Compiler: gcc version 7.3.0 (GCC)

x86 (No kernel failures)

Testcase Result Summary:
total_num: 4728
succeed_num: 4727
failed_num: 1
timeout_num: 0

Tested-by: Hulk Robot

Re: [PATCH v2] task_work: kasan: record task_work_add() call stack

2021-03-15 Thread Dmitry Vyukov

On Tue, Mar 16, 2021 at 3:44 AM Walter Wu  wrote:
>
> Why record task_work_add() call stack?
> Syzbot reports many use-after-free issues for task_work, see [1].
> After see the free stack and the current auxiliary stack, we think
> they are useless, we don't know where register the work, this work
> may be the free call stack, so that we miss the root cause and
> don't solve the use-after-free.
>
> Add task_work_add() call stack into KASAN auxiliary stack in
> order to improve KASAN report. It is useful for programmers
> to solve use-after-free issues.
>
> [1]: 
> https://groups.google.com/g/syzkaller-bugs/search?q=kasan%20use-after-free%20task_work_run
>
> Signed-off-by: Walter Wu 
> Suggested-by: Dmitry Vyukov 
> Cc: Andrey Konovalov 
> Cc: Andrey Ryabinin 
> Cc: Dmitry Vyukov 
> Cc: Alexander Potapenko 
> Cc: Andrew Morton 
> Cc: Matthias Brugger 
> Cc: Jens Axboe 
> Cc: Oleg Nesterov 
> ---
>
> v2: Fix kasan_record_aux_stack() calling sequence issue.
> Thanks for Dmitry's suggestion

Reviewed-by: Dmitry Vyukov 

> ---
>  kernel/task_work.c | 3 +++
>  mm/kasan/kasan.h   | 2 +-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/task_work.c b/kernel/task_work.c
> index 9cde961875c0..3d4852891fa8 100644
> --- a/kernel/task_work.c
> +++ b/kernel/task_work.c
> @@ -34,6 +34,9 @@ int task_work_add(struct task_struct *task, struct 
> callback_head *work,
>  {
> struct callback_head *head;
>
> +   /* record the work call stack in order to print it in KASAN reports */
> +   kasan_record_aux_stack(work);
> +
> do {
> head = READ_ONCE(task->task_works);
> if (unlikely(head == &work_exited))
> diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
> index 3436c6bf7c0c..e4629a971a3c 100644
> --- a/mm/kasan/kasan.h
> +++ b/mm/kasan/kasan.h
> @@ -146,7 +146,7 @@ struct kasan_alloc_meta {
> struct kasan_track alloc_track;
>  #ifdef CONFIG_KASAN_GENERIC
> /*
> -* call_rcu() call stack is stored into struct kasan_alloc_meta.
> +* The auxiliary stack is stored into struct kasan_alloc_meta.
>  * The free stack is stored into struct kasan_free_meta.
>  */
> depot_stack_handle_t aux_stack[2];
> --
> 2.18.0
>
> --
> You received this message because you are subscribed to the Google Groups 
> "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to kasan-dev+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/kasan-dev/20210316024410.19967-1-walter-zh.wu%40mediatek.com.

Re: [syzbot] KASAN: invalid-free in sg_finish_scsi_blk_rq

2021-03-15 Thread Douglas Gilbert


On 2021-03-15 9:59 p.m., syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:d98f554b Add linux-next specific files for 20210312
git tree:   linux-next
console output: https://syzkaller.appspot.com/x/log.txt?x=1189318ad0
kernel config:  https://syzkaller.appspot.com/x/.config?x=e362835d2e58cef6
dashboard link: https://syzkaller.appspot.com/bug?extid=0a0e8ecea895d38332e6

Unfortunately, I don't have any reproducer for this issue yet.


No need, I think I can see how it happens. A particular type of resource
error from the block layer, together with a 32 byte (or larger) SCSI
command. I'm testing a patch.

Doug Gilbert


IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+0a0e8ecea895d3833...@syzkaller.appspotmail.com

==
BUG: KASAN: double-free or invalid-free in slab_free mm/slub.c:3161 [inline]
BUG: KASAN: double-free or invalid-free in kfree+0xe5/0x7f0 mm/slub.c:4215

CPU: 0 PID: 10481 Comm: syz-executor.5 Not tainted 
5.12.0-rc2-next-20210312-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:79 [inline]
  dump_stack+0x141/0x1d7 lib/dump_stack.c:120
  print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232
  kasan_report_invalid_free+0x51/0x80 mm/kasan/report.c:357
  kasan_slab_free mm/kasan/common.c:340 [inline]
  __kasan_slab_free+0x118/0x130 mm/kasan/common.c:367
  kasan_slab_free include/linux/kasan.h:200 [inline]
  slab_free_hook mm/slub.c:1562 [inline]
  slab_free_freelist_hook+0x92/0x210 mm/slub.c:1600
  slab_free mm/slub.c:3161 [inline]
  kfree+0xe5/0x7f0 mm/slub.c:4215
  scsi_req_free_cmd include/scsi/scsi_request.h:28 [inline]
  sg_finish_scsi_blk_rq+0x690/0x810 drivers/scsi/sg.c:3224
  sg_common_write+0xa07/0xe70 drivers/scsi/sg.c:1132
  sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797
  sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline]
  sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014
  sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229
  vfs_ioctl fs/ioctl.c:48 [inline]
  __do_sys_ioctl fs/ioctl.c:753 [inline]
  __se_sys_ioctl fs/ioctl.c:739 [inline]
  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739
  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
  entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x465f69
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 
48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 
c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:7f8413efa188 EFLAGS: 0246 ORIG_RAX: 0010
RAX: ffda RBX: 0056bf60 RCX: 00465f69
RDX: 20001780 RSI: 2285 RDI: 0003
RBP: 004bfa8f R08:  R09: 
R10:  R11: 0246 R12: 0056bf60
R13: 7ffe20e16e2f R14: 7f8413efa300 R15: 00022000

Allocated by task 10481:
  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
  kasan_set_track mm/kasan/common.c:46 [inline]
  set_alloc_info mm/kasan/common.c:427 [inline]
  kasan_kmalloc mm/kasan/common.c:506 [inline]
  kasan_kmalloc mm/kasan/common.c:465 [inline]
  __kasan_kmalloc+0x99/0xc0 mm/kasan/common.c:515
  kmalloc include/linux/slab.h:561 [inline]
  kzalloc include/linux/slab.h:686 [inline]
  sg_start_req+0x16f/0x24e0 drivers/scsi/sg.c:3044
  sg_common_write+0x5fd/0xe70 drivers/scsi/sg.c:1109
  sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797
  sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline]
  sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014
  sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229
  vfs_ioctl fs/ioctl.c:48 [inline]
  __do_sys_ioctl fs/ioctl.c:753 [inline]
  __se_sys_ioctl fs/ioctl.c:739 [inline]
  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739
  do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Freed by task 10481:
  kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38
  kasan_set_track+0x1c/0x30 mm/kasan/common.c:46
  kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:357
  kasan_slab_free mm/kasan/common.c:360 [inline]
  kasan_slab_free mm/kasan/common.c:325 [inline]
  __kasan_slab_free+0xf5/0x130 mm/kasan/common.c:367
  kasan_slab_free include/linux/kasan.h:200 [inline]
  slab_free_hook mm/slub.c:1562 [inline]
  slab_free_freelist_hook+0x92/0x210 mm/slub.c:1600
  slab_free mm/slub.c:3161 [inline]
  kfree+0xe5/0x7f0 mm/slub.c:4215
  sg_start_req+0x1b33/0x24e0 drivers/scsi/sg.c:3106
  sg_common_write+0x5fd/0xe70 drivers/scsi/sg.c:1109
  sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797
  sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline]
  sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014
  sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229
  vfs_ioctl fs/ioctl.c:48 [inline]
  __do_sys_ioctl fs/ioctl.c:753 [inline]
  __se_sys_ioctl fs/ioctl.c:739 [inline]
  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:7

Re: [PATCH -tip v2 00/10] kprobes: Fix stacktrace with kretprobes

2021-03-15 Thread Masami Hiramatsu

On Mon, 15 Mar 2021 21:30:03 -0500
Josh Poimboeuf  wrote:

> On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote:
> > Hello,
> > 
> > Here is the 2nd version of the series to fix the stacktrace with kretprobe.
> > 
> > The 1st series is here;
> > 
> > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/
> > 
> > In this version I merged the ORC unwinder fix for kretprobe which discussed 
> > in the
> > previous thread. [3/10] is updated according to the Miroslav's comment. 
> > [4/10] is
> > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous 
> > tread
> > and are introduced to the series.
> > 
> > Daniel, can you also test this again? I and Josh discussed a bit different
> > method and I've implemented it on this version.
> > 
> > This actually changes the kretprobe behavisor a bit, now the instraction 
> > pointer in
> > the pt_regs passed to kretprobe user handler is correctly set the real 
> > return
> > address. So user handlers can get it via instruction_pointer() API.
> 
> When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly.
> 
> show_trace_log_lvl() reads the entire stack in lockstep with calls to
> the unwinder so that it can decide which addresses get prefixed with
> '?'.  So it expects to find an actual return address on the stack.
> Instead it finds %rsp.  So it never syncs up with unwind_next_frame()
> and shows all remaining addresses as unreliable.
> 
>   Call Trace:
>__kretprobe_trampoline_handler+0xca/0x1a0
>trampoline_handler+0x3d/0x50
>kretprobe_trampoline+0x25/0x50
>? init_test_probes.cold+0x323/0x387
>? init_kprobes+0x144/0x14c
>? init_optprobes+0x15/0x15
>? do_one_initcall+0x5b/0x300
>? lock_is_held_type+0xe8/0x140
>? kernel_init_freeable+0x174/0x2cd
>? rest_init+0x233/0x233
>? kernel_init+0xa/0x11d
>? ret_from_fork+0x22/0x30
> 
> How about pushing 'kretprobe_trampoline' instead of %rsp for the return
> address placeholder.  That fixes the above test, and removes the need
> for the weird 'state->ip == sp' check:
> 
>   Call Trace:
>__kretprobe_trampoline_handler+0xca/0x150
>trampoline_handler+0x3d/0x50
>kretprobe_trampoline+0x29/0x50
>? init_test_probes.cold+0x323/0x387
>elfcorehdr_read+0x10/0x10
>init_kprobes+0x144/0x14c
>? init_optprobes+0x15/0x15
>do_one_initcall+0x72/0x280
>kernel_init_freeable+0x174/0x2cd
>? rest_init+0x122/0x122
>kernel_init+0xa/0x10e
>ret_from_fork+0x22/0x30
> 
> Though, init_test_probes.cold() (the real return address) is still
> listed as unreliable.  Maybe we need a call to kretprobe_find_ret_addr()
> in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call
> there.

Thanks for the test!
OK, that could be acceptable. However, push %sp still needed for accessing
stack address from kretprobe handler via pt_regs. (regs->sp must point
the stack address)
Anyway, with int3, it pushes one more entry for emulating call, so I think
it is OK.
Let me update the series!

Thank you!

> 
> 
> 
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index 06f33bfebc50..70188fdd288e 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -766,19 +766,19 @@ asm(
>   "kretprobe_trampoline:\n"
>   /* We don't bother saving the ss register */
>  #ifdef CONFIG_X86_64
> - "   pushq %rsp\n"
> + /* Push fake return address to tell the unwinder it's a kretprobe */
> + "   pushq $kretprobe_trampoline\n"
>   UNWIND_HINT_FUNC
>   "   pushfq\n"
>   SAVE_REGS_STRING
>   "   movq %rsp, %rdi\n"
>   "   call trampoline_handler\n"
> - /* Replace saved sp with true return address. */
> + /* Replace the fake return address with the real one. */
>   "   movq %rax, 19*8(%rsp)\n"
>   RESTORE_REGS_STRING
>   "   popfq\n"
>  #else
>   "   pushl %esp\n"
> - UNWIND_HINT_FUNC
>   "   pushfl\n"
>   SAVE_REGS_STRING
>   "   movl %esp, %eax\n"
> diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
> index 1d1b9388a1b1..1d3de84d2410 100644
> --- a/arch/x86/kernel/unwind_orc.c
> +++ b/arch/x86/kernel/unwind_orc.c
> @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state)
>* In those cases, find the correct return address from
>* task->kretprobe_instances list.
>*/
> - if (state->ip == sp ||
> - is_kretprobe_trampoline(state->ip))
> + if (is_kretprobe_trampoline(state->ip))
>   state->ip = kretprobe_find_ret_addr(state->task,
>   &state->kr_iter);
>  
> 
> 


-- 
Masami Hiramatsu

Re: [mm, net-next v2] mm: net: memcg accounting for TCP rx zerocopy

2021-03-15 Thread Arjun Roy

On Mon, Mar 15, 2021 at 11:22 PM Arjun Roy  wrote:
>
> On Mon, Mar 15, 2021 at 9:29 PM Shakeel Butt  wrote:
> >
> > On Mon, Mar 15, 2021 at 9:20 PM Arjun Roy  wrote:
> > >
> > [...]
> > > >
> > >
> > > Apologies for the spam - looks like I forgot to rebase the first time
> > > I sent this out.
> > >
> > > Actually, on a related note, it's not 100% clear to me whether this
> > > patch (which in its current form, applies to net-next) should instead
> > > be based on the mm branch - but the most recent (clean) checkout of mm
> > > fails to build for me so net-next for now.
> > >
> >
> > It is due to "mm: page-writeback: simplify memcg handling in
> > test_clear_page_writeback()" patch in the mm tree. You would need to
> > reintroduce the lock_page_memcg() which returns the memcg and make
> > __unlock_page_memcg() non-static.
>
> To clarify, Shakeel - the tag "tag: v5.12-rc2-mmots-2021-03-11-21-49"
> fails to build on a clean checkout, without this patch, due to a
> compilation failure in mm/shmem.c, for reference:
> https://pastebin.com/raw/12eSGdGD
> (and that's why I'm basing this patch off of net-next in this email).
>
> -Arjun

Another seeming anomaly - the patch sent out passes
scripts/checkpatch.pl but netdev/checkpatch finds plenty of actionable
fixes here: 
https://patchwork.kernel.org/project/netdevbpf/patch/20210316041645.144249-1-arjunroy.k...@gmail.com/

Is netdev using some other automated checker instead of scripts/checkpatch.pl?

-Arjun

Re: [mm, net-next v2] mm: net: memcg accounting for TCP rx zerocopy

2021-03-15 Thread Arjun Roy

On Mon, Mar 15, 2021 at 9:29 PM Shakeel Butt  wrote:
>
> On Mon, Mar 15, 2021 at 9:20 PM Arjun Roy  wrote:
> >
> [...]
> > >
> >
> > Apologies for the spam - looks like I forgot to rebase the first time
> > I sent this out.
> >
> > Actually, on a related note, it's not 100% clear to me whether this
> > patch (which in its current form, applies to net-next) should instead
> > be based on the mm branch - but the most recent (clean) checkout of mm
> > fails to build for me so net-next for now.
> >
>
> It is due to "mm: page-writeback: simplify memcg handling in
> test_clear_page_writeback()" patch in the mm tree. You would need to
> reintroduce the lock_page_memcg() which returns the memcg and make
> __unlock_page_memcg() non-static.

To clarify, Shakeel - the tag "tag: v5.12-rc2-mmots-2021-03-11-21-49"
fails to build on a clean checkout, without this patch, due to a
compilation failure in mm/shmem.c, for reference:
https://pastebin.com/raw/12eSGdGD
(and that's why I'm basing this patch off of net-next in this email).

-Arjun

Re: [PATCH v3] bus: mhi: core: Check state before processing power_down

2021-03-15 Thread Manivannan Sadhasivam

On Wed, Mar 10, 2021 at 01:49:25PM -0700, Jeffrey Hugo wrote:
> We cannot process a power_down if the power state is DISABLED.  There is
> no valid mhi_ctxt in that case, so attepting to process the power_down
> will likely result in a null pointer dereference.  If the power state is
> DISABLED, there is nothing to do anyways, so just bail early.
> 
> Signed-off-by: Jeffrey Hugo 

Applied to mhi-next!

Thanks,
Mani

> ---
> 
> v3: Move the pm_lock use up
> v2: Fix subject and tweak the locking to avoid needless lock/unlock/relock
> 
>  drivers/bus/mhi/core/pm.c | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> index 414da4f..ea62549 100644
> --- a/drivers/bus/mhi/core/pm.c
> +++ b/drivers/bus/mhi/core/pm.c
> @@ -1149,6 +1149,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
>   mhi_deinit_dev_ctxt(mhi_cntrl);
>  
>  error_dev_ctxt:
> + mhi_cntrl->pm_state = MHI_PM_DISABLE;
>   mutex_unlock(&mhi_cntrl->pm_mutex);
>  
>   return ret;
> @@ -1160,12 +1161,19 @@ void mhi_power_down(struct mhi_controller *mhi_cntrl, 
> bool graceful)
>   enum mhi_pm_state cur_state, transition_state;
>   struct device *dev = &mhi_cntrl->mhi_dev->dev;
>  
> + mutex_lock(&mhi_cntrl->pm_mutex);
> + write_lock_irq(&mhi_cntrl->pm_lock);
> + cur_state = mhi_cntrl->pm_state;
> + if (cur_state == MHI_PM_DISABLE) {
> + write_unlock_irq(&mhi_cntrl->pm_lock);
> + mutex_unlock(&mhi_cntrl->pm_mutex);
> + return; /* Already powered down */
> + }
> +
>   /* If it's not a graceful shutdown, force MHI to linkdown state */
>   transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS :
>  MHI_PM_LD_ERR_FATAL_DETECT;
>  
> - mutex_lock(&mhi_cntrl->pm_mutex);
> - write_lock_irq(&mhi_cntrl->pm_lock);
>   cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state);
>   if (cur_state != transition_state) {
>   dev_err(dev, "Failed to move to state: %s from: %s\n",
> -- 
> Qualcomm Technologies, Inc. is a member of the
> Code Aurora Forum, a Linux Foundation Collaborative Project.
>

Re: [PATCH v3] bus: mhi: core: Check state before processing power_down

2021-03-15 Thread Manivannan Sadhasivam

On Wed, Mar 10, 2021 at 01:49:25PM -0700, Jeffrey Hugo wrote:
> We cannot process a power_down if the power state is DISABLED.  There is
> no valid mhi_ctxt in that case, so attepting to process the power_down
> will likely result in a null pointer dereference.  If the power state is
> DISABLED, there is nothing to do anyways, so just bail early.
> 
> Signed-off-by: Jeffrey Hugo 

Reviewed-by: Manivannan Sadhasivam 

Thanks,
Mani

> ---
> 
> v3: Move the pm_lock use up
> v2: Fix subject and tweak the locking to avoid needless lock/unlock/relock
> 
>  drivers/bus/mhi/core/pm.c | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> index 414da4f..ea62549 100644
> --- a/drivers/bus/mhi/core/pm.c
> +++ b/drivers/bus/mhi/core/pm.c
> @@ -1149,6 +1149,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl)
>   mhi_deinit_dev_ctxt(mhi_cntrl);
>  
>  error_dev_ctxt:
> + mhi_cntrl->pm_state = MHI_PM_DISABLE;
>   mutex_unlock(&mhi_cntrl->pm_mutex);
>  
>   return ret;
> @@ -1160,12 +1161,19 @@ void mhi_power_down(struct mhi_controller *mhi_cntrl, 
> bool graceful)
>   enum mhi_pm_state cur_state, transition_state;
>   struct device *dev = &mhi_cntrl->mhi_dev->dev;
>  
> + mutex_lock(&mhi_cntrl->pm_mutex);
> + write_lock_irq(&mhi_cntrl->pm_lock);
> + cur_state = mhi_cntrl->pm_state;
> + if (cur_state == MHI_PM_DISABLE) {
> + write_unlock_irq(&mhi_cntrl->pm_lock);
> + mutex_unlock(&mhi_cntrl->pm_mutex);
> + return; /* Already powered down */
> + }
> +
>   /* If it's not a graceful shutdown, force MHI to linkdown state */
>   transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS :
>  MHI_PM_LD_ERR_FATAL_DETECT;
>  
> - mutex_lock(&mhi_cntrl->pm_mutex);
> - write_lock_irq(&mhi_cntrl->pm_lock);
>   cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state);
>   if (cur_state != transition_state) {
>   dev_err(dev, "Failed to move to state: %s from: %s\n",
> -- 
> Qualcomm Technologies, Inc. is a member of the
> Code Aurora Forum, a Linux Foundation Collaborative Project.
>

Re: [PATCH 3/3] arm64: dts: qcom: sm8150: add i2c nodes

2021-03-15 Thread Vinod Koul

On 10-03-21, 16:31, Caleb Connolly wrote:
> Tested on the OnePlus 7 Pro (including DMA).

Lgtm:

Reviewed-by: Vinod Koul 

But missing enabling nodes in board dts ..?

> 
> Signed-off-by: Caleb Connolly 
> ---
>  arch/arm64/boot/dts/qcom/sm8150.dtsi | 521 +++
>  1 file changed, 521 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi 
> b/arch/arm64/boot/dts/qcom/sm8150.dtsi
> index 543417d74216..0a38ad54c715 100644
> --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
> @@ -588,6 +588,111 @@ qupv3_id_0: geniqup@8c {
>   #size-cells = <2>;
>   ranges;
>   status = "disabled";
> +
> + i2c0: i2c@88 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x0088 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S0_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c0_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c1: i2c@884000 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x00884000 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S1_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c1_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c2: i2c@888000 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x00888000 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S2_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c2_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c3: i2c@88c000 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x0088c000 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S3_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c3_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c4: i2c@89 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x0089 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S4_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c4_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c5: i2c@894000 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x00894000 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S5_CLK>;
> + pinctrl-names = "default";
> + pinctrl-0 = <&qup_i2c5_default>;
> + interrupts = ;
> + #address-cells = <1>;
> + #size-cells = <0>;
> + status = "disabled";
> + };
> +
> + i2c6: i2c@898000 {
> + compatible = "qcom,geni-i2c";
> + reg = <0 0x00898000 0 0x4000>;
> + clock-names = "se";
> + clocks = <&gcc GCC_QUPV3_WRAP0_S6_CLK>;
> + pinctrl-names = "default";
> +

Re: [PATCH 2/3] arm64: dts: qcom: sm8150: add iommus to qups

2021-03-15 Thread Vinod Koul

On 10-03-21, 16:31, Caleb Connolly wrote:
> Hook up the SMMU for doing DMA over i2c. Some peripherals like
> touchscreens easily exceed 32-bytes per transfer, causing errors and
> lockups without this.

Why not squash this to patch 1..?

> 
> Signed-off-by: Caleb Connolly 
> ---
> Fixes i2c on the OnePlus 7, without this touching the screen with more
> than 4 fingers causes the device to lock up and reboot.
> ---
>  arch/arm64/boot/dts/qcom/sm8150.dtsi | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi 
> b/arch/arm64/boot/dts/qcom/sm8150.dtsi
> index 03e05d98daf2..543417d74216 100644
> --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
> @@ -583,6 +583,7 @@ qupv3_id_0: geniqup@8c {
>   clock-names = "m-ahb", "s-ahb";
>   clocks = <&gcc GCC_QUPV3_WRAP_0_M_AHB_CLK>,
><&gcc GCC_QUPV3_WRAP_0_S_AHB_CLK>;
> + iommus = <&apps_smmu 0xc3 0x0>;
>   #address-cells = <2>;
>   #size-cells = <2>;
>   ranges;
> @@ -595,6 +596,7 @@ qupv3_id_1: geniqup@ac {
>   clock-names = "m-ahb", "s-ahb";
>   clocks = <&gcc GCC_QUPV3_WRAP_1_M_AHB_CLK>,
><&gcc GCC_QUPV3_WRAP_1_S_AHB_CLK>;
> + iommus = <&apps_smmu 0x603 0x0>;
>   #address-cells = <2>;
>   #size-cells = <2>;
>   ranges;
> @@ -617,6 +619,7 @@ qupv3_id_2: geniqup@cc {
>   clock-names = "m-ahb", "s-ahb";
>   clocks = <&gcc GCC_QUPV3_WRAP_2_M_AHB_CLK>,
><&gcc GCC_QUPV3_WRAP_2_S_AHB_CLK>;
> + iommus = <&apps_smmu 0x7a3 0x0>;
>   #address-cells = <2>;
>   #size-cells = <2>;
>   ranges;
> -- 
> 2.29.2
> 

-- 
~Vinod

Re: [PATCH v2 RESEND] bus: mhi: core: Wait for ready state after reset

2021-03-15 Thread Manivannan Sadhasivam

On Wed, Mar 10, 2021 at 01:41:58PM -0700, Jeffrey Hugo wrote:
> After the device has signaled the end of reset by clearing the reset bit,
> it will automatically reinit MHI and the internal device structures.  Once
> That is done, the device will signal it has entered the ready state.
> 
> Signaling the ready state involves sending an interrupt (MSI) to the host
> which might cause IOMMU faults if it occurs at the wrong time.
> 
> If the controller is being powered down, and possibly removed, then the
> reset flow would only wait for the end of reset.  At which point, the host
> and device would start a race.  The host may complete its reset work, and
> remove the interrupt handler, which would cause the interrupt to be
> disabled in the IOMMU.  If that occurs before the device signals the ready
> state, then the IOMMU will fault since it blocked an interrupt.  While
> harmless, the fault would appear like a serious issue has occurred so let's
> silence it by making sure the device hits the ready state before the host
> completes its reset processing.
> 
> Signed-off-by: Jeffrey Hugo 
> ---
>  drivers/bus/mhi/core/pm.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> index adb0e80..414da4f 100644
> --- a/drivers/bus/mhi/core/pm.c
> +++ b/drivers/bus/mhi/core/pm.c
> @@ -467,7 +467,7 @@ static void mhi_pm_disable_transition(struct 
> mhi_controller *mhi_cntrl)
>  
>   /* Trigger MHI RESET so that the device will not access host memory */
>   if (!MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
> - u32 in_reset = -1;
> + u32 in_reset = -1, ready = 0;
>   unsigned long timeout = msecs_to_jiffies(mhi_cntrl->timeout_ms);
>  
>   dev_dbg(dev, "Triggering MHI Reset in device\n");
> @@ -490,6 +490,21 @@ static void mhi_pm_disable_transition(struct 
> mhi_controller *mhi_cntrl)
>* hence re-program it
>*/
>   mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0);
> +
> + if (!MHI_IN_PBL(mhi_get_exec_env(mhi_cntrl))) {
> + /* wait for ready to be set */
> + ret = wait_event_timeout(mhi_cntrl->state_event,
> +  mhi_read_reg_field(mhi_cntrl,
> + mhi_cntrl->regs,
> + MHISTATUS,
> + MHISTATUS_READY_MASK,
> + MHISTATUS_READY_SHIFT,
> + &ready)
> +  || ready, timeout);
> + if (!ret || !ready)
> + dev_warn(dev,
> + "Device failed to enter READY state\n");

Wouldn't dev_err be more appropriate here provided that we might get IOMMU fault
anytime soon?

Thanks,
Mani

> + }
>   }
>  
>   dev_dbg(dev,
> -- 
> Qualcomm Technologies, Inc. is a member of the
> Code Aurora Forum, a Linux Foundation Collaborative Project.
>

Re: [PATCH V4 4/7] vDPA/ifcvf: remove the version number string

2021-03-15 Thread Jason Wang




在 2021/3/15 下午3:44, Zhu Lingshan 写道:

This commit removes the version number string, using kernel
version is enough.

Signed-off-by: Zhu Lingshan 
Reviewed-by: Leon Romanovsky 



Acked-by: Jason Wang 



---
  drivers/vdpa/ifcvf/ifcvf_main.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index fd5befc5cbcc..c34e1eec6b6c 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -14,7 +14,6 @@
  #include 
  #include "ifcvf_base.h"
  
-#define VERSION_STRING  "0.1"

  #define DRIVER_AUTHOR   "Intel Corporation"
  #define IFCVF_DRIVER_NAME   "ifcvf"
  
@@ -503,4 +502,3 @@ static struct pci_driver ifcvf_driver = {

  module_pci_driver(ifcvf_driver);
  
  MODULE_LICENSE("GPL v2");

-MODULE_VERSION(VERSION_STRING);

Re: [PATCH V4 3/7] vDPA/ifcvf: rename original IFCVF dev ids to N3000 ids

2021-03-15 Thread Jason Wang




在 2021/3/15 下午3:44, Zhu Lingshan 写道:

IFCVF driver probes multiple types of devices now,
to distinguish the original device driven by IFCVF
from others, it is renamed as "N3000".

Signed-off-by: Zhu Lingshan 



Acked-by: Jason Wang 

If you want to have a general driver, you probaby need to rename the driver.

Thanks



---
  drivers/vdpa/ifcvf/ifcvf_base.h | 8 
  drivers/vdpa/ifcvf/ifcvf_main.c | 8 
  2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 75d9a8052039..794d1505d857 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -18,10 +18,10 @@
  #include 
  #include 
  
-#define IFCVF_VENDOR_ID		0x1AF4

-#define IFCVF_DEVICE_ID0x1041
-#define IFCVF_SUBSYS_VENDOR_ID 0x8086
-#define IFCVF_SUBSYS_DEVICE_ID 0x001A
+#define N3000_VENDOR_ID0x1AF4
+#define N3000_DEVICE_ID0x1041
+#define N3000_SUBSYS_VENDOR_ID 0x8086
+#define N3000_SUBSYS_DEVICE_ID 0x001A
  
  #define C5000X_PL_VENDOR_ID		0x1AF4

  #define C5000X_PL_DEVICE_ID   0x1000
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 26a2dab7ca66..fd5befc5cbcc 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -480,10 +480,10 @@ static void ifcvf_remove(struct pci_dev *pdev)
  }
  
  static struct pci_device_id ifcvf_pci_ids[] = {

-   { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
-   IFCVF_DEVICE_ID,
-   IFCVF_SUBSYS_VENDOR_ID,
-   IFCVF_SUBSYS_DEVICE_ID) },
+   { PCI_DEVICE_SUB(N3000_VENDOR_ID,
+N3000_DEVICE_ID,
+N3000_SUBSYS_VENDOR_ID,
+N3000_SUBSYS_DEVICE_ID) },
{ PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
 C5000X_PL_DEVICE_ID,
 C5000X_PL_SUBSYS_VENDOR_ID,

Re: [PATCH] pinctrl: add lock in mtk_rmw function.

2021-03-15 Thread zhiyong tao

On Tue, 2021-03-16 at 13:05 +0800, Sean Wang wrote:
> Hi Zhiyong,
> 
> On Fri, Mar 12, 2021 at 2:35 PM Zhiyong Tao  wrote:
> >
> > When multiple threads operate on the same register resource
> > which include multiple pin, It will make the register resource
> > wrong to control. So we add lock to avoid the case.
> >
> > Signed-off-by: Zhiyong Tao 
> > ---
> >  drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 4 
> >  drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h | 2 ++
> >  drivers/pinctrl/mediatek/pinctrl-paris.c | 2 ++
> >  3 files changed, 8 insertions(+)
> >
> > diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c 
> > b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
> > index 72f17f26acd8..fcf7c34a 100644
> > --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
> > +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
> > @@ -58,10 +58,14 @@ void mtk_rmw(struct mtk_pinctrl *pctl, u8 i, u32 reg, 
> > u32 mask, u32 set)
> >  {
> > u32 val;
> >
> > +   mutex_lock(&pctl->lock);
> > +
> > val = mtk_r32(pctl, i, reg);
> > val &= ~mask;
> > val |= set;
> > mtk_w32(pctl, i, reg, val);
> > +
> > +   mutex_unlock(&pctl->lock);
> >  }
> >
> >  static int mtk_hw_pin_field_lookup(struct mtk_pinctrl *hw,
> > diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h 
> > b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h
> > index e2aae285b5fc..65eac708a3b3 100644
> > --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h
> > +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h
> > @@ -251,6 +251,8 @@ struct mtk_pinctrl {
> > struct mtk_eint *eint;
> > struct mtk_pinctrl_group*groups;
> > const char  **grp_names;
> > +   /* lock pin's register resource to avoid multiple threads issue*/
> > +   struct mutex lock;
> >  };
> >
> >  void mtk_rmw(struct mtk_pinctrl *pctl, u8 i, u32 reg, u32 mask, u32 set);
> > diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c 
> > b/drivers/pinctrl/mediatek/pinctrl-paris.c
> > index da1f19288aa6..48e823f6d293 100644
> > --- a/drivers/pinctrl/mediatek/pinctrl-paris.c
> > +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c
> > @@ -970,6 +970,8 @@ int mtk_paris_pinctrl_probe(struct platform_device 
> > *pdev,
> >
> > hw->nbase = hw->soc->nbase_names;
> >
> > +   mutex_init(&hw->lock);
> > +
> 
> Could you help add the mutex initialization into pinctrl-moore.c too?
> and then the patch would look good to me.

==>Hi sean,
We will add add the mutex initialization into pinctrl-moore.c too in v2.

Thanks.
> 
> > err = mtk_pctrl_build_state(pdev);
> > if (err) {
> > dev_err(&pdev->dev, "build state failed: %d\n", err);
> > --
> > 2.18.0
> >

Re: [PATCH V4 2/7] vDPA/ifcvf: enable Intel C5000X-PL virtio-net for vDPA

2021-03-15 Thread Jason Wang




在 2021/3/15 下午3:44, Zhu Lingshan 写道:

This commit enabled Intel FPGA SmartNIC C5000X-PL virtio-net
for vDPA

Signed-off-by: Zhu Lingshan 



Acked-by: Jason Wang 



---
  drivers/vdpa/ifcvf/ifcvf_base.h | 5 +
  drivers/vdpa/ifcvf/ifcvf_main.c | 5 +
  2 files changed, 10 insertions(+)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 64696d63fe07..75d9a8052039 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -23,6 +23,11 @@
  #define IFCVF_SUBSYS_VENDOR_ID0x8086
  #define IFCVF_SUBSYS_DEVICE_ID0x001A
  
+#define C5000X_PL_VENDOR_ID		0x1AF4

+#define C5000X_PL_DEVICE_ID0x1000
+#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001
+
  #define IFCVF_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC)   | \
 (1ULL << VIRTIO_F_ANY_LAYOUT)| \
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index e501ee07de17..26a2dab7ca66 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -484,6 +484,11 @@ static struct pci_device_id ifcvf_pci_ids[] = {
IFCVF_DEVICE_ID,
IFCVF_SUBSYS_VENDOR_ID,
IFCVF_SUBSYS_DEVICE_ID) },
+   { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
+C5000X_PL_DEVICE_ID,
+C5000X_PL_SUBSYS_VENDOR_ID,
+C5000X_PL_SUBSYS_DEVICE_ID) },
+
{ 0 },
  };
  MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);

Re: [PATCH V4 1/7] vDPA/ifcvf: get_vendor_id returns a device specific vendor id

2021-03-15 Thread Jason Wang




在 2021/3/15 下午3:44, Zhu Lingshan 写道:

In this commit, ifcvf_get_vendor_id() will return
a device specific vendor id of the probed pci device
than a hard code.

Signed-off-by: Zhu Lingshan 



Acked-by: Jason Wang 



---
  drivers/vdpa/ifcvf/ifcvf_main.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index fa1af301cf55..e501ee07de17 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -324,7 +324,10 @@ static u32 ifcvf_vdpa_get_device_id(struct vdpa_device 
*vdpa_dev)
  
  static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)

  {
-   return IFCVF_SUBSYS_VENDOR_ID;
+   struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+   struct pci_dev *pdev = adapter->pdev;
+
+   return pdev->subsystem_vendor;
  }
  
  static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)

RE: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json schema

2021-03-15 Thread Kuldeep Singh

Hi Vladimir,

> -Original Message-
> From: Vladimir Oltean 
> Sent: Tuesday, March 16, 2021 2:25 AM
> To: Kuldeep Singh 
> Cc: linux-...@vger.kernel.org; devicet...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Mark Brown ; Rob Herring
> 
> Subject: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json 
> schema
> 
> Caution: EXT Email
> 
> Hi Kuldeep,
> 
> On Mon, Mar 15, 2021 at 05:45:18PM +0530, Kuldeep Singh wrote:
> > Convert the Freescale DSPI binding to DT schema format using json-schema.
> >
> > Signed-off-by: Kuldeep Singh 
> > ---
[...]
> > +
> > +allOf:
> > +  - $ref: "spi-controller.yaml#"
> > +
> > +properties:
> > +  compatible:
> > +oneOf:
> > +  - enum:
> > +  - fsl,vf610-dspi
> > +  - fsl,ls1021a-v1.0-dspi
> > +  - fsl,ls1028a-dspi
> > +  - fsl,ls2085a-dspi
> > +  - fsl,lx2160a-dspi
> > +  - items:
> > +  - enum:
> > +  - fsl,ls1012a-dspi
> > +  - fsl,ls1028a-dspi
> > +  - fsl,ls1043a-dspi
> > +  - fsl,ls1046a-dspi
> > +  - fsl,ls1088a-dspi
> > +  - const: fsl,ls1021a-v1.0-dspi
> > +  - items:
> > +  - enum:
> > +  - fsl,ls2080a-dspi
> > +  - fsl,lx2160a-dspi
> > +  - const: fsl,ls2085a-dspi
> 
> Can this simply be:
>   compatible:
> oneOf:
>   - enum:
>   - fsl,vf610-dspi
>   - fsl,ls1021a-v1.0-dspi
>   - fsl,ls1012a-dspi
>   - fsl,ls1028a-dspi
>   - fsl,ls1043a-dspi
>   - fsl,ls1046a-dspi
>   - fsl,ls1088a-dspi
>   - fsl,ls2080a-dspi
>   - fsl,ls2085a-dspi
>   - fsl,lx2160a-dspi
> ?

Compatible entries in conjugation require enum and const pair.
For example, ls1012a.dtsi uses compatible = 
"fsl,ls1012a-dspi","fsl,ls1021a-v1.0-dspi";
Same goes for LS1028 as well.

Therefore, can't mention the compatible entry as single entity otherwise
it may fail "make dt_binding_check" and "make dtbs_check".

> 
> > +examples:
> > +  - |
> > +#include 
> > +#include 
> > +
> > +soc {
> > +#address-cells = <2>;
> > +#size-cells = <2>;
> > +
> > +spi@210 {
> > +compatible = "fsl,ls1028a-dspi", "fsl,ls1021a-v1.0-dspi";
> 
> This doesn't need the "fsl,ls1021a-v1.0-dspi" compatible, can you please 
> remove
> it?

I have taken this example from LS1028a.dtsi and it uses these compatibles in 
conjugation.
If "fsl,ls1021a-v1.0-dspi" is not required, then it should also be removed from 
device-tree
As well as from bindings both.

> 
> > +#address-cells = <1>;
> > +#size-cells = <0>;
> > +reg = <0x0 0x210 0x0 0x1>;
> > +interrupts = ;
> > +clock-names = "dspi";
> > +clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
> QORIQ_CLK_PLL_DIV(2)>;
> > +dmas = <&edma0 0 62>, <&edma0 0 60>;
> > +dma-names = "tx", "rx";
> > +spi-num-chipselects = <4>;
> > +little-endian;
> > +
> > +flash@0 {
> > +compatible = "jedec,spi-nor";
> > +spi-max-frequency = <1000>;
> > +reg = <0>;
> > +};
> > +};
> > +};
> 
> (...)
> 
> > -Optional property:
> > -- big-endian: If present the dspi device's registers are implemented
> > -  in big endian mode.
> 
> I don't see "big-endian" being covered in any common yaml, could you please 
> not
> delete it? The driver calls of_device_is_big_endian.

Thanks for mentioning.
Will consider this in next version after receiving feedback on other thread.

Regards
Kuldeep

[tip:master] BUILD SUCCESS 4aabf1d2f23189cabd453e10473ed5d6cce6bf5f

2021-03-15 Thread kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master
branch HEAD: 4aabf1d2f23189cabd453e10473ed5d6cce6bf5f  Merge 'x86/cpu' into 
master

elapsed time: 724m

configs tested: 134
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
x86_64   allyesconfig
riscvallmodconfig
h8300 edosk2674_defconfig
powerpcamigaone_defconfig
powerpc linkstation_defconfig
m68km5407c3_defconfig
arc nsimosci_hs_smp_defconfig
openrisc simple_smp_defconfig
arm   mainstone_defconfig
m68kmvme16x_defconfig
arm mv78xx0_defconfig
mips   rbtx49xx_defconfig
h8300h8300h-sim_defconfig
h8300   defconfig
armrealview_defconfig
s390 alldefconfig
powerpc stx_gp3_defconfig
mipsmalta_kvm_guest_defconfig
sh ap325rxa_defconfig
mips cu1000-neo_defconfig
mips   xway_defconfig
arm   versatile_defconfig
arm lpc32xx_defconfig
shshmin_defconfig
mips   ip32_defconfig
parisc   alldefconfig
ia64defconfig
sh   se7712_defconfig
arm lubbock_defconfig
x86_64  defconfig
sh   rts7751r2dplus_defconfig
powerpc  tqm8xx_defconfig
xtensa virt_defconfig
sh   se7722_defconfig
arm eseries_pxa_defconfig
m68k   bvme6000_defconfig
powerpc rainier_defconfig
pariscgeneric-32bit_defconfig
armmini2440_defconfig
i386defconfig
powerpc mpc5200_defconfig
mips  fuloong2e_defconfig
pariscgeneric-64bit_defconfig
armhisi_defconfig
powerpc mpc836x_mds_defconfig
mips rt305x_defconfig
arc haps_hs_smp_defconfig
powerpc mpc512x_defconfig
ia64 alldefconfig
cskydefconfig
powerpc   mpc834x_itxgp_defconfig
powerpc  cm5200_defconfig
armxcep_defconfig
sh  sh7785lcr_32bit_defconfig
powerpc   motionpro_defconfig
arm   h3600_defconfig
powerpc mpc8313_rdb_defconfig
powerpc  g5_defconfig
arm orion5x_defconfig
powerpc mpc837x_rdb_defconfig
powerpc  arches_defconfig
arm  pcm027_defconfig
xtensa   alldefconfig
mips  ath79_defconfig
mips   lemote2f_defconfig
ia64 allmodconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nds32   defconfig
nios2allyesconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a006-20210

[tip:x86/cpu] BUILD SUCCESS WITH WARNING 301cddc21a157a3072d789a3097857202e550a24

2021-03-15 Thread kernel test robot

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/cpu
branch HEAD: 301cddc21a157a3072d789a3097857202e550a24  objtool/x86: Use 
asm/nops.h

Warning reports:

https://lore.kernel.org/lkml/202103160701.3uxlwiwm-...@intel.com

Warning in current branch:

arch/x86/kernel/alternative.c:96:10: warning: Undefined behaviour, pointer 
arithmetic 'x86nops+10' is out of bounds. [pointerOutOfBounds]
arch/x86/kernel/ftrace.c:304:7: warning: union member 
'ftrace_op_code_union::code' is never used. [unusedStructMember]

Warning ids grouped by kconfigs:

gcc_recent_errors
`-- i386-randconfig-p002-20210315
|-- 
arch-x86-kernel-alternative.c:warning:Undefined-behaviour-pointer-arithmetic-x86nops-is-out-of-bounds.-pointerOutOfBounds
`-- 
arch-x86-kernel-ftrace.c:warning:union-member-ftrace_op_code_union::code-is-never-used.-unusedStructMember

elapsed time: 723m

configs tested: 135
configs skipped: 2

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
x86_64   allyesconfig
riscvallmodconfig
mips  malta_kvm_defconfig
arm  exynos_defconfig
ia64 bigsur_defconfig
armdove_defconfig
arm   mainstone_defconfig
m68kmvme16x_defconfig
arm mv78xx0_defconfig
mips   rbtx49xx_defconfig
h8300h8300h-sim_defconfig
h8300   defconfig
armzeus_defconfig
powerpcsocrates_defconfig
arc   tb10x_defconfig
powerpc  chrp32_defconfig
openrisc alldefconfig
arm am200epdkit_defconfig
arm palmz72_defconfig
arm   versatile_defconfig
openrisc simple_smp_defconfig
armrealview_defconfig
s390 alldefconfig
powerpc stx_gp3_defconfig
mipsmalta_kvm_guest_defconfig
sh ap325rxa_defconfig
mips cu1000-neo_defconfig
mips   xway_defconfig
arm lpc32xx_defconfig
shshmin_defconfig
mips   ip32_defconfig
parisc   alldefconfig
ia64defconfig
sh   se7712_defconfig
arm lubbock_defconfig
x86_64  defconfig
sh   rts7751r2dplus_defconfig
powerpc  tqm8xx_defconfig
xtensa virt_defconfig
sh   se7722_defconfig
arm eseries_pxa_defconfig
m68k   bvme6000_defconfig
powerpc rainier_defconfig
pariscgeneric-32bit_defconfig
armmini2440_defconfig
i386defconfig
mips rt305x_defconfig
arc haps_hs_smp_defconfig
powerpc mpc512x_defconfig
ia64 alldefconfig
cskydefconfig
powerpc   mpc834x_itxgp_defconfig
powerpc  cm5200_defconfig
armxcep_defconfig
sh  sh7785lcr_32bit_defconfig
powerpc   motionpro_defconfig
arm   h3600_defconfig
powerpc mpc8313_rdb_defconfig
powerpc  g5_defconfig
arm orion5x_defconfig
powerpc mpc837x_rdb_defconfig
powerpc  arches_defconfig
arm  pcm027_defconfig
xtensa   alldefconfig
mips  ath79_defconfig
mips   lemote2f_defconfig
ia64 allmodconfig
ia64 allyesconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
arc  allyesconfig
nds32 allnoconfig
nds32   defconfig
nios2allyesconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh

Re: [PATCH v3 RESEND] bus: mhi: core: Return EAGAIN if MHI ring is full

2021-03-15 Thread Manivannan Sadhasivam

On Wed, Mar 10, 2021 at 01:40:03PM -0700, Jeffrey Hugo wrote:
> From: Fan Wu 
> 
> Currently ENOMEM is returned when MHI ring is full. This error code is
> very misleading. Change to EAGAIN instead.
> 
> Signed-off-by: Fan Wu 
> Signed-off-by: Jeffrey Hugo 
> Reviewed-by: Hemant Kumar 
> Reviewed-by: Manivannan Sadhasivam 

Applied to mhi-next!

Thanks,
Mani

> ---
>  drivers/bus/mhi/core/main.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c
> index 2c61dfd..a7811fb 100644
> --- a/drivers/bus/mhi/core/main.c
> +++ b/drivers/bus/mhi/core/main.c
> @@ -1020,7 +1020,7 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct 
> mhi_buf_info *buf_info,
>  
>   ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
>   if (unlikely(ret)) {
> - ret = -ENOMEM;
> + ret = -EAGAIN;
>   goto exit_unlock;
>   }
>  
> -- 
> Qualcomm Technologies, Inc. is a member of the
> Code Aurora Forum, a Linux Foundation Collaborative Project.
>

drivers/md/raid5.c:2539 resize_stripes() warn: inconsistent indenting

2021-03-15 Thread kernel test robot

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   1a4431a5db2bf800c647ee0ed87f2727b8d6c29c
commit: f16acaf328c5615fdaea74f9bd0b4019544532d6 md/raid5: resize stripe_head 
when reshape array
date:   6 months ago
config: openrisc-randconfig-m031-20210316 (attached as .config)
compiler: or1k-linux-gcc (GCC) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

New smatch warnings:
drivers/md/raid5.c:2539 resize_stripes() warn: inconsistent indenting

Old smatch warnings:
drivers/md/raid5.c:2840 raid5_end_write_request() error: uninitialized symbol 
'rdev'.
drivers/md/raid5.c:2845 raid5_end_write_request() error: uninitialized symbol 
'rdev'.
drivers/md/raid5.c:6950 alloc_thread_groups() warn: double check that we're 
allocating correct size: 108 vs 1

vim +2539 drivers/md/raid5.c

  2459  
  2460  static int resize_stripes(struct r5conf *conf, int newsize)
  2461  {
  2462  /* Make all the stripes able to hold 'newsize' devices.
  2463   * New slots in each stripe get 'page' set to a new page.
  2464   *
  2465   * This happens in stages:
  2466   * 1/ create a new kmem_cache and allocate the required number 
of
  2467   *stripe_heads.
  2468   * 2/ gather all the old stripe_heads and transfer the pages 
across
  2469   *to the new stripe_heads.  This will have the side effect 
of
  2470   *freezing the array as once all stripe_heads have been 
collected,
  2471   *no IO will be possible.  Old stripe heads are freed once 
their
  2472   *pages have been transferred over, and the old kmem_cache 
is
  2473   *freed when all stripes are done.
  2474   * 3/ reallocate conf->disks to be suitable bigger.  If this 
fails,
  2475   *we simple return a failure status - no need to clean 
anything up.
  2476   * 4/ allocate new pages for the new slots in the new 
stripe_heads.
  2477   *If this fails, we don't bother trying the shrink the
  2478   *stripe_heads down again, we just leave them as they are.
  2479   *As each stripe_head is processed the new one is released 
into
  2480   *active service.
  2481   *
  2482   * Once step2 is started, we cannot afford to wait for a write,
  2483   * so we use GFP_NOIO allocations.
  2484   */
  2485  struct stripe_head *osh, *nsh;
  2486  LIST_HEAD(newstripes);
  2487  struct disk_info *ndisks;
  2488  int err = 0;
  2489  struct kmem_cache *sc;
  2490  int i;
  2491  int hash, cnt;
  2492  
  2493  md_allow_write(conf->mddev);
  2494  
  2495  /* Step 1 */
  2496  sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
  2497 sizeof(struct 
stripe_head)+(newsize-1)*sizeof(struct r5dev),
  2498 0, 0, NULL);
  2499  if (!sc)
  2500  return -ENOMEM;
  2501  
  2502  /* Need to ensure auto-resizing doesn't interfere */
  2503  mutex_lock(&conf->cache_size_mutex);
  2504  
  2505  for (i = conf->max_nr_stripes; i; i--) {
  2506  nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
  2507  if (!nsh)
  2508  break;
  2509  
  2510  list_add(&nsh->lru, &newstripes);
  2511  }
  2512  if (i) {
  2513  /* didn't get enough, give up */
  2514  while (!list_empty(&newstripes)) {
  2515  nsh = list_entry(newstripes.next, struct 
stripe_head, lru);
  2516  list_del(&nsh->lru);
  2517  free_stripe(sc, nsh);
  2518  }
  2519  kmem_cache_destroy(sc);
  2520  mutex_unlock(&conf->cache_size_mutex);
  2521  return -ENOMEM;
  2522  }
  2523  /* Step 2 - Must use GFP_NOIO now.
  2524   * OK, we have enough stripes, start collecting inactive
  2525   * stripes and copying them over
  2526   */
  2527  hash = 0;
  2528  cnt = 0;
  2529  list_for_each_entry(nsh, &newstripes, lru) {
  2530  lock_device_hash_lock(conf, hash);
  2531  wait_event_cmd(conf->wait_for_stripe,
  2532  !list_empty(conf->inactive_list + 
hash),
  2533  unlock_device_hash_lock(conf, hash),
  2534  lock_device_hash_lock(conf, hash));
  2535  osh = get_free_stripe(conf, hash);
  2536  unlock_device_hash_lock(conf, hash);
  2537  
  2538  #if PAGE_SIZE != DEFAULT_STRIPE_SIZE
> 2539  for (i = 0; i < osh->nr_pages; i++) {
  2540

Re: [PATCH 0/2] x86: Remove ideal_nops[]

2021-03-15 Thread Sedat Dilek

On Mon, Mar 15, 2021 at 11:14 PM Peter Zijlstra  wrote:
>
> On Mon, Mar 15, 2021 at 07:23:29PM +0100, Sedat Dilek wrote:
>
> > You mean something like that ^^?
> >
> > - Sedat -
> >
> > [1] 
> > https://git.zx2c4.com/laptop-kernel/commit/?id=116badbe0a18bc36ba90acb8b80cff41f9ab0686
>
> *shudder*, I was more thinking you'd simply add it to you CFLAGS when
> building. I don't see any point in having that in Kconfig.

Simply adding the CFLAGS to arch/x86/Makefile.

If I forgot to mention:

   Tested-by: Sedat Dilek . # LLVM/Clang v12.0.0-rc3

- Sedat -

Re: [PATCH v2 3/3] iio: adc: add ADC driver for the TI TSC2046 controller

2021-03-15 Thread Oleksij Rempel

Hi,

On Sat, Mar 13, 2021 at 04:11:19PM +, Jonathan Cameron wrote:
> On Fri, 12 Mar 2021 11:55:15 +0100
> Oleksij Rempel  wrote:
> 
> > Basically the TI TSC2046 touchscreen controller is 8 channel ADC optimized 
> > for
> > the touchscreen use case. By implementing it as IIO ADC device, we can
> > make use of resistive-adc-touch and iio-hwmon drivers.
> > 
> > So far, this driver was tested with custom version of resistive-adc-touch 
> > driver,
> > since it need to be extended to make use of Z1 and Z2 channels. The X/Y
> > are working without additional changes.
> > 
> > Signed-off-by: Oleksij Rempel 
> 
> So the flow in here is still rather non obvious and touchscreen specific.
> Please add some documentation on what the trigger actually is. It seems
> to be some combination of an hrtimer and a interrupt driven trigger.
> I 'think' the idea is to ensure you get one 'no touch' measurement once
> touch is removed?


Yes, this IRQ line is just a voltage level converter integrated in to
controller. As soon voltage level reaches some threshold the IRQ level
is changed. It means, the IRQ is inactive as soon as nothing is pressing
on the screen, or the channel is changed. Since we need more sample and not so
frequent, I made this construction of rate limited IRQ with extra triggering on
the end.

Potentially we can use hrtimer trigger, especially if the IRQ line
is not attached or can't be used for some reason. But for most use cases
this trigger will provide better power efficiency.

I can imagine, that in case of field board diagnostic, we may need some extra
functionality. So the workflow will look like:
- unbind the touchscreen driver
- disable oversampling
- disable settling time
- attach sysfs trigger
- enable one of channels
- start grabbing data over IIO char dev

But this functionality will need some more work and currently has lowest
prio.

> I guess we don't need the performance but I was a bit surprised that I didn't
> see this doing overlapping of the previous read out with the config for the
> next sample.

I can't follow here. Can you please explain.

> Anyhow, overall this looks pretty good to me.  Add that a bit of documentation
> for the trigger to the comments at the top of the file and tidy up the last
> few things inline.

ok

> It's a bit unusual as ADC drivers go, but not so strange that it worries
> me that much.

:)

> Thanks,
> 
> Jonathan
> 
> > ---
> >  MAINTAINERS  |   8 +
> >  drivers/iio/adc/Kconfig  |  12 +
> >  drivers/iio/adc/Makefile |   1 +
> >  drivers/iio/adc/ti-tsc2046.c | 713 +++
> >  4 files changed, 734 insertions(+)
> >  create mode 100644 drivers/iio/adc/ti-tsc2046.c
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 3fea1a934b32..2d33c6442a55 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -17852,6 +17852,14 @@ S: Supported
> >  F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt
> >  F: drivers/nfc/trf7970a.c
> >  
> > +TI TSC2046 ADC DRIVER
> > +M: Oleksij Rempel 
> > +R: ker...@pengutronix.de
> > +L: linux-...@vger.kernel.org
> > +S: Maintained
> > +F: Documentation/devicetree/bindings/iio/adc/ti,tsc2046.yaml
> > +F: drivers/iio/adc/ti-tsc2046.c
> > +
> >  TI TWL4030 SERIES SOC CODEC DRIVER
> >  M: Peter Ujfalusi 
> >  L: alsa-de...@alsa-project.org (moderated for non-subscribers)
> > diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
> > index 15587a1bc80d..6ad6f04dfd20 100644
> > --- a/drivers/iio/adc/Kconfig
> > +++ b/drivers/iio/adc/Kconfig
> > @@ -1175,6 +1175,18 @@ config TI_TLC4541
> >   This driver can also be built as a module. If so, the module will be
> >   called ti-tlc4541.
> >  
> > +config TI_TSC2046
> > +   tristate "Texas Instruments TSC2046 ADC driver"
> > +   depends on SPI
> > +   select IIO_BUFFER
> > +   select IIO_TRIGGERED_BUFFER
> > +   help
> > + Say yes here to build support for ADC functionality of Texas
> > + Instruments TSC2046 touch screen controller.
> > +
> > + This driver can also be built as a module. If so, the module will be
> > + called ti-tsc2046.
> > +
> >  config TWL4030_MADC
> > tristate "TWL4030 MADC (Monitoring A/D Converter)"
> > depends on TWL4030_CORE
> > diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
> > index 5fca90ada0ec..440e18ac6780 100644
> > --- a/drivers/iio/adc/Makefile
> > +++ b/drivers/iio/adc/Makefile
> > @@ -105,6 +105,7 @@ obj-$(CONFIG_TI_ADS8688) += ti-ads8688.o
> >  obj-$(CONFIG_TI_ADS124S08) += ti-ads124s08.o
> >  obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o
> >  obj-$(CONFIG_TI_TLC4541) += ti-tlc4541.o
> > +obj-$(CONFIG_TI_TSC2046) += ti-tsc2046.o
> >  obj-$(CONFIG_TWL4030_MADC) += twl4030-madc.o
> >  obj-$(CONFIG_TWL6030_GPADC) += twl6030-gpadc.o
> >  obj-$(CONFIG_VF610_ADC) += vf610_adc.o
> > diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c
> > new file mode 100644
> > index ..7c3ae9181164
> > --- /dev/null
> > +++ b/d

RE: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json schema

2021-03-15 Thread Kuldeep Singh

> -Original Message-
> From: Pratyush Yadav 
> Sent: Tuesday, March 16, 2021 12:01 AM
> To: Kuldeep Singh 
> Cc: linux-...@vger.kernel.org; devicet...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Mark Brown ; Rob Herring
> ; Vladimir Oltean ; linux-
> m...@lists.infradead.org
> Subject: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json 
> schema
> 
> Caution: EXT Email
> 
> +Cc mtd list
> 
> Hi,
> 
> On 15/03/21 05:45PM, Kuldeep Singh wrote:
> > Convert the Freescale DSPI binding to DT schema format using json-schema.
> >
> > Signed-off-by: Kuldeep Singh 
> > ---
> > Hi Rob,
> > This patch is checked with following commands with no warnings observed.
> > make distclean; make allmodconfig;
> > make dt_binding_check
> > DT_SCHEMA_FILES=Documentation/devicetree/bindings/spi/fsl,spi-fsl-dspi
> > .yaml; make dtbs_check
> > DT_SCHEMA_FILES=Documentation/devicetree/bindings/spi/fsl,spi-fsl-dspi
> > .yaml
> 
> When I add the "fsl,spi-cs-sck-delay" property under the flash@0 node in the
> example and run dt_binding_check, I see the below error:
> 
>   /home/pratyush/src/linux/Documentation/devicetree/bindings/spi/fsl,spi-fsl-
> dspi.example.dt.yaml: flash@0: 'fsl,spi-cs-sck-delay' does not match any of 
> the
> regexes: '^partition@', 'pinctrl-[0-9]+'
>  From schema:
> /home/pratyush/src/lin/Documentation/devicetree/bindings/mtd/jedec,spi-
> nor.yaml

Hi Pratyush,

Thanks for mentioning, I just noticed the same error after adding 
fsl,spi-cs-sck-delay property.
Since my example is not using the property, the error went unnoticed.

Taking example of nvidia qspi bindings i.e 
https://lore.kernel.org/linux-devicetree/1608585459-17250-3-git-send-email-skomatin...@nvidia.com/

I constructed other properties in similar fashion and later noticed that
example in nvidia bindings uses compatibes as "spi-nor" instead of 
"jedec,spi-nor"
and therefore passes "make dt_binding_check".

> I am trying to solve a similar problem for the Cadence QSPI controller 
> binding and
> I wonder what the best solution for this is. The obvious one would be to add
> these properties to jedec,spi-nor.yaml. I haven't managed to come up with any
> other solution to this problem.

I agree with the solution to add properties in jedec,spi-nor.yaml and adding 
properties
particular to specific controllers for flashes in generic jedec,spi-nor.yaml 
may not be
a good solution though. Please let me know your views.

Other approach is to add these properties in same binding itself (if possible) 
so as 
to limit the scope of these properties. Looking forward for more suggestions.

Regards
Kuldeep

Re: [PATCH 6/6] clk: actions: Add NIC and ETHERNET clock support for Actions S500 SoC

2021-03-15 Thread Manivannan Sadhasivam

On Mon, Mar 08, 2021 at 07:18:31PM +0200, Cristian Ciocaltea wrote:
> Add support for the missing NIC and ETHERNET clocks in the Actions Semi
> Owl S500 SoC clock driver.
> 
> Additionally, change APB clock parent from AHB to the newly added NIC.
> 
> Signed-off-by: Cristian Ciocaltea 
> ---
>  drivers/clk/actions/owl-s500.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c
> index b9e434173b4f..0ccc9619b302 100644
> --- a/drivers/clk/actions/owl-s500.c
> +++ b/drivers/clk/actions/owl-s500.c
> @@ -112,6 +112,7 @@ static const char * const bisp_clk_mux_p[] = { 
> "display_pll_clk", "dev_clk" };
>  static const char * const sensor_clk_mux_p[] = { "hosc", "bisp_clk" };
>  static const char * const sd_clk_mux_p[] = { "dev_clk", "nand_pll_clk" };
>  static const char * const pwm_clk_mux_p[] = { "losc", "hosc" };
> +static const char * const nic_clk_mux_p[] = { "dev_clk", "display_pll_clk", 
> "nand_pll_clk", "ddr_pll_clk" };

As per the reg field order, this should come after "ahbprediv_clk_mux_p"

Rest looks good.

Thanks,
Mani

>  static const char * const ahbprediv_clk_mux_p[] = { "dev_clk", 
> "display_pll_clk", "nand_pll_clk", "ddr_pll_clk" };
>  static const char * const uart_clk_mux_p[] = { "hosc", "dev_pll_clk" };
>  static const char * const de_clk_mux_p[] = { "display_pll_clk", "dev_clk" };
> @@ -197,7 +198,7 @@ static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", 
> CMU_DEVCLKEN1, 3, 0, 0);
>  
>  /* divider clocks */
>  static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 2, 2, 
> h_div_table, 0, 0);
> -static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, 
> 0, 0);
> +static OWL_DIVIDER(apb_clk, "apb_clk", "nic_clk", CMU_BUSCLK1, 14, 2, NULL, 
> 0, 0);
>  static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", 
> CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0);
>  
>  /* factor clocks */
> @@ -205,6 +206,12 @@ static OWL_FACTOR(de1_clk, "de_clk1", "de_clk", 
> CMU_DECLK, 0, 4, de_factor_table
>  static OWL_FACTOR(de2_clk, "de_clk2", "de_clk", CMU_DECLK, 4, 4, 
> de_factor_table, 0, 0);
>  
>  /* composite clocks */
> +static OWL_COMP_DIV(nic_clk, "nic_clk", nic_clk_mux_p,
> + OWL_MUX_HW(CMU_BUSCLK1, 4, 3),
> + { 0 },
> + OWL_DIVIDER_HW(CMU_BUSCLK1, 16, 2, 0, NULL),
> + 0);
> +
>  static OWL_COMP_DIV(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p,
>   OWL_MUX_HW(CMU_BUSCLK1, 8, 3),
>   { 0 },
> @@ -320,6 +327,10 @@ static OWL_COMP_FIXED_FACTOR(i2c3_clk, "i2c3_clk", 
> "ethernet_pll_clk",
>   OWL_GATE_HW(CMU_DEVCLKEN1, 31, 0),
>   1, 5, 0);
>  
> +static OWL_COMP_FIXED_FACTOR(ethernet_clk, "ethernet_clk", 
> "ethernet_pll_clk",
> + OWL_GATE_HW(CMU_DEVCLKEN1, 22, 0),
> + 1, 20, 0);
> +
>  static OWL_COMP_DIV(uart0_clk, "uart0_clk", uart_clk_mux_p,
>   OWL_MUX_HW(CMU_UART0CLK, 16, 1),
>   OWL_GATE_HW(CMU_DEVCLKEN1, 6, 0),
> @@ -454,6 +465,8 @@ static struct owl_clk_common *s500_clks[] = {
>   &apb_clk.common,
>   &dmac_clk.common,
>   &gpio_clk.common,
> + &nic_clk.common,
> + ðernet_clk.common,
>  };
>  
>  static struct clk_hw_onecell_data s500_hw_clks = {
> @@ -513,6 +526,8 @@ static struct clk_hw_onecell_data s500_hw_clks = {
>   [CLK_APB]   = &apb_clk.common.hw,
>   [CLK_DMAC]  = &dmac_clk.common.hw,
>   [CLK_GPIO]  = &gpio_clk.common.hw,
> + [CLK_NIC]   = &nic_clk.common.hw,
> + [CLK_ETHERNET]  = ðernet_clk.common.hw,
>   },
>   .num = CLK_NR_CLKS,
>  };
> -- 
> 2.30.1
>

Re: [PATCH 5.10 113/290] net: dsa: implement a central TX reallocation procedure

2021-03-15 Thread gre...@linuxfoundation.org

On Mon, Mar 15, 2021 at 05:06:16PM -0400, Sasha Levin wrote:
> On Mon, Mar 15, 2021 at 07:56:02PM +, Vladimir Oltean wrote:
> > > Signed-off-by: Vladimir Oltean 
> > > Tested-by: Christian Eggers  # For tail taggers only
> > > Tested-by: Kurt Kanzenbach 
> > > Reviewed-by: Florian Fainelli 
> > > Signed-off-by: Jakub Kicinski 
> > > Signed-off-by: Sasha Levin 
> > > ---
> > 
> > For context, Sasha explains here:
> > https://www.spinics.net/lists/stable-commits/msg190151.html
> > (the conversation is somewhat truncated, unfortunately, because
> > stable-comm...@vger.kernel.org ate my replies)
> > that 13 patches were backported to get the unrelated commit 9200f515c41f
> > ("net: dsa: tag_mtk: fix 802.1ad VLAN egress") to apply cleanly with git-am.
> > 
> > I am not strictly against this, even though I would have liked to know
> > that the maintainers were explicitly informed about it.
> > 
> > Greg, could you make your stable backporting emails include the output
> > of ./get_maintainer.pl into the list of recipients? Thanks.
> 
> Did it not happen here? I've looked at Greg's script[1] and it seemed to
> me like it does go through get_maintainer.pl.
> 
> 
> [1] 
> https://github.com/gregkh/gregkh-linux/blob/master/scripts/generate_cc_list

That's just a script I use for "normal" kernel development when creating
patches, not for stable stuff.

thanks,

greg k-h

Re: [PATCH V2] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c

2021-03-15 Thread Randy Dunlap

On 3/15/21 10:42 PM, Bhaskar Chowdhury wrote:
> 
> s/kernal/kernel/
> 
>  A punctuation added too.
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 

> ---
>  Changes from V1:
>  Punctuation missed, added as per Randy's finding
> 
>  arch/x86/events/intel/bts.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
> index 731dd8d0dbb1..1ba93c40fc54 100644
> --- a/arch/x86/events/intel/bts.c
> +++ b/arch/x86/events/intel/bts.c
> @@ -594,7 +594,7 @@ static __init int bts_init(void)
>* we cannot use the user mapping since it will not be available
>* if we're not running the owning process.
>*
> -  * With PTI we can't use the kernal map either, because its not
> +  * With PTI we can't use the kernel map either, because it's not
>* there when we run userspace.
>*
>* For now, disable this driver when using PTI.
> --


-- 
~Randy

Re: [PATCH] platform/x86: pmc_atom: use callback for all dmi quirk entries

2021-03-15 Thread Henning Schild

Hoi Hans,

on a slighly different but also related topic. Did you ever come across
SMSC SCH5347? Seems to be pretty similar to 56xx, only with spec non
public ... and probably less often in use
Maybe you happen to have code, or know the differences. We already have
it working with a modified copy of sch56xx but that is still rough and
i thought i ask before we potentially duplicate work.

groetjes,
Henning

Am Mon, 15 Mar 2021 19:01:13 +0100
schrieb Hans de Goede :

> Hi,
> 
> On 3/15/21 6:00 PM, Henning Schild wrote:
> > Am Mon, 15 Mar 2021 17:31:49 +0100
> > schrieb Hans de Goede :
> >   
> >> Hi,
> >>
> >> On 3/15/21 3:58 PM, Henning Schild wrote:  
> >>> Introduce a global variable to remember the matching entry for
> >>> later printing. Also having a callback allows to stop matching
> >>> after the first hit.
> >>>
> >>> Signed-off-by: Henning Schild 
> >>> ---
> >>>  drivers/platform/x86/pmc_atom.c | 26 --
> >>>  1 file changed, 20 insertions(+), 6 deletions(-)
> >>>
> >>> diff --git a/drivers/platform/x86/pmc_atom.c
> >>> b/drivers/platform/x86/pmc_atom.c index 38542d547f29..d0f74856cd8b
> >>> 100644 --- a/drivers/platform/x86/pmc_atom.c
> >>> +++ b/drivers/platform/x86/pmc_atom.c
> >>> @@ -364,8 +364,16 @@ static void pmc_dbgfs_register(struct pmc_dev
> >>> *pmc) #endif /* CONFIG_DEBUG_FS */
> >>>  
> >>>  static bool pmc_clk_is_critical = true;
> >>> +static const struct dmi_system_id *dmi_critical;
> >>>  
> >>> -static int siemens_clk_is_critical(const struct dmi_system_id *d)
> >>> +static int dmi_callback(const struct dmi_system_id *d)
> >>> +{
> >>> + dmi_critical = d;
> >>
> >> Don't introduce a global variable for this please. Instead just
> >> directly print the ident of the matching dmi_system_id here.  
> > 
> > Sorry, missed that part. Result looks nice and clean, thanks. I
> > think i will squash it into 4/4 in v3 and not follow up here for
> > now.  
> 
> Ack, that sounds good to me.
> 
> Regards,
> 
> Hans
> 
> 
> >>> +
> >>> + return 1;
> >>> +}
> >>> +
> >>> +static int dmi_callback_siemens(const struct dmi_system_id *d)
> >>>  {
> >>>   u32 st_id;
> >>>  
> >>> @@ -373,7 +381,7 @@ static int siemens_clk_is_critical(const
> >>> struct dmi_system_id *d) goto out;
> >>>  
> >>>   if (st_id == SIMATIC_IPC_IPC227E || st_id ==
> >>> SIMATIC_IPC_IPC277E)
> >>> - return 1;
> >>> + return dmi_callback(d);
> >>>  
> >>>  out:
> >>>   pmc_clk_is_critical = false;
> >>> @@ -388,6 +396,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk0 is used for an external HSIC USB
> >>> HUB */ .ident = "MPL CEC1x",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"),
> >>>   DMI_MATCH(DMI_PRODUCT_NAME, "CEC10
> >>> Family"), @@ -396,6 +405,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk0 - 3 are used for the 4 ethernet
> >>> controllers */ .ident = "Lex 3I380D",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "Lex
> >>> BayTrail"), DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"),
> >>> @@ -404,6 +414,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk* - are used for ethernet
> >>> controllers */ .ident = "Lex 2I385SW",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "Lex
> >>> BayTrail"), DMI_MATCH(DMI_PRODUCT_NAME, "2I385SW"),
> >>> @@ -412,6 +423,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk* - are used for ethernet
> >>> controllers */ .ident = "Beckhoff CB3163",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff
> >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB3163"),
> >>> @@ -420,6 +432,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk* - are used for ethernet
> >>> controllers */ .ident = "Beckhoff CB4063",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff
> >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB4063"),
> >>> @@ -428,6 +441,7 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk* - are used for ethernet
> >>> controllers */ .ident = "Beckhoff CB6263",
> >>> + .callback = dmi_callback,
> >>>   .matches = {
> >>>   DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff
> >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB6263"),
> >>> @@ -436,13 +450,14 @@ static const struct dmi_system_id
> >>> critclk_systems[] = { {
> >>>   /* pmc_plt_clk* - are used for ethernet
> >>> controllers */ .ident = "Beckhoff CB6363",
> >>> + .callback = dmi_callback,

[PATCH v2 12/12] docs: path-lookup: update symlink description

2021-03-15 Thread Fox Chen

instead of lookup_real()/vfs_create(), i_op->lookup() and
i_op->create() will be called directly.

update vfs_open() logic

should_follow_link is merged into lookup_last() or open_last_lookup()
which returns symlink name instead of an integer.

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index eef6e9f68fba..adbc714740c2 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1202,16 +1202,15 @@ the code.
it.  If the file was found in the dcache, then ``vfs_open()`` is used for
this.  If not, then ``lookup_open()`` will either call ``atomic_open()`` (if
the filesystem provides it) to combine the final lookup with the open, or
-   will perform the separate ``lookup_real()`` and ``vfs_create()`` steps
+   will perform the separate ``i_op->lookup()`` and ``i_op->create()`` steps
directly.  In the later case the actual "open" of this newly found or
created file will be performed by ``vfs_open()``, just as if the name
were found in the dcache.
 
 2. ``vfs_open()`` can fail with ``-EOPENSTALE`` if the cached information
-   wasn't quite current enough.  Rather than restarting the lookup from
-   the top with ``LOOKUP_REVAL`` set, ``lookup_open()`` is called instead,
-   giving the filesystem a chance to resolve small inconsistencies.
-   If that doesn't work, only then is the lookup restarted from the top.
+   wasn't quite current enough.  If it's in RCU-walk -ECHILD will be returned
+   otherwise will return -ESTALE.  When -ESTALE is returned, the caller may
+   retry with LOOKUP_REVAL flag set.
 
 3. An open with O_CREAT **does** follow a symlink in the final component,
unlike other creation system calls (like ``mkdir``).  So the sequence::
@@ -1221,8 +1220,8 @@ the code.
 
will create a file called ``/tmp/bar``.  This is not permitted if
``O_EXCL`` is set but otherwise is handled for an O_CREAT open much
-   like for a non-creating open: ``should_follow_link()`` returns ``1``, and
-   so does ``do_last()`` so that ``trailing_symlink()`` gets called and the
+   like for a non-creating open: ``lookup_last()`` or ``open_last_lookup()``
+   returns a non ``Null`` value, and ``link_path_walk()`` gets called and the
open process continues on the symlink that was found.
 
 Updating the access time
-- 
2.30.2

[PATCH v2 10/12] docs: path-lookup: update WALK_GET, WALK_PUT desc

2021-03-15 Thread Fox Chen

WALK_GET is changed to WALK_TRAILING with a different meaning.
Here it should be WALK_NOFOLLOW. WALK_PUT dosn't exist, we have
WALK_MORE.

WALK_PUT == !WALK_MORE

And there is not should_follow_link().

Related commits:
commit 8c4efe22e7c4 ("namei: invert the meaning of WALK_FOLLOW")
commit 1c4ff1a87e46 ("namei: invert WALK_PUT logics")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index 0d41c61f7e4f..abd0153e2415 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1123,13 +1123,11 @@ stack in ``walk_component()`` immediately when the 
symlink is found;
 old symlink as it walks that last component.  So it is quite
 convenient for ``walk_component()`` to release the old symlink and pop
 the references just before pushing the reference information for the
-new symlink.  It is guided in this by two flags; ``WALK_GET``, which
-gives it permission to follow a symlink if it finds one, and
-``WALK_PUT``, which tells it to release the current symlink after it has been
-followed.  ``WALK_PUT`` is tested first, leading to a call to
-``put_link()``.  ``WALK_GET`` is tested subsequently (by
-``should_follow_link()``) leading to a call to ``pick_link()`` which sets
-up the stack frame.
+new symlink.  It is guided in this by two flags; ``WALK_NOFOLLOW``, which
+suggests whether to follow a symlink if it finds one, and
+``WALK_MORE``, which tells whether to release the current symlink after it has
+been followed.  ``WALK_MORE`` is tested first, leading to a call to
+``put_link()``.
 
 Symlinks with no final component
 
-- 
2.30.2

[PATCH v2 09/12] docs: path-lookup: no get_link()

2021-03-15 Thread Fox Chen

no get_link() anymore. we have step_into() and pick_link().

walk_component() will call step_into(), in turn call pick_link,
and return symlink name.

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index 8ab95dd9046e..0d41c61f7e4f 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1103,12 +1103,10 @@ doesn't need to notice.  Getting this ``name`` variable 
on and off the
 stack is very straightforward; pushing and popping the references is
 a little more complex.
 
-When a symlink is found, ``walk_component()`` returns the value ``1``
-(``0`` is returned for any other sort of success, and a negative number
-is, as usual, an error indicator).  This causes ``get_link()`` to be
-called; it then gets the link from the filesystem.  Providing that
-operation is successful, the old path ``name`` is placed on the stack,
-and the new value is used as the ``name`` for a while.  When the end of
+When a symlink is found, ``walk_component()`` calls ``pick_link()``,
+it then gets the link from the filesystem returning new path ``name``.
+Providing that operation is successful, the old path ``name`` is placed on the
+stack, and the new value is used as the ``name`` for a while.  When the end of
 the path is found (i.e. ``*name`` is ``'\0'``) the old ``name`` is restored
 off the stack and path walking continues.
 
-- 
2.30.2

[PATCH v2 11/12] docs: path-lookup: update get_link() ->follow_link description

2021-03-15 Thread Fox Chen

get_link() is merged into pick_link(). i_op->follow_link is
replaced with i_op->get_link(). get_link() can return ERR_PTR(0)
which equals NULL.

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index abd0153e2415..eef6e9f68fba 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1134,10 +1134,10 @@ Symlinks with no final component
 
 A pair of special-case symlinks deserve a little further explanation.
 Both result in a new ``struct path`` (with mount and dentry) being set
-up in the ``nameidata``, and result in ``get_link()`` returning ``NULL``.
+up in the ``nameidata``, and result in ``pick_link()`` returning ``NULL``.
 
 The more obvious case is a symlink to "``/``".  All symlinks starting
-with "``/``" are detected in ``get_link()`` which resets the ``nameidata``
+with "``/``" are detected in ``pick_link()`` which resets the ``nameidata``
 to point to the effective filesystem root.  If the symlink only
 contains "``/``" then there is nothing more to do, no components at all,
 so ``NULL`` is returned to indicate that the symlink can be released and
@@ -1154,12 +1154,11 @@ something that looks like a symlink.  It is really a 
reference to the
 target file, not just the name of it.  When you ``readlink`` these
 objects you get a name that might refer to the same file - unless it
 has been unlinked or mounted over.  When ``walk_component()`` follows
-one of these, the ``->follow_link()`` method in "procfs" doesn't return
+one of these, the ``->get_link()`` method in "procfs" doesn't return
 a string name, but instead calls ``nd_jump_link()`` which updates the
-``nameidata`` in place to point to that target.  ``->follow_link()`` then
-returns ``NULL``.  Again there is no final component and ``get_link()``
-reports this by leaving the ``last_type`` field of ``nameidata`` as
-``LAST_BIND``.
+``nameidata`` in place to point to that target.  ``->get_link()`` then
+returns ``0``.  Again there is no final component and ``pick_link()``
+returns NULL.
 
 Following the symlink in the final component
 
-- 
2.30.2

[PATCH v2 06/12] docs: path-lookup: Add macro name to symlink limit description

2021-03-15 Thread Fox Chen

Add macro name MAXSYMLINKS to the symlink limit description, so
that it is consistent with path name length description above.

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index 66697db74955..af5c20fecfef 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -992,8 +992,8 @@ is 4096.  There are a number of reasons for this limit; not 
letting the
 kernel spend too much time on just one path is one of them.  With
 symbolic links you can effectively generate much longer paths so some
 sort of limit is needed for the same reason.  Linux imposes a limit of
-at most 40 symlinks in any one path lookup.  It previously imposed a
-further limit of eight on the maximum depth of recursion, but that was
+at most 40 (MAXSYMLINKS) symlinks in any one path lookup.  It previously 
imposed
+a further limit of eight on the maximum depth of recursion, but that was
 raised to 40 when a separate stack was implemented, so there is now
 just the one limit.
 
-- 
2.30.2

[PATCH v2 07/12] docs: path-lookup: i_op->follow_link replaced with i_op->get_link

2021-03-15 Thread Fox Chen

follow_link has been replaced by get_link() which can be
called in RCU mode.

see commit: commit 6b2553918d8b ("replace ->follow_link() with
new method that could stay in RCU mode")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index af5c20fecfef..e6b6c43ff0f6 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1060,13 +1060,11 @@ filesystem cannot successfully get a reference in 
RCU-walk mode, it
 must return ``-ECHILD`` and ``unlazy_walk()`` will be called to return to
 REF-walk mode in which the filesystem is allowed to sleep.
 
-The place for all this to happen is the ``i_op->follow_link()`` inode
-method.  In the present mainline code this is never actually called in
-RCU-walk mode as the rewrite is not quite complete.  It is likely that
-in a future release this method will be passed an ``inode`` pointer when
-called in RCU-walk mode so it both (1) knows to be careful, and (2) has the
-validated pointer.  Much like the ``i_op->permission()`` method we
-looked at previously, ``->follow_link()`` would need to be careful that
+The place for all this to happen is the ``i_op->get_link()`` inode
+method. This is called both in RCU-walk and REF-walk. In RCU-walk the
+``dentry*`` argument is NULL, ``->get_link()`` can return -ECHILD to drop out 
of
+RCU-walk.  Much like the ``i_op->permission()`` method we
+looked at previously, ``->get_link()`` would need to be careful that
 all the data structures it references are safe to be accessed while
 holding no counted reference, only the RCU lock.  Though getting a
 reference with ``->follow_link()`` is not yet done in RCU-walk mode, the
-- 
2.30.2

[PATCH v2 08/12] docs: path-lookup: update i_op->put_link and cookie description

2021-03-15 Thread Fox Chen

No inode->put_link operation anymore. We use delayed_call to
deal with link destruction. Cookie has been replaced with
struct delayed_call.

Related commit: commit fceef393a538 ("switch ->get_link() to
delayed_call, kill ->put_link()")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 30 ++-
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index e6b6c43ff0f6..8ab95dd9046e 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1066,34 +1066,20 @@ method. This is called both in RCU-walk and REF-walk. 
In RCU-walk the
 RCU-walk.  Much like the ``i_op->permission()`` method we
 looked at previously, ``->get_link()`` would need to be careful that
 all the data structures it references are safe to be accessed while
-holding no counted reference, only the RCU lock.  Though getting a
-reference with ``->follow_link()`` is not yet done in RCU-walk mode, the
-code is ready to release the reference when that does happen.
-
-This need to drop the reference to a symlink adds significant
-complexity.  It requires a reference to the inode so that the
-``i_op->put_link()`` inode operation can be called.  In REF-walk, that
-reference is kept implicitly through a reference to the dentry, so
-keeping the ``struct path`` of the symlink is easiest.  For RCU-walk,
-the pointer to the inode is kept separately.  To allow switching from
-RCU-walk back to REF-walk in the middle of processing nested symlinks
-we also need the seq number for the dentry so we can confirm that
-switching back was safe.
-
-Finally, when providing a reference to a symlink, the filesystem also
-provides an opaque "cookie" that must be passed to ``->put_link()`` so that it
-knows what to free.  This might be the allocated memory area, or a
-pointer to the ``struct page`` in the page cache, or something else
-completely.  Only the filesystem knows what it is.
+holding no counted reference, only the RCU lock. A callback
+``struct delayed_called`` will be passed to get_link,
+file systems can set their own put_link function and argument through
+``set_delayed_call``. Later on, when vfs wants to put link, it will call
+``do_delayed_call`` to invoke that callback function with the argument.
 
 In order for the reference to each symlink to be dropped when the walk 
completes,
 whether in RCU-walk or REF-walk, the symlink stack needs to contain,
 along with the path remnants:
 
-- the ``struct path`` to provide a reference to the inode in REF-walk
-- the ``struct inode *`` to provide a reference to the inode in RCU-walk
+- the ``struct path`` to provide a reference to the previous path
+- the ``const char *`` to provide a reference to the to previous name
 - the ``seq`` to allow the path to be safely switched from RCU-walk to REF-walk
-- the ``cookie`` that tells ``->put_path()`` what to put.
+- the ``struct delayed_call`` for later invocation.
 
 This means that each entry in the symlink stack needs to hold five
 pointers and an integer instead of just one pointer (the path
-- 
2.30.2

[PATCH v2 05/12] docs: path-lookup: remove filename_mountpoint

2021-03-15 Thread Fox Chen

No filename_mountpoint any more
see commit: commit 161aff1d93ab ("LOOKUP_MOUNTPOINT:
fold path_mountpointat() into path_lookupat()")

Without filename_mountpoint and path_mountpoint(), the
numbers should be four & three:

"These four correspond roughly to the three path_*() functions"

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index a65cb477d524..66697db74955 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -652,9 +652,9 @@ restarts from the top with REF-walk.
 
 This pattern of "try RCU-walk, if that fails try REF-walk" can be
 clearly seen in functions like ``filename_lookup()``,
-``filename_parentat()``, ``filename_mountpoint()``,
-``do_filp_open()``, and ``do_file_open_root()``.  These five
-correspond roughly to the four ``path_*()`` functions we met earlier,
+``filename_parentat()``,
+``do_filp_open()``, and ``do_file_open_root()``.  These four
+correspond roughly to the three ``path_*()`` functions we met earlier,
 each of which calls ``link_path_walk()``.  The ``path_*()`` functions are
 called using different mode flags until a mode is found which works.
 They are first called with ``LOOKUP_RCU`` set to request "RCU-walk".  If
-- 
2.30.2

[PATCH v2 04/12] docs: path-lookup: update do_last() part

2021-03-15 Thread Fox Chen

traling_symlink() was merged into lookup_last, do_last().

do_last() has later been split into open_last_lookups()
and do_open().

see related commit: commit c5971b8c6354 ("take post-lookup
part of do_last() out of loop")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 35 ---
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index b6a301b78121..a65cb477d524 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -495,11 +495,11 @@ This is important when unmounting a filesystem that is 
inaccessible, such as
 one provided by a dead NFS server.
 
 Finally ``path_openat()`` is used for the ``open()`` system call; it
-contains, in support functions starting with "``do_last()``", all the
+contains, in support functions starting with "``open_last_lookups()``", all the
 complexity needed to handle the different subtleties of O_CREAT (with
 or without O_EXCL), final "``/``" characters, and trailing symbolic
 links.  We will revisit this in the final part of this series, which
-focuses on those symbolic links.  "``do_last()``" will sometimes, but
+focuses on those symbolic links.  "``open_last_lookups()``" will sometimes, but
 not always, take ``i_rwsem``, depending on what it finds.
 
 Each of these, or the functions which call them, need to be alert to
@@ -1199,26 +1199,27 @@ symlink.
 This case is handled by the relevant caller of ``link_path_walk()``, such as
 ``path_lookupat()`` using a loop that calls ``link_path_walk()``, and then
 handles the final component.  If the final component is a symlink
-that needs to be followed, then ``trailing_symlink()`` is called to set
-things up properly and the loop repeats, calling ``link_path_walk()``
-again.  This could loop as many as 40 times if the last component of
-each symlink is another symlink.
+that needs to be followed, then ``open_last_lookups()`` is
+called to set things up properly and the loop repeats, calling
+``link_path_walk()`` again.  This could loop as many as 40 times if the last
+component of each symlink is another symlink.
 
 The various functions that examine the final component and possibly
-report that it is a symlink are ``lookup_last()``, ``mountpoint_last()``
-and ``do_last()``, each of which use the same convention as
-``walk_component()`` of returning ``1`` if a symlink was found that needs
-to be followed.
+report that it is a symlink are ``lookup_last()``, ``open_last_lookups()``
+, each of which use the same convention as
+``walk_component()`` of returning ``char *name`` if a symlink was found that
+needs to be followed.
 
-Of these, ``do_last()`` is the most interesting as it is used for
-opening a file.  Part of ``do_last()`` runs with ``i_rwsem`` held and this
-part is in a separate function: ``lookup_open()``.
+Of these, ``open_last_lookups()`` is the most interesting as it works in tandem
+with ``do_open()`` for opening a file.  Part of ``open_last_lookups()`` runs
+with ``i_rwsem`` held and this part is in a separate function: 
``lookup_open()``.
 
-Explaining ``do_last()`` completely is beyond the scope of this article,
-but a few highlights should help those interested in exploring the
-code.
+Explaining ``open_last_lookups()`` and ``do_open()`` completely is beyond the 
scope
+of this article, but a few highlights should help those interested in exploring
+the code.
 
-1. Rather than just finding the target file, ``do_last()`` needs to open
+1. Rather than just finding the target file, ``do_open()`` is used after
+   ``open_last_lookup()`` to open
it.  If the file was found in the dcache, then ``vfs_open()`` is used for
this.  If not, then ``lookup_open()`` will either call ``atomic_open()`` (if
the filesystem provides it) to combine the final lookup with the open, or
-- 
2.30.2

[PATCH v2 02/12] docs: path-lookup: update path_to_nameidata() part

2021-03-15 Thread Fox Chen

No path_to_namei() anymore, step_into() will be called.
Related commit: commit c99687a03a78 ("fold path_to_nameidata()
into its only remaining caller")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index d07766375e13..a29d714431a3 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -455,9 +455,10 @@ directly from walk_component() or from handle_dots().  It 
calls
 ``handle_mount()``, to check and handle mount points, in which a new
 ``struct path`` containing a counted reference to the new dentry and a
 reference to the new ``vfsmount`` which is only counted if it is
-different from the previous ``vfsmount``.  It then calls
-``path_to_nameidata()`` to install the new ``struct path`` in the
-``struct nameidata`` and drop the unneeded references.
+different from the previous ``vfsmount`` will be created. Then if there is
+symbolic link, ``step_into()`` calls ``pick_link()`` to deal with it, otherwise
+installs the new ``struct path`` in the ``struct nameidata`` and drop the
+unneeded references.
 
 This "hand-over-hand" sequencing of getting a reference to the new
 dentry before dropping the reference to the previous dentry may
-- 
2.30.2

[PATCH v2 01/12] docs: path-lookup: update follow_managed() part

2021-03-15 Thread Fox Chen

No follow_managed() anymore, handle_mounts(),
traverse_mounts(), will do the job.
see commit 9deed3ebca24 ("new helper: traverse_mounts()")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index c482e1619e77..d07766375e13 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -448,10 +448,11 @@ described.  If it finds a ``LAST_NORM`` component it 
first calls
 filesystem to revalidate the result if it is that sort of filesystem.
 If that doesn't get a good result, it calls "``lookup_slow()``" which
 takes ``i_rwsem``, rechecks the cache, and then asks the filesystem
-to find a definitive answer.  Each of these will call
-``follow_managed()`` (as described below) to handle any mount points.
+to find a definitive answer.
 
-In the absence of symbolic links, ``walk_component()`` creates a new
+As the last step of ``walk_component()``, ``step_into()`` will be called either
+directly from walk_component() or from handle_dots().  It calls
+``handle_mount()``, to check and handle mount points, in which a new
 ``struct path`` containing a counted reference to the new dentry and a
 reference to the new ``vfsmount`` which is only counted if it is
 different from the previous ``vfsmount``.  It then calls
@@ -535,8 +536,7 @@ covered in greater detail in autofs.txt in the Linux 
documentation
 tree, but a few notes specifically related to path lookup are in order
 here.
 
-The Linux VFS has a concept of "managed" dentries which is reflected
-in function names such as "``follow_managed()``".  There are three
+The Linux VFS has a concept of "managed" dentries.  There are three
 potentially interesting things about these dentries corresponding
 to three different flags that might be set in ``dentry->d_flags``:
 
-- 
2.30.2

[PATCH v2 03/12] docs: path-lookup: update path_mountpoint() part

2021-03-15 Thread Fox Chen

path_mountpoint() doesn't exist anymore. Have been folded
into path_lookup_at when flag is set with LOOKUP_MOUNTPOINT.
Check commit: commit 161aff1d93abf0e ("LOOKUP_MOUNTPOINT: fold
path_mountpointat() into path_lookupat()")

Signed-off-by: Fox Chen 
---
 Documentation/filesystems/path-lookup.rst | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst 
b/Documentation/filesystems/path-lookup.rst
index a29d714431a3..b6a301b78121 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -472,7 +472,7 @@ Handling the final component
 ``nd->last_type`` to refer to the final component of the path.  It does
 not call ``walk_component()`` that last time.  Handling that final
 component remains for the caller to sort out. Those callers are
-``path_lookupat()``, ``path_parentat()``, ``path_mountpoint()`` and
+``path_lookupat()``, ``path_parentat()`` and
 ``path_openat()`` each of which handles the differing requirements of
 different system calls.
 
@@ -488,12 +488,10 @@ perform their operation.
 object is wanted such as by ``stat()`` or ``chmod()``.  It essentially just
 calls ``walk_component()`` on the final component through a call to
 ``lookup_last()``.  ``path_lookupat()`` returns just the final dentry.
-
-``path_mountpoint()`` handles the special case of unmounting which must
-not try to revalidate the mounted filesystem.  It effectively
-contains, through a call to ``mountpoint_last()``, an alternate
-implementation of ``lookup_slow()`` which skips that step.  This is
-important when unmounting a filesystem that is inaccessible, such as
+It is worth noting that when flag ``LOOKUP_MOUNTPOINT`` is set,
+``path_lookupat()`` will unset LOOKUP_JUMPED in nameidata so that in the 
further
+path traversal ``d_weak_revalidate()`` won't be called.
+This is important when unmounting a filesystem that is inaccessible, such as
 one provided by a dead NFS server.
 
 Finally ``path_openat()`` is used for the ``open()`` system call; it
-- 
2.30.2

[PATCH v2 00/12] docs: path-lookup: Update pathlookup docs

2021-03-15 Thread Fox Chen

The Path lookup is a very complex subject in VFS. The path-lookup
document provides a very detailed guidance to help people understand
how path lookup works in the kernel. This document was originally
written based on three lwn articles five years ago. As times goes by,
some of the content is outdated. This patchset is intended to update
the document to make it more relevant to current codebase.


---
v1: https://lore.kernel.org/lkml/20210126072443.33066-1-foxhlc...@gmail.com/
v2:
 - Fix problems in v1 reviewed by Neil:
   1. In Patch 01 and 02 rewrite a new paragrah to describe step_into()
   2. In Patch 01 instead of changing it to traverse_mounts, remove 
follow_managed()
   3. In Patch 03 re-telling the story rather than adding notes
   4. In Patch 04 do_open() should be outside of loop, fix it and fix other 
problems
  in following paragrah
   5. In Patch 07 use "drop out of RCU-walk"
   6. In Patch 08 "latter" should be "later", fix it and restructure the next 
paragrah
  removing "Finally"

To help review, I've put a compiled html version here: 
http://linux-docs.54fox.com/linux_docs/filesystems/path-lookup-v2.html


Fox Chen (12):
  docs: path-lookup: update follow_managed() part
  docs: path-lookup: update path_to_nameidata() part
  docs: path-lookup: update path_mountpoint() part
  docs: path-lookup: update do_last() part
  docs: path-lookup: remove filename_mountpoint
  docs: path-lookup: Add macro name to symlink limit description
  docs: path-lookup: i_op->follow_link replaced with i_op->get_link
  docs: path-lookup: update i_op->put_link and cookie description
  docs: path-lookup: no get_link()
  docs: path-lookup: update WALK_GET, WALK_PUT desc
  docs: path-lookup: update get_link() ->follow_link description
  docs: path-lookup: update symlink description

 Documentation/filesystems/path-lookup.rst | 164 ++
 1 file changed, 71 insertions(+), 93 deletions(-)

-- 
2.30.2

Re: [PATCH 5.10 113/290] net: dsa: implement a central TX reallocation procedure

2021-03-15 Thread gre...@linuxfoundation.org

On Mon, Mar 15, 2021 at 07:56:02PM +, Vladimir Oltean wrote:
> +Andrew, Vivien,
> 
> On Mon, Mar 15, 2021 at 02:53:26PM +0100, gre...@linuxfoundation.org wrote:
> > From: Greg Kroah-Hartman 
> >
> > From: Vladimir Oltean 
> >
> > [ Upstream commit a3b0b6479700a5b0af2c631cb2ec0fb7a0d978f2 ]
> >
> > At the moment, taggers are left with the task of ensuring that the skb
> > headers are writable (which they aren't, if the frames were cloned for
> > TX timestamping, for flooding by the bridge, etc), and that there is
> > enough space in the skb data area for the DSA tag to be pushed.
> >
> > Moreover, the life of tail taggers is even harder, because they need to
> > ensure that short frames have enough padding, a problem that normal
> > taggers don't have.
> >
> > The principle of the DSA framework is that everything except for the
> > most intimate hardware specifics (like in this case, the actual packing
> > of the DSA tag bits) should be done inside the core, to avoid having
> > code paths that are very rarely tested.
> >
> > So provide a TX reallocation procedure that should cover the known needs
> > of DSA today.
> >
> > Note that this patch also gives the network stack a good hint about the
> > headroom/tailroom it's going to need. Up till now it wasn't doing that.
> > So the reallocation procedure should really be there only for the
> > exceptional cases, and for cloned packets which need to be unshared.
> >
> > Signed-off-by: Vladimir Oltean 
> > Tested-by: Christian Eggers  # For tail taggers only
> > Tested-by: Kurt Kanzenbach 
> > Reviewed-by: Florian Fainelli 
> > Signed-off-by: Jakub Kicinski 
> > Signed-off-by: Sasha Levin 
> > ---
> 
> For context, Sasha explains here:
> https://www.spinics.net/lists/stable-commits/msg190151.html
> (the conversation is somewhat truncated, unfortunately, because
> stable-comm...@vger.kernel.org ate my replies)
> that 13 patches were backported to get the unrelated commit 9200f515c41f
> ("net: dsa: tag_mtk: fix 802.1ad VLAN egress") to apply cleanly with git-am.
> 
> I am not strictly against this, even though I would have liked to know
> that the maintainers were explicitly informed about it.
> 
> Greg, could you make your stable backporting emails include the output
> of ./get_maintainer.pl into the list of recipients? Thanks.

I cc: everyone on the signed-off-by list on the patch, why would we need
to add more?  A maintainer should always be on that list automatically.

thanks,

greg k-h

Re: [PATCH 4/6] clk: actions: Fix AHPPREDIV-H-AHB clock chain on Owl S500 SoC

2021-03-15 Thread Manivannan Sadhasivam

On Mon, Mar 08, 2021 at 07:18:29PM +0200, Cristian Ciocaltea wrote:
> There are a few issues with the setup of the Actions Semi Owl S500 SoC's
> clock chain involving AHPPREDIV, H and AHB clocks:
> 
> * AHBPREDIV clock is defined as a muxer only, although it also acts as
>   a divider.
> * H clock is defined as a standard divider, although the raw value zero
>   is not supported.

What do you mean by not supported? The datasheet lists "0" as the valid divisor
value for divide by 1.

Rest looks good to me.

Thanks,
Mani

> * AHB is defined as a multi-rate factor clock, but it is actually just
>   a fixed pass clock.
> 
> Let's provide the following fixes:
> 
> * Change AHBPREDIV clock to an ungated OWL_COMP_DIV definition.
> * Add a clock div table 'h_div_table' for the H clock to drop the
>   unsupported 0 rate and use the correct register shift value in the
>   OWL_DIVIDER definition.
> * Drop the unneeded 'ahb_factor_table[]' and change AHB clock to an
>   ungated OWL_COMP_FIXED_FACTOR definition.
> 
> Fixes: ed6b4795ece4 ("clk: actions: Add clock driver for S500 SoC")
> Signed-off-by: Cristian Ciocaltea 
> ---
>  drivers/clk/actions/owl-s500.c | 20 ++--
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c
> index abe8874353de..b9e434173b4f 100644
> --- a/drivers/clk/actions/owl-s500.c
> +++ b/drivers/clk/actions/owl-s500.c
> @@ -151,9 +151,9 @@ static struct clk_factor_table hde_factor_table[] = {
>   { 0, 0, 0 },
>  };
>  
> -static struct clk_factor_table ahb_factor_table[] = {
> - { 1, 1, 2 }, { 2, 1, 3 },
> - { 0, 0, 0 },
> +static struct clk_div_table h_div_table[] = {
> + { 1, 2 }, { 2, 3 }, { 3, 4 },
> + { 0, 0 },
>  };
>  
>  static struct clk_div_table rmii_ref_div_table[] = {
> @@ -184,7 +184,6 @@ static struct clk_div_table nand_div_table[] = {
>  
>  /* mux clock */
>  static OWL_MUX(dev_clk, "dev_clk", dev_clk_mux_p, CMU_DEVPLL, 12, 1, 
> CLK_SET_RATE_PARENT);
> -static OWL_MUX(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, 
> CMU_BUSCLK1, 8, 3, CLK_SET_RATE_PARENT);
>  
>  /* gate clocks */
>  static OWL_GATE(gpio_clk, "gpio_clk", "apb_clk", CMU_DEVCLKEN0, 18, 0, 0);
> @@ -197,16 +196,25 @@ static OWL_GATE(timer_clk, "timer_clk", "hosc", 
> CMU_DEVCLKEN1, 27, 0, 0);
>  static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", CMU_DEVCLKEN1, 3, 0, 0);
>  
>  /* divider clocks */
> -static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 12, 2, 
> NULL, 0, 0);
> +static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 2, 2, 
> h_div_table, 0, 0);
>  static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, 
> 0, 0);
>  static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", 
> CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0);
>  
>  /* factor clocks */
> -static OWL_FACTOR(ahb_clk, "ahb_clk", "h_clk", CMU_BUSCLK1, 2, 2, 
> ahb_factor_table, 0, 0);
>  static OWL_FACTOR(de1_clk, "de_clk1", "de_clk", CMU_DECLK, 0, 4, 
> de_factor_table, 0, 0);
>  static OWL_FACTOR(de2_clk, "de_clk2", "de_clk", CMU_DECLK, 4, 4, 
> de_factor_table, 0, 0);
>  
>  /* composite clocks */
> +static OWL_COMP_DIV(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p,
> + OWL_MUX_HW(CMU_BUSCLK1, 8, 3),
> + { 0 },
> + OWL_DIVIDER_HW(CMU_BUSCLK1, 12, 2, 0, NULL),
> + 0);
> +
> +static OWL_COMP_FIXED_FACTOR(ahb_clk, "ahb_clk", "h_clk",
> + { 0 },
> + 1, 1, CLK_SET_RATE_PARENT);
> +
>  static OWL_COMP_FACTOR(vce_clk, "vce_clk", hde_clk_mux_p,
>   OWL_MUX_HW(CMU_VCECLK, 4, 2),
>   OWL_GATE_HW(CMU_DEVCLKEN0, 26, 0),
> -- 
> 2.30.1
>

[PATCH V2] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c

2021-03-15 Thread Bhaskar Chowdhury



s/kernal/kernel/

 A punctuation added too.

Signed-off-by: Bhaskar Chowdhury 
---
 Changes from V1:
 Punctuation missed, added as per Randy's finding

 arch/x86/events/intel/bts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 731dd8d0dbb1..1ba93c40fc54 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -594,7 +594,7 @@ static __init int bts_init(void)
 * we cannot use the user mapping since it will not be available
 * if we're not running the owning process.
 *
-* With PTI we can't use the kernal map either, because its not
+* With PTI we can't use the kernel map either, because it's not
 * there when we run userspace.
 *
 * For now, disable this driver when using PTI.
--
2.30.2

Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files

2021-03-15 Thread Viresh Kumar

On 16-03-21, 00:36, Frank Rowand wrote:
> I should have looked at patch 3/5 more carefully instead of counting on
> Masahiro to check it out and simply build testing.
> 
> Patch 3/5 does not seem correct.  I'll look over all the makefile related
> changes that have been accepted (if any) and patch 3/5 later today (Tuesday).

What is already merged by Rob is what everyone reviewed and it wasn't
related to this .dtso/.dtbo discussion we are having. I will resend a
new patchset with the stuff left for merging (which will involve the
thing we are discussing here), so we can get a clear picture of it.

-- 
viresh

Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files

2021-03-15 Thread Frank Rowand

On 3/15/21 5:12 PM, Laurent Pinchart wrote:
> Hi Yamada-san,
> 
> On Tue, Mar 16, 2021 at 02:43:45AM +0900, Masahiro Yamada wrote:
>> On Mon, Mar 15, 2021 at 3:40 PM Viresh Kumar wrote:
>>> On 14-03-21, 20:16, Frank Rowand wrote:
 On 3/12/21 11:11 PM, Frank Rowand wrote:
> On 3/12/21 1:13 AM, Viresh Kumar wrote:
>> On 12-03-21, 01:09, Frank Rowand wrote:
>>> I suggested having the .dtso files include the .dts file because that 
>>> is a relatively
>>> small and easy change to test.  What would probably make more sense is 
>>> the rename
>>> the existing overlay .dts files to be .dtso files and then for each 
>>> overlay .dtso
>>> file create a new .dts file that #includes the corresponding .dtso 
>>> file.  This is
>>> more work and churn, but easier to document that the .dts files are a 
>>> hack that is
>>> needed so that the corresponding .dtb.S files will be generated.
>>
>> What about creating links instead then ?
>>
>
> I don't really like the idea of using links here.
>
> Maybe it is best to make the changes needed to allow the unittest
> overlays to be .dtso instead of .dts.
>
> Off the top of my head:
>
>   scripts/Makefile.lib:
>  The rule for %.dtb.S invokes cmd_dt_S_dtb, which puts the
>  overlay data in section .dtb.init.rodata, with a label
>  pointing to the beginning of the overlay __dtb_XXX_begin and
>  a label pointing to the end of the overlay __dtb_XXX_end,
>  for the overlay named XXX.  I _think_ that you could simply
>  add a corresponding rule for %.dtbo.S using a new command
>  cmd_dt_S_dtbo (the same as cmd_dt_S_dtb, except use labels
>  __dtbo_XXX_begin and __dtbo_XXX_end).

 If you do the above, please put it in drivers/of/unittest-data/Makefile
 instead of scripts/Makefile.lib because it is unittest.c specific and
 not meant to be anywhere else in the kernel.
>>>
>>> What about doing this then in unittest's Makefile instead (which I
>>> already suggested earlier), that will make everything work just fine
>>> without any other changes ?
>>>
>>> +# Required for of unittest files as they can't be renamed to .dtso
>>> +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
>>> +   $(call if_changed_dep,dtc)
>>>
>>
>> If those rules are only needed by drivers/of/unittest-data/Makefile,
>> they should not be located in scripts/Makefile.lib.
>>
>> But how can we fix drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a779*.dts
>> if these are doing bad things.
>> They seem to be overlay files even though the file name suffix is .dts
> 
> That is correct, they are overlays. I have no issue with those files
> being renamed to .dtso if that can help (but I haven't checked if that
> would have any adverse effect on the R-Car DU driver).

As Laurent replied, yes these are overlays.  They were grandfathered in
as a deprecated use of overlays.

> 
> These files are there to ensure backward compatibility with older DT
> bindings. The change was made 3 years ago and I wouldn't object to
> dropping this completely, but I understand I may not be the most
> cautious person when it comes to ensuring DT backward compatibility :-)

My memory is that the goal was to eventually remove these overlays
at some point in the future.  If everyone agrees that today is the
proper time, it would be helpful to go ahead and remove these .dts
files and the code that uses them.

-Frank

> 
>> $ find drivers -name '*.dts'
>> drivers/staging/pi433/Documentation/devicetree/pi433-overlay.dts
>> drivers/staging/mt7621-dts/gbpc2.dts
>> drivers/staging/mt7621-dts/gbpc1.dts
>> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts
>> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts
>> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts
>> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts
>> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts
>> drivers/of/unittest-data/overlay_1.dts
>> drivers/of/unittest-data/testcases.dts
>> drivers/of/unittest-data/overlay_bad_add_dup_node.dts
>> drivers/of/unittest-data/overlay_bad_symbol.dts
>> drivers/of/unittest-data/overlay_0.dts
>> drivers/of/unittest-data/overlay_11.dts
>> drivers/of/unittest-data/overlay_gpio_03.dts
>> drivers/of/unittest-data/overlay_gpio_04a.dts
>> drivers/of/unittest-data/overlay_gpio_04b.dts
>> drivers/of/unittest-data/overlay_5.dts
>> drivers/of/unittest-data/overlay_bad_add_dup_prop.dts
>> drivers/of/unittest-data/overlay_gpio_01.dts
>> drivers/of/unittest-data/overlay_10.dts
>> drivers/of/unittest-data/overlay_7.dts
>> drivers/of/unittest-data/overlay_bad_phandle.dts
>> drivers/of/unittest-data/overlay_3.dts
>> drivers/of/unittest-data/overlay_6.dts
>> drivers/of/unittest-data/overlay_8.dts
>> drivers/of/unittest-data/overlay_12.dts
>> drivers/of/unittest-data/overlay_gpio_02a.dts
>> drivers/of/unittest-data/overlay_gpio_02b.dts
>> drivers/of/unittest-data/overlay_4.dts
>> drivers/of/unittest

Re: [PATCH v1 3/5] dt-bindings: arm: Add cpu-idle-states to Tegra194 CPU nodes

2021-03-15 Thread Sudeep Holla

+Lorenzo

Hi Sowjanya,

Sorry for the delayed response. I am still in vacation 😉

On Thu, Mar 11, 2021 at 01:11:37PM -0800, Sowjanya Komatineni wrote:
>
> On 3/10/21 6:52 PM, Sudeep Holla wrote:
> > On Mon, Mar 08, 2021 at 10:32:17AM -0800, Sowjanya Komatineni wrote:
> > > On 3/7/21 8:37 PM, Sudeep Holla wrote:
> > > > On Wed, Mar 03, 2021 at 10:08:10PM -0800, Sowjanya Komatineni wrote:
> > > > > This patch adds cpu-idle-states and corresponding state nodes to
> > > > > Tegra194 CPU in dt-binding document
> > > > >
> > > > I see that this platform has PSCI support. Can you care to explain why
> > > > you need additional DT bindings and driver for PSCI based CPU suspend.
> > > > Until the reasons are convincing, consider NACK from my side for this
> > > > driver and DT bindings. You should be really using those bindings and
> > > > the driver may be with minor changes there.
> > > >
> > > MCE firmware is in charge of state transition for Tegra194 carmel CPUs.
> > >
> > Sure, but I assume only TF-A talks to MCE and not any OSPM/Linux kernel.
>
> No. Tegra194 CPU idle driver works with MCE firmware running in background
> so cpuidle kernel driver also talks to MCE firmware directly on state
> information.

If that is the case I wouldn't term this as PSCI compliant firmware and
wouldn't attempt to use PSCI CPU idle driver. Now if we would what to allow
non-PSCI idle driver for Arm64 is entirely different question that deserves
a separate discussion IMO.

> >
> > > For run-time state transitions, need to provide state request along with 
> > > its
> > > residency time to MCE firmware which is running in the background.
> > >
> > Sounds similar to x86 mwait, perhaps we need to extend PSCI if we need
> > to make this firmware PSCI compliant or just say it is not and implement
> > completely independent implementation. I am not saying that is acceptable
> > ATM but I prefer not to mix some implementation to make it look like
> > PSCI compliant.
> >
> > > State min residency is updated into power_state value along with state id
> > > that is passed to psci_cpu_suspend_enter
> > >
> > Sounds like a hack/workaround. I would prefer to standardise that. IIUC
> > the power_state is more static and derived from DT. I don't like to
> > overload that TBH. Need to check with authors of that binding.
>
> Passing state idle time to ATF along with state to enter is Tegra specific
> as ATF firmware updates idle time to Tegra MCE firmware which will be used
> for deciding on state transition along with other information and background
> load.
>

So far we don't have any platform specific PSCI in OSPM and I prefer to keep
it that way.

> Not sure if this need to be standardized but will try to find alternate way
> to update idle time without misusing power-state value.
>

Sure, we can always review and see if any alternatives are acceptable, but
I am bit nervous to tie this as PSCI if it is not strictly spec compliant.

> Will discuss on this internally and get back.
>

Thanks.

> >
> > > Also states cross-over idle times need to be provided to MCE firmware.
> > >
> > New requirements if this has to be PSCI compliant.
>
> Updating cross-over idle times from DT to MCE firmware directly from cpuidle
> kernel driver with corresponding MCE ARI commands is again Tegra specific.
>

So all there are platform specific but static information you need from DT ?
If so, what can't it be made part of TF-A and OSPM can avoid interfering
with that info completely. My understanding was that OSPM provides runtime
hints like x86 mwait. If that's not the case, I am failing to understand
the need for OSPM to pass such static information from DT to the firmware.
Why can't that be just part of the firmware to begin with ?

> >
> > > MCE firmware decides on state transition based on these inputs along with
> > > its background work load.
> > >

What do you mean by this *"background work load"* ?

--
Regards,
Sudeep

Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files

2021-03-15 Thread Frank Rowand

On 3/15/21 1:40 AM, Viresh Kumar wrote:
> On 14-03-21, 20:16, Frank Rowand wrote:
>> On 3/12/21 11:11 PM, Frank Rowand wrote:
>>> On 3/12/21 1:13 AM, Viresh Kumar wrote:
 On 12-03-21, 01:09, Frank Rowand wrote:
> I suggested having the .dtso files include the .dts file because that is 
> a relatively
> small and easy change to test.  What would probably make more sense is 
> the rename
> the existing overlay .dts files to be .dtso files and then for each 
> overlay .dtso
> file create a new .dts file that #includes the corresponding .dtso file.  
> This is
> more work and churn, but easier to document that the .dts files are a 
> hack that is
> needed so that the corresponding .dtb.S files will be generated.

 What about creating links instead then ?

>>>
>>> I don't really like the idea of using links here.
>>>
>>> Maybe it is best to make the changes needed to allow the unittest
>>> overlays to be .dtso instead of .dts.
>>>
>>> Off the top of my head:
>>>
>>>   scripts/Makefile.lib:
>>>  The rule for %.dtb.S invokes cmd_dt_S_dtb, which puts the
>>>  overlay data in section .dtb.init.rodata, with a label
>>>  pointing to the beginning of the overlay __dtb_XXX_begin and
>>>  a label pointing to the end of the overlay __dtb_XXX_end,
>>>  for the overlay named XXX.  I _think_ that you could simply
>>>  add a corresponding rule for %.dtbo.S using a new command
>>>  cmd_dt_S_dtbo (the same as cmd_dt_S_dtb, except use labels
>>>  __dtbo_XXX_begin and __dtbo_XXX_end).
>>
>> If you do the above, please put it in drivers/of/unittest-data/Makefile
>> instead of scripts/Makefile.lib because it is unittest.c specific and
>> not meant to be anywhere else in the kernel.
> 
> What about doing this then in unittest's Makefile instead (which I
> already suggested earlier), that will make everything work just fine
> without any other changes ?
> 
> +# Required for of unittest files as they can't be renamed to .dtso
> +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
> +   $(call if_changed_dep,dtc)
> 

I should have looked at patch 3/5 more carefully instead of counting on
Masahiro to check it out and simply build testing.

Patch 3/5 does not seem correct.  I'll look over all the makefile related
changes that have been accepted (if any) and patch 3/5 later today (Tuesday).

-Frank

Re: [PATCH] dlm: Mundane typo fixes throughout the file lock.c

2021-03-15 Thread Randy Dunlap

On 3/15/21 10:27 PM, Bhaskar Chowdhury wrote:
> 
> Trivial typo fixes throughout the file.
> 
> cc: triv...@vger.kernel.org
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 


> ---
>  fs/dlm/lock.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
> index 002123efc6b0..caadc426c8b4 100644
> --- a/fs/dlm/lock.c
> +++ b/fs/dlm/lock.c
> @@ -91,7 +91,7 @@ static void del_timeout(struct dlm_lkb *lkb);
>  static void toss_rsb(struct kref *kref);
> 
>  /*
> - * Lock compatibilty matrix - thanks Steve
> + * Lock compatibility matrix - thanks Steve
>   * UN = Unlocked state. Not really a state, used as a flag
>   * PD = Padding. Used to make the matrix a nice power of two in size
>   * Other states are the same as the VMS DLM.
> @@ -1535,7 +1535,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, 
> int mstype,
>   return -1;
>   }
> 
> - /* Remove for the convert reply, and premptively remove for the
> + /* Remove for the convert reply, and preemptively remove for the
>  cancel reply.  A convert has been granted while there's still
>  an outstanding cancel on it (the cancel is moot and the result
>  in the cancel reply should be 0).  We preempt the cancel reply
> @@ -2357,14 +2357,14 @@ static int _can_be_granted(struct dlm_rsb *r, struct 
> dlm_lkb *lkb, int now,
>* 6-5: But the default algorithm for deciding whether to grant or
>* queue conversion requests does not by itself guarantee that such
>* requests are serviced on a "first come first serve" basis.  This, in
> -  * turn, can lead to a phenomenon known as "indefinate postponement".
> +  * turn, can lead to a phenomenon known as "indefinite postponement".
>*
>* 6-7: This issue is dealt with by using the optional QUECVT flag with
>* the system service employed to request a lock conversion.  This flag
>* forces certain conversion requests to be queued, even if they are
>* compatible with the granted modes of other locks on the same
>* resource.  Thus, the use of this flag results in conversion requests
> -  * being ordered on a "first come first servce" basis.
> +  * being ordered on a "first come first serve" basis.
>*
>* DCT: This condition is all about new conversions being able to occur
>* "in place" while the lock remains on the granted queue (assuming
> --


-- 
~Randy

Re: [PATCH] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c

2021-03-15 Thread Bhaskar Chowdhury


On 22:19 Mon 15 Mar 2021, Randy Dunlap wrote:

On 3/15/21 9:19 PM, Bhaskar Chowdhury wrote:


s/kernal/kernel/

Signed-off-by: Bhaskar Chowdhury 
---
 arch/x86/events/intel/bts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 731dd8d0dbb1..6320d2cfd9d3 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -594,7 +594,7 @@ static __init int bts_init(void)
 * we cannot use the user mapping since it will not be available
 * if we're not running the owning process.
 *
-* With PTI we can't use the kernal map either, because its not
+* With PTI we can't use the kernel map either, because its not


it's


Heck!

 * there when we run userspace.
 *
 * For now, disable this driver when using PTI.
--



--
~Randy



signature.asc
Description: PGP signature

Re: [PATCH] bpf: selftests: remove unused 'nospace_err' in tests for batched ops in array maps

2021-03-15 Thread patchwork-bot+netdevbpf

Hello:

This patch was applied to bpf/bpf-next.git (refs/heads/master):

On Mon, 15 Mar 2021 14:29:51 +0100 you wrote:
> This seems to be a reminiscent from the hashmap tests.
> 
> Signed-off-by: Pedro Tammela 
> ---
>  tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c | 5 -
>  1 file changed, 5 deletions(-)

Here is the summary with links:
  - bpf: selftests: remove unused 'nospace_err' in tests for batched ops in 
array maps
https://git.kernel.org/bpf/bpf-next/c/23f50b5ac331

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html

Re: [PATCH 5.4 000/168] 5.4.106-rc1 review

2021-03-15 Thread Naresh Kamboju

On Mon, 15 Mar 2021 at 19:35,  wrote:
>
> From: Greg Kroah-Hartman 
>
> This is the start of the stable review cycle for the 5.4.106 release.
> There are 168 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed, 17 Mar 2021 13:55:26 +.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.106-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.4.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm, x86_64, and i386.

Tested-by: Linux Kernel Functional Testing 

Summary


kernel: 5.4.106-rc1
git repo: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-5.4.y
git commit: 26ba2df2641dff3b9583fc4d1fbdc668bd346f00
git describe: v5.4.105-169-g26ba2df2641d
Test details: 
https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.4.y/build/v5.4.105-169-g26ba2df2641d

No regressions (compared to build v5.4.105)

No fixes (compared to build v5.4.105)

Ran 50881 total tests in the following environments and test suites.

Environments
--
- arc
- arm
- arm64
- dragonboard-410c
- hi6220-hikey
- i386
- juno-r2
- juno-r2-compat
- juno-r2-kasan
- mips
- nxp-ls2088
- nxp-ls2088-64k_page_size
- parisc
- powerpc
- qemu-arm-clang
- qemu-arm64-clang
- qemu-arm64-kasan
- qemu-x86_64-clang
- qemu-x86_64-kasan
- qemu-x86_64-kcsan
- qemu_arm
- qemu_arm64
- qemu_arm64-compat
- qemu_i386
- qemu_x86_64
- qemu_x86_64-compat
- riscv
- s390
- sh
- sparc
- x15
- x86
- x86-kasan
- x86_64

Test Suites
---
* build
* linux-log-parser
* install-android-platform-tools-r2600
* kselftest-
* kselftest-android
* kselftest-bpf
* kselftest-capabilities
* kselftest-cgroup
* kselftest-clone3
* kselftest-core
* kselftest-cpu-hotplug
* kselftest-cpufreq
* kselftest-efivarfs
* kselftest-filesystems
* kselftest-firmware
* kselftest-fpu
* kselftest-futex
* kselftest-gpio
* kselftest-intel_pstate
* kselftest-ipc
* kselftest-ir
* kselftest-kcmp
* kselftest-kvm
* kselftest-lib
* kselftest-livepatch
* kselftest-lkdtm
* kselftest-membarrier
* kselftest-memfd
* kselftest-memory-hotplug
* kselftest-mincore
* kselftest-mount
* kselftest-mqueue
* kselftest-openat2
* kselftest-pid_namespace
* kselftest-pidfd
* kselftest-proc
* kselftest-pstore
* kselftest-ptrace
* kselftest-rseq
* kselftest-rtc
* kselftest-seccomp
* kselftest-sigaltstack
* kselftest-size
* kselftest-splice
* kselftest-static_keys
* kselftest-sync
* kselftest-sysctl
* kselftest-timens
* kselftest-timers
* kselftest-tmpfs
* kselftest-tpm2
* kselftest-user
* kselftest-zram
* libhugetlbfs
* ltp-controllers-tests
* ltp-dio-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-io-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-tracing-tests
* perf
* v4l2-compliance
* fwts
* kselftest-net
* kselftest-netfilter
* kselftest-nsfs
* kselftest-tc-testing
* kvm-unit-tests
* ltp-cap_bounds-tests
* ltp-commands-tests
* ltp-containers-tests
* ltp-cpuhotplug-tests
* ltp-crypto-tests
* ltp-cve-tests
* ltp-hugetlb-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-mm-tests
* network-basic-tests
* kselftest-kexec
* kselftest-vm
* kselftest-x86
* ltp-cap_bounds-test[
* ltp-open-posix-tests
* ltp-syscalls-tests
* rcutorture
* ssuite

-- 
Linaro LKFT
https://lkft.linaro.org

[PATCH] dlm: Mundane typo fixes throughout the file lock.c

2021-03-15 Thread Bhaskar Chowdhury



Trivial typo fixes throughout the file.

cc: triv...@vger.kernel.org

Signed-off-by: Bhaskar Chowdhury 
---
 fs/dlm/lock.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 002123efc6b0..caadc426c8b4 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -91,7 +91,7 @@ static void del_timeout(struct dlm_lkb *lkb);
 static void toss_rsb(struct kref *kref);

 /*
- * Lock compatibilty matrix - thanks Steve
+ * Lock compatibility matrix - thanks Steve
  * UN = Unlocked state. Not really a state, used as a flag
  * PD = Padding. Used to make the matrix a nice power of two in size
  * Other states are the same as the VMS DLM.
@@ -1535,7 +1535,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int 
mstype,
return -1;
}

-   /* Remove for the convert reply, and premptively remove for the
+   /* Remove for the convert reply, and preemptively remove for the
   cancel reply.  A convert has been granted while there's still
   an outstanding cancel on it (the cancel is moot and the result
   in the cancel reply should be 0).  We preempt the cancel reply
@@ -2357,14 +2357,14 @@ static int _can_be_granted(struct dlm_rsb *r, struct 
dlm_lkb *lkb, int now,
 * 6-5: But the default algorithm for deciding whether to grant or
 * queue conversion requests does not by itself guarantee that such
 * requests are serviced on a "first come first serve" basis.  This, in
-* turn, can lead to a phenomenon known as "indefinate postponement".
+* turn, can lead to a phenomenon known as "indefinite postponement".
 *
 * 6-7: This issue is dealt with by using the optional QUECVT flag with
 * the system service employed to request a lock conversion.  This flag
 * forces certain conversion requests to be queued, even if they are
 * compatible with the granted modes of other locks on the same
 * resource.  Thus, the use of this flag results in conversion requests
-* being ordered on a "first come first servce" basis.
+* being ordered on a "first come first serve" basis.
 *
 * DCT: This condition is all about new conversions being able to occur
 * "in place" while the lock remains on the granted queue (assuming
--
2.30.2

Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files

2021-03-15 Thread Viresh Kumar

On 16-03-21, 02:43, Masahiro Yamada wrote:
> On Mon, Mar 15, 2021 at 3:40 PM Viresh Kumar  wrote:
> > On 14-03-21, 20:16, Frank Rowand wrote:
> > What about doing this then in unittest's Makefile instead (which I
> > already suggested earlier), that will make everything work just fine
> > without any other changes ?
> >
> > +# Required for of unittest files as they can't be renamed to .dtso
> > +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
> > +   $(call if_changed_dep,dtc)
> 
> If those rules are only needed by drivers/of/unittest-data/Makefile,
> they should not be located in scripts/Makefile.lib.

Right, this is exactly what I suggested.

> But how can we fix drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a779*.dts
> if these are doing bad things.
> They seem to be overlay files even though the file name suffix is .dts
> 
> $ find drivers -name '*.dts'
> drivers/staging/pi433/Documentation/devicetree/pi433-overlay.dts
> drivers/staging/mt7621-dts/gbpc2.dts
> drivers/staging/mt7621-dts/gbpc1.dts
> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts
> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts
> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts
> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts
> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts

For all the above files, even if they are really overlay files, we
won't use fdtoverlay tool to apply them to some base dtb and so if we
leave them as is, i.e. .dts->.dtb, it won't break anything.

The problem only happens if someone wants to generate .dtbo for them
instead and then they should be named .dtso as we won't allow .dts ->
.dtbo conversion there.

-- 
viresh

Re: [PATCH v2 16/27] perf evlist: Warn as events from different hybrid PMUs in a group

2021-03-15 Thread Jin, Yao


Hi Jiri,

On 3/16/2021 7:03 AM, Jiri Olsa wrote:

On Thu, Mar 11, 2021 at 03:07:31PM +0800, Jin Yao wrote:

SNIP


goto try_again;
}
+
+   if (errno == EINVAL && perf_pmu__hybrid_exist())
+   evlist__warn_hybrid_group(evlist);
rc = -errno;
evsel__open_strerror(pos, &opts->target, errno, msg, 
sizeof(msg));
ui__error("%s\n", msg);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7a732508b2b4..6f780a039db0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -239,6 +239,9 @@ static void evlist__check_cpu_maps(struct evlist *evlist)
struct evsel *evsel, *pos, *leader;
char buf[1024];
  
+	if (evlist__hybrid_exist(evlist))

+   return;


this should be in separate patch and explained



Now I have another idea. If a group consists of atom events and core events, we still follow current 
disabling group solution?


I mean removing following code:

if (evlist__hybrid_exist(evlist))
return;

evlist__check_cpu_maps then continues running and disabling the group. But also report with a 
warning that says "WARNING: Group has events from different hybrid PMUs".


Do you like this way?


+
evlist__for_each_entry(evlist, evsel) {
leader = evsel->leader;
  
@@ -726,6 +729,10 @@ enum counter_recovery {

  static enum counter_recovery stat_handle_error(struct evsel *counter)
  {
char msg[BUFSIZ];
+
+   if (perf_pmu__hybrid_exist() && errno == EINVAL)
+   evlist__warn_hybrid_group(evsel_list);
+
/*
 * PPC returns ENXIO for HW counters until 2.6.37
 * (behavior changed with commit b0a873e).
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index f139151b9433..5ec891418cdd 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -2224,3 +2224,47 @@ void evlist__invalidate_all_cpus(struct evlist *evlist)
perf_cpu_map__put(evlist->core.all_cpus);
evlist->core.all_cpus = perf_cpu_map__empty_new(1);
  }
+
+static bool group_hybrid_conflict(struct evsel *leader)
+{
+   struct evsel *pos, *prev = NULL;
+
+   for_each_group_evsel(pos, leader) {
+   if (!pos->pmu_name || !perf_pmu__is_hybrid(pos->pmu_name))
+   continue;
+
+   if (prev && strcmp(prev->pmu_name, pos->pmu_name))
+   return true;
+
+   prev = pos;
+   }
+
+   return false;
+}
+
+void evlist__warn_hybrid_group(struct evlist *evlist)
+{
+   struct evsel *evsel;
+
+   evlist__for_each_entry(evlist, evsel) {
+   if (evsel__is_group_leader(evsel) &&
+   evsel->core.nr_members > 1 &&


hm, could we just iterate all the members and make sure the first found
hybrid event's pmu matches the pmu of the rest hybrid events in the list?



'{cpu_core/event1/,cpu_core/event2/}','{cpu_atom/event3/,cpu_atom/event4/}'

Two or more groups need to be supported. We get the first hybrid event's pmu (cpu_core in this 
example) but it doesn't match the cpu_atom/event3/ and cpu_atom/event4/. But actually this case 
should be supported, right?



+   group_hybrid_conflict(evsel)) {
+   WARN_ONCE(1, "WARNING: Group has events from "
+"different hybrid PMUs\n");
+   return;
+   }
+   }
+}
+
+bool evlist__hybrid_exist(struct evlist *evlist)


evlist__has_hybrid seems better



Yes, agree.

Thanks
Jin Yao



jirka

Re: [PATCHv2 0/3] Add UVC 1.5 Region Of Interest control to uvcvideo

2021-03-15 Thread Sergey Senozhatsky

On (21/02/08 14:17), Sergey Senozhatsky wrote:
>   Hello,
> 
>   RFC

Hi Laurent,

Gentle ping.

-ss

Re: [PATCH] fs: Trivial typo fix in the file coredump.c

2021-03-15 Thread Randy Dunlap

On 3/15/21 10:03 PM, Bhaskar Chowdhury wrote:
> 
> s/postion/position/
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 

> ---
>  Al, I hope this time I read the comments well enough,if still
>  I am at fault , curse me!
> 
>  fs/coredump.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/coredump.c b/fs/coredump.c
> index 1c0fdc1aa70b..3ecae122ffd9 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -923,7 +923,7 @@ EXPORT_SYMBOL(dump_align);
> 
>  /*
>   * Ensures that file size is big enough to contain the current file
> - * postion. This prevents gdb from complaining about a truncated file
> + * position. This prevents gdb from complaining about a truncated file
>   * if the last "write" to the file was dump_skip.
>   */
>  void dump_truncate(struct coredump_params *cprm)
> --


-- 
~Randy

[PATCH v10] sched/fair: select idle cpu from idle cpumask for task wakeup

2021-03-15 Thread Aubrey Li

From: Aubrey Li 

Add idle cpumask to track idle cpus in sched domain. Every time
a CPU enters idle, the CPU is set in idle cpumask to be a wakeup
target. And if the CPU is not in idle, the CPU is cleared in idle
cpumask during scheduler tick to ratelimit idle cpumask update.

When a task wakes up to select an idle cpu, scanning idle cpumask
has lower cost than scanning all the cpus in last level cache domain,
especially when the system is heavily loaded.

v9->v10:
- Update scan cost only when the idle cpumask is scanned, i.e, the
  idle cpumask is not empty

v8->v9:
- rebase on top of tip/sched/core, no code change

v7->v8:
- refine update_idle_cpumask, no functionality change
- fix a suspicious RCU usage warning with CONFIG_PROVE_RCU=y

v6->v7:
- place the whole idle cpumask mechanism under CONFIG_SMP

v5->v6:
- decouple idle cpumask update from stop_tick signal, set idle CPU
  in idle cpumask every time the CPU enters idle

v4->v5:
- add update_idle_cpumask for s2idle case
- keep the same ordering of tick_nohz_idle_stop_tick() and update_
  idle_cpumask() everywhere

v3->v4:
- change setting idle cpumask from every idle entry to tickless idle
  if cpu driver is available
- move clearing idle cpumask to scheduler_tick to decouple nohz mode

v2->v3:
- change setting idle cpumask to every idle entry, otherwise schbench
  has a regression of 99th percentile latency
- change clearing idle cpumask to nohz_balancer_kick(), so updating
  idle cpumask is ratelimited in the idle exiting path
- set SCHED_IDLE cpu in idle cpumask to allow it as a wakeup target

v1->v2:
- idle cpumask is updated in the nohz routines, by initializing idle
  cpumask with sched_domain_span(sd), nohz=off case remains the original
  behavior

Cc: Peter Zijlstra 
Cc: Mel Gorman 
Cc: Vincent Guittot 
Cc: Qais Yousef 
Cc: Valentin Schneider 
Cc: Jiang Biao 
Cc: Tim Chen 
Signed-off-by: Aubrey Li 
---
 include/linux/sched/topology.h | 13 
 kernel/sched/core.c|  2 ++
 kernel/sched/fair.c| 47 --
 kernel/sched/idle.c|  5 +
 kernel/sched/sched.h   |  4 
 kernel/sched/topology.c|  3 ++-
 6 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8f0f778..905e382 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -74,8 +74,21 @@ struct sched_domain_shared {
atomic_tref;
atomic_tnr_busy_cpus;
int has_idle_cores;
+   /*
+* Span of all idle CPUs in this domain.
+*
+* NOTE: this field is variable length. (Allocated dynamically
+* by attaching extra space to the end of the structure,
+* depending on how many CPUs the kernel has booted up with)
+*/
+   unsigned long   idle_cpus_span[];
 };
 
+static inline struct cpumask *sds_idle_cpus(struct sched_domain_shared *sds)
+{
+   return to_cpumask(sds->idle_cpus_span);
+}
+
 struct sched_domain {
/* These fields must be setup */
struct sched_domain __rcu *parent;  /* top domain must be null 
terminated */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca2bb62..310bf9a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4552,6 +4552,7 @@ void scheduler_tick(void)
 
 #ifdef CONFIG_SMP
rq->idle_balance = idle_cpu(cpu);
+   update_idle_cpumask(cpu, rq->idle_balance);
trigger_load_balance(rq);
 #endif
 }
@@ -8209,6 +8210,7 @@ void __init sched_init(void)
rq->idle_stamp = 0;
rq->avg_idle = 2*sysctl_sched_migration_cost;
rq->max_idle_balance_cost = sysctl_sched_migration_cost;
+   rq->last_idle_state = 1;
 
INIT_LIST_HEAD(&rq->cfs_tasks);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 794c2cb..24384b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6134,7 +6134,12 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
if (!this_sd)
return -1;
 
-   cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+   /*
+* sched_domain_shared is set only at shared cache level,
+* this works only because select_idle_cpu is called with
+* sd_llc.
+*/
+   cpumask_and(cpus, sds_idle_cpus(sd->shared), p->cpus_ptr);
 
if (sched_feat(SIS_PROP) && !smt) {
u64 avg_cost, avg_idle, span_avg;
@@ -6173,7 +6178,7 @@ static int select_idle_cpu(struct task_struct *p, struct 
sched_domain *sd, int t
if (smt)
set_idle_cores(this, false);
 
-   if (sched_feat(SIS_PROP) && !smt) {
+   if (sched_feat(SIS_PROP) && !smt && (cpu < nr_cpumask_bits)) {
time = cpu_clock(this) - time;
update_avg(&this_sd->avg_scan_cost, time);
}
@@ -6838,6 +6843,44 @@ balance_fair(stru

Re: [PATCH] usb: host: Mundane spello fix in the file sl811_cs.c

2021-03-15 Thread Randy Dunlap

On 3/15/21 9:52 PM, Bhaskar Chowdhury wrote:
> 
> s/seting/setting/
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 

> ---
>  drivers/usb/host/sl811_cs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c
> index 72136373ffab..16d157013018 100644
> --- a/drivers/usb/host/sl811_cs.c
> +++ b/drivers/usb/host/sl811_cs.c
> @@ -94,7 +94,7 @@ static int sl811_hc_init(struct device *parent, 
> resource_size_t base_addr,
>   return -EBUSY;
>   platform_dev.dev.parent = parent;
> 
> - /* finish seting up the platform device */
> + /* finish setting up the platform device */
>   resources[0].start = irq;
> 
>   resources[1].start = base_addr;
> --


-- 
~Randy

Re: [PATCH] samples: bpf: Fix a spelling typo in do_hbm_test.sh

2021-03-15 Thread patchwork-bot+netdevbpf

Hello:

This patch was applied to bpf/bpf-next.git (refs/heads/master):

On Mon, 15 Mar 2021 21:44:54 +0900 you wrote:
> This patch fixes a spelling typo in do_hbm_test.sh
> 
> Signed-off-by: Masanari Iida 
> ---
>  samples/bpf/do_hbm_test.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Here is the summary with links:
  - samples: bpf: Fix a spelling typo in do_hbm_test.sh
https://git.kernel.org/bpf/bpf-next/c/d94436a5d1a0

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html

Re: [PATCH 5.10 000/290] 5.10.24-rc1 review

2021-03-15 Thread Naresh Kamboju

On Mon, 15 Mar 2021 at 19:27,  wrote:
>
> From: Greg Kroah-Hartman 
>
> This is the start of the stable review cycle for the 5.10.24 release.
> There are 290 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed, 17 Mar 2021 13:55:02 +.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.24-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.10.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h


Results from Linaro’s test farm.
No regressions on arm64, arm, x86_64, and i386.

Tested-by: Linux Kernel Functional Testing 

Summary


kernel: 5.10.24-rc1
git repo: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-5.10.y
git commit: c6b3724e56923191dc567ceb626ba15daa49313c
git describe: v5.10.23-291-gc6b3724e5692
Test details: 
https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.10.y/build/v5.10.23-291-gc6b3724e5692

No regressions (compared to build v5.10.23)

No fixes (compared to build v5.10.23)

Ran 58761 total tests in the following environments and test suites.

Environments
--
- arc
- arm
- arm64
- dragonboard-410c
- hi6220-hikey
- i386
- juno-r2
- juno-r2-compat
- juno-r2-kasan
- mips
- nxp-ls2088
- nxp-ls2088-64k_page_size
- parisc
- powerpc
- qemu-arm-clang
- qemu-arm64-clang
- qemu-arm64-kasan
- qemu-i386-clang
- qemu-x86_64-clang
- qemu-x86_64-kasan
- qemu-x86_64-kcsan
- qemu_arm
- qemu_arm64
- qemu_arm64-compat
- qemu_i386
- qemu_x86_64
- qemu_x86_64-compat
- riscv
- s390
- sh
- sparc
- x15
- x86
- x86-kasan
- x86_64

Test Suites
---
* build
* linux-log-parser
* install-android-platform-tools-r2600
* kselftest-lkdtm
* ltp-containers-tests
* ltp-cve-tests
* ltp-dio-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-mm-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* perf
* fwts
* kselftest-
* kselftest-bpf
* kselftest-intel_pstate
* kselftest-kvm
* kselftest-livepatch
* kselftest-ptrace
* kselftest-rseq
* kselftest-rtc
* kselftest-seccomp
* kselftest-sigaltstack
* kselftest-size
* kselftest-splice
* kselftest-static_keys
* kselftest-sync
* kselftest-sysctl
* kselftest-timens
* kselftest-timers
* kselftest-tmpfs
* kselftest-tpm2
* kselftest-user
* kselftest-zram
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-commands-tests
* ltp-cpuhotplug-tests
* ltp-crypto-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-sched-tests
* ltp-tracing-tests
* network-basic-tests
* v4l2-compliance
* kselftest-android
* kselftest-capabilities
* kselftest-cgroup
* kselftest-clone3
* kselftest-core
* kselftest-cpu-hotplug
* kselftest-efivarfs
* kselftest-filesystems
* kselftest-firmware
* kselftest-fpu
* kselftest-futex
* kselftest-gpio
* kselftest-ipc
* kselftest-ir
* kselftest-kcmp
* kselftest-kexec
* kselftest-lib
* kselftest-membarrier
* kselftest-memfd
* kselftest-memory-hotplug
* kselftest-mincore
* kselftest-mount
* kselftest-mqueue
* kselftest-net
* kselftest-netfilter
* kselftest-nsfs
* kselftest-openat2
* kselftest-pid_namespace
* kselftest-pidfd
* kselftest-proc
* kselftest-pstore
* kselftest-tc-testing
* kselftest-vm
* kselftest-x86
* ltp-controllers-tests
* ltp-open-posix-tests
* kselftest-cpufreq
* kvm-unit-tests
* kunit
* rcutorture
* ssuite
* kselftest-vsyscall-mode-native-
* kselftest-vsyscall-mode-none-

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c

2021-03-15 Thread Randy Dunlap

On 3/15/21 9:19 PM, Bhaskar Chowdhury wrote:
> 
> s/kernal/kernel/
> 
> Signed-off-by: Bhaskar Chowdhury 
> ---
>  arch/x86/events/intel/bts.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
> index 731dd8d0dbb1..6320d2cfd9d3 100644
> --- a/arch/x86/events/intel/bts.c
> +++ b/arch/x86/events/intel/bts.c
> @@ -594,7 +594,7 @@ static __init int bts_init(void)
>* we cannot use the user mapping since it will not be available
>* if we're not running the owning process.
>*
> -  * With PTI we can't use the kernal map either, because its not
> +  * With PTI we can't use the kernel map either, because its not

it's

>* there when we run userspace.
>*
>* For now, disable this driver when using PTI.
> --


-- 
~Randy

Re: [PATCH] samples: bpf: Fix a spelling typo in do_hbm_test.sh

2021-03-15 Thread Andrii Nakryiko

On Mon, Mar 15, 2021 at 5:45 AM Masanari Iida  wrote:
>
> This patch fixes a spelling typo in do_hbm_test.sh
>
> Signed-off-by: Masanari Iida 
> ---

Thanks, applied to bpf-next. For the future patches, please use [PATCH
bpf-next] subject prefix if you are sending patches against bpf-next
tree (of [PATCH bpf], if it's against bpf tree).

>  samples/bpf/do_hbm_test.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh
> index 21790ea5c460..38e4599350db 100755
> --- a/samples/bpf/do_hbm_test.sh
> +++ b/samples/bpf/do_hbm_test.sh
> @@ -10,7 +10,7 @@
>  Usage() {
>echo "Script for testing HBM (Host Bandwidth Manager) framework."
>echo "It creates a cgroup to use for testing and load a BPF program to 
> limit"
> -  echo "egress or ingress bandwidht. It then uses iperf3 or netperf to 
> create"
> +  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to 
> create"
>echo "loads. The output is the goodput in Mbps (unless -D was used)."
>echo ""
>echo "USAGE: $name [out] [-b=|--bpf=] [-c=|--cc=]"
> --
> 2.25.0
>

Re: [PATCH V2] mips: asm: octeon: A typo fix in the file cvmx-address.h

2021-03-15 Thread Randy Dunlap

On 3/15/21 9:33 PM, Bhaskar Chowdhury wrote:
> 
> s/techically/technically/
> 
> Signed-off-by: Bhaskar Chowdhury 

Acked-by: Randy Dunlap 

> ---
>  Changes from V1:
>  Meh, missed the changelog text, so added :)
> 
>  arch/mips/include/asm/octeon/cvmx-address.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/mips/include/asm/octeon/cvmx-address.h 
> b/arch/mips/include/asm/octeon/cvmx-address.h
> index ef8c4a61..5df5c90f6a5d 100644
> --- a/arch/mips/include/asm/octeon/cvmx-address.h
> +++ b/arch/mips/include/asm/octeon/cvmx-address.h
> @@ -152,7 +152,7 @@ typedef union {
> 
>   /* physical mem address */
>   struct {
> - /* techically, <47:40> are dont-cares */
> + /* technically, <47:40> are dont-cares */
>   uint64_t zeroes:24;
>   /* the hardware ignores <39:36> in Octeon I */
>   uint64_t unaddr:4;
> --


-- 
~Randy

Re: [PATCH 5.11 000/306] 5.11.7-rc1 review

2021-03-15 Thread Naresh Kamboju

On Mon, 15 Mar 2021 at 19:27,  wrote:
>
> From: Greg Kroah-Hartman 
>
> This is the start of the stable review cycle for the 5.11.7 release.
> There are 306 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Wed, 17 Mar 2021 13:54:26 +.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.11.7-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-5.11.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h


Results from Linaro’s test farm.
No regressions on arm64, arm, x86_64, and i386.

Tested-by: Linux Kernel Functional Testing 

Summary


kernel: 5.11.7-rc1
git repo: 
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-5.11.y
git commit: f109baa2424b2523fbb975de996fedc7a53ef531
git describe: v5.11.6-307-gf109baa2424b
Test details: 
https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.11.y/build/v5.11.6-307-gf109baa2424b

No regressions (compared to build v5.11.6)

No fixes (compared to build v5.11.6)


Ran 55999 total tests in the following environments and test suites.

Environments
--
- arc
- arm
- arm64
- dragonboard-410c
- hi6220-hikey
- i386
- juno-64k_page_size
- juno-r2
- juno-r2-compat
- juno-r2-kasan
- mips
- nxp-ls2088
- nxp-ls2088-64k_page_size
- parisc
- powerpc
- qemu-arm-clang
- qemu-arm64-clang
- qemu-arm64-kasan
- qemu-i386-clang
- qemu-x86_64-clang
- qemu-x86_64-kasan
- qemu-x86_64-kcsan
- qemu_arm
- qemu_arm64
- qemu_arm64-compat
- qemu_i386
- qemu_x86_64
- qemu_x86_64-compat
- riscv
- s390
- sh
- sparc
- x15
- x86
- x86-kasan
- x86_64

Test Suites
---
* build
* linux-log-parser
* install-android-platform-tools-r2600
* kselftest-
* kselftest-efivarfs
* kselftest-filesystems
* kselftest-firmware
* kselftest-fpu
* kselftest-futex
* kselftest-gpio
* kselftest-ipc
* kselftest-ir
* kselftest-kcmp
* kselftest-lib
* kselftest-lkdtm
* kselftest-membarrier
* kselftest-memfd
* kselftest-memory-hotplug
* kselftest-mincore
* kselftest-mount
* kselftest-mqueue
* kselftest-net
* kselftest-netfilter
* kselftest-nsfs
* kselftest-openat2
* kselftest-pid_namespace
* kselftest-pidfd
* kselftest-proc
* kselftest-pstore
* kselftest-rseq
* kselftest-rtc
* kselftest-seccomp
* kselftest-sigaltstack
* kselftest-size
* kselftest-splice
* kselftest-static_keys
* kselftest-sync
* kselftest-sysctl
* kselftest-tc-testing
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cpuhotplug-tests
* ltp-crypto-tests
* ltp-dio-tests
* ltp-fs-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-syscalls-tests
* perf
* v4l2-compliance
* fwts
* kselftest-android
* kselftest-bpf
* kselftest-capabilities
* kselftest-cgroup
* kselftest-clone3
* kselftest-core
* kselftest-cpu-hotplug
* kselftest-cpufreq
* kselftest-timens
* kselftest-timers
* kselftest-tmpfs
* kselftest-tpm2
* kselftest-user
* kselftest-zram
* ltp-commands-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-tracing-tests
* network-basic-tests
* kselftest-intel_pstate
* kselftest-kexec
* kselftest-kvm
* kselftest-livepatch
* kselftest-ptrace
* kselftest-vm
* kselftest-x86
* libhugetlbfs
* ltp-controllers-tests
* ltp-mm-tests
* ltp-open-posix-tests
* kvm-unit-tests
* rcutorture
* kunit
* kselftest-vsyscall-mode-native-
* kselftest-vsyscall-mode-none-
* ssuite

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH] smp: kernel/panic.c - silence warnings

2021-03-15 Thread Randy Dunlap

On 3/15/21 9:08 PM, He Ying wrote:
> We found these warnings in kernel/panic.c by using sparse tool:
> warning: symbol 'panic_smp_self_stop' was not declared.
> warning: symbol 'nmi_panic_self_stop' was not declared.
> warning: symbol 'crash_smp_send_stop' was not declared.
> 
> To avoid them, add declarations for these three functions in
> include/linux/smp.h.
> 
> Reported-by: Hulk Robot 
> Signed-off-by: He Ying 
> ---
>  include/linux/smp.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/include/linux/smp.h b/include/linux/smp.h
> index 70c6f6284dcf..861a253cc179 100644
> --- a/include/linux/smp.h
> +++ b/include/linux/smp.h
> @@ -50,6 +50,14 @@ extern unsigned int total_cpus;
>  int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
>int wait);
>  
> +/*
> + * Cpus stopping functions in panic. All have default weak definations.

  definitions.

> + * Architecure dependent code may override them.

  Architecture-dependent


> + */
> +void panic_smp_self_stop(void);
> +void nmi_panic_self_stop(struct pt_regs *regs);
> +void crash_smp_send_stop(void);
> +
>  /*
>   * Call a function on all processors
>   */
> 


-- 
~Randy

Re: [PATCH v12 0/2] UIO support for dfl devices

2021-03-15 Thread Xu Yilun

Hi Greg:

I listed below some answers from Moritz and Yilun from previous mails for
your question.

Do you have more comments?

Thanks in advance,
Yilun

On Mon, Mar 08, 2021 at 09:59:34AM +0800, Xu Yilun wrote:
> This patchset supports some dfl device drivers written in userspace.
> 
> There are some Q&A about why UIO driver is needed in v11:
> 
> >From Greg:
>   Why are you saying that an ethernet driver should be using the UIO
>   interface?
> 
>   And why can't you use the existing UIO drivers that bind to memory
>   regions specified by firmware?  Without an interrupt being used, why is
>   UIO needed at all?
> 
> >From Moritz:
>   Essentially I see two options:
>   - Have a DFL bus driver instantiate a platform driver (uio_pdrv_genirq)
> which I *think* you described above?
>   - What this patch implements -- a UIO driver on the DFL bus
> 
>   These FPGA devices can on the fly change their contents and -- even if
>   just for test -- being able to expose a bunch of registers via UIO can
>   be extremely useful.
> 
>   Whether a device should expose registers or not should be up to the
>   implemeneter of the FPGA design I think (policy). This patch (or the
>   previous version) provides a mechanism to do so via DFL.
> 
>   This is similar in nature to uio_pdrv_genirq on a DT based platform, to
>   expose the registers you instantiate the DT node.
> 
>   Re-implementing a new driver for each of these instances doesn't seem
>   desirable and tying DFL as enumeration mechanism to UIO seems like a
>   good compromise for enabling this kind of functionality.
> 
>   Note this is *not* an attempt to bypass the network stack or other
>   existing subsystems.
> 
> See the original message in:
>   
> https://lore.kernel.org/linux-fpga/ydvq8ao8v3nhl...@epycbox.lan/T/#m66ba2c96848e3dea38d1a4f16dfea3cb291f7975
> 
> 
> >From Yilun:
>   The ETH GROUP IP is not designed as the full functional ethernet
>   controller. It is specially developed for the Intel N3000 NIC. Since it
>   is an FPGA based card, it is designed for the users to runtime reload
>   part of the MAC layer logic developed by themselves, while the ETH GROUP
>   is another part of the MAC which is not expected to be reloaded by
>   customers, but it provides some configurations for software to work with
>   the user logic.
> 
>   So I category the feature as the devices that "designed for specific
>   purposes and does not fit into one of the standard kernel subsystems".
>   Some related description could be found in Patch #2, to illustrate why
>   using UIO for some DFL devices.
> 
>   There are now UIO drivers for PCI or platform devices, but in this case
>   we are going to export a DFL(Device Feature List) bus device to
>   userspace, a DFL driver for UIO is needed to bind to it.
> 
> See the original message in:
>   
> https://lore.kernel.org/linux-fpga/ydvq8ao8v3nhl...@epycbox.lan/T/#m91b303fd61485644353fad1e1e9c11d528844684
> 
> 
> Xu Yilun (2):
>   uio: uio_dfl: add userspace i/o driver for DFL bus
>   Documentation: fpga: dfl: Add description for DFL UIO support
> 
>  Documentation/fpga/dfl.rst | 26 ++
>  MAINTAINERS|  1 +
>  drivers/uio/Kconfig| 17 
>  drivers/uio/Makefile   |  1 +
>  drivers/uio/uio_dfl.c  | 66 
> ++
>  5 files changed, 111 insertions(+)
>  create mode 100644 drivers/uio/uio_dfl.c
> 
> -- 
> 2.7.4

[RFC PATCH v3 18/18] dyndbg: shuffle ddebug_table fields

2021-03-15 Thread Jim Cromie

In preparation to unionize structs _ddebug & ddebug_table, shuffle
fields in latter so they match the layout of the former.  This MAY
simplify initialization of the header field, in particular by
preserving *sites.

It also sets up a later conversion to a flex-array ddebugs[].

This step is mostly to isolate/prove no breakage before HEAD++

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 29 +
 lib/dynamic_debug.c   |  2 +-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 229cfd81ffb3..22f11218047f 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -66,6 +66,35 @@ struct _ddebug {
 #endif
 } __aligned(8);
 
+/* this pair of header structs correspond quite deeply to struct
+ * _ddebug(|_site)s above; they are also created into __dyndbg*
+ * sections (by DEFINE_DYNAMIC_DEBUG_TABLE), and should be unionized
+ * with them to reinforce this.
+ *
+ * struct _ddebug_header is the important one, it has enough space to
+ * take struct ddebug_table's job, ie: link together into a list, and
+ * keep track of the modname & _ddebug(|_sites) vectors.
+ *
+ * Its other job is handled by its placement in the front of a
+ * module's _ddebug[N] entries.  Each _ddebug knows its N, so the
+ * header's address is computable, and its site pointer is available
+ * to get _ddebug_sites[N].  Then we can drop _ddebug.sites, regaining
+ * parity with original _ddebug footprint.
+ *
+ * Eventually, N will index a fetch from a compressed block, or for
+ * enabled callsites, a hash.  A global hash is probably adequate, if
+ * ~5k elements doesnt degrade access time.
+ */
+struct _ddebug_site_header {
+   /* we have 24 bytes total here, all currently unused */
+} __aligned(8);
+
+struct _ddebug_header {
+   struct _ddebug_site* sites;
+   struct list_head link;
+   const char* mod_name;
+   unsigned num_ddebugs;
+} __aligned(8);
 
 #if defined(CONFIG_DYNAMIC_DEBUG_CORE)
 
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index c1a7345277eb..5d1ce7f21c30 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -45,11 +45,11 @@ extern struct _ddebug_site __start___dyndbg_sites[];
 extern struct _ddebug_site __stop___dyndbg_sites[];
 
 struct ddebug_table {
+   struct _ddebug_site *sites;
struct list_head link;
const char *mod_name;
unsigned int num_ddebugs;
struct _ddebug *ddebugs;
-   struct _ddebug_site *sites;
 };
 
 struct ddebug_query {
-- 
2.29.2

[RFC PATCH v3 17/18] dyndbg: RFC - DECLARE/DEFINE_DYNAMIC_DEBUG_TABLE

2021-03-15 Thread Jim Cromie

V4-proto - now currently diet-3i

Simplify:
.gnu.linkonce._mod_.dyndbg -> .gnu.linkonce.dyndbg
ie drop _mod_

This puts a single header record at front of the vectors, solving 2
problems (discussed below) simultaneously:

- header in front, allowing flex-array rep of layout.
- single header, not per-module. adequate for needs, no wasted space.

this now appears to work
- get: dp range-check good on builtin, !builtin.

todo:
- _sym_##_site symbols in init/main.i still busted
- _index init in __ddebug_add_module
- cleanup commentary below.

DEFINE_DYNAMIC_DEBUG_TABLE is based on DECLARE_DYNAMIC_DEBUG_METADATA.
Like its model, it creates/allocates a pair of structs: _ddebug &
_ddebug_site, and inits them distinctively.

Its purpose is to create an in-situ module-header-pair for each
module's sub-vectors of _ddebug[], _ddebug_sites[].  Once the
header-pair is reliably linked into the vectors, code can get from
_ddebug[N] to the header-pair, then to _ddebug_sites[N] at runtime by
saving N into _ddebug._index during init.  With this, we can drop the
site pointer in struct _ddebug.

Eventually, this module-header-pair can be adapted to be an in-situ
replacement for the separately allocated struct ddebug_tables, and be
linked into the ddebug_tables list.

RFC, NOTES:

DYNAMIC_DEBUG is a 'transparent' facility, in that uses of pr_debug
get extra features without additional api.  Because of this,
DEFINE_DYNAMIC_DEBUG_TABLE is 'transparently' invoked by
dynamic_debug.h on behalf of all its (indirect) users, including
printk.h.

IOW this has wide effects; it results in multiple redundant
declarations of header records, even single object files may get
multiple copies.  Using .gnu.linkonce._mod_.dyndbg(|_site) section
names and "__used __weak " seems to resolve the redundancies.  I
havent tried with clang.

In vmlinux-lds.h, the 2 KEEPs are modified to append those 2 new
header sections to their respective existing __dyndbg* sections, in an
interleaved manner.  This places the header records immediately after
the modules' block of _ddebug*s, in a knowable offset from &__dyndbg[0].

scripts/Makefile.lib gets a new -DKBUILD_MODSYM defn, with a value
like KBUILD_MODNAME, except that its not __stringified.  It is used to
create a pair of module-ish named variables: _sym_##_dyndbg_base.

For some non-obvious reason, the substitution doesnt work, resulting
in per-module symbol names like KBUILD_MODSYM_dyndbg_base.  This
subtly alters the header initialization and is_dyndbg_header_pair(),
which is used in __init to find the headers adjacent to each modules'
block of _ddebug records.  This isnt fatal to the plan; we just need
the storage reserved where its accessible by known offset from the
_ddebug[N] record of an enabled pr-debug.  But it would be nice to
have the symbol names consistent with the intent.  I looked for a
MODULE_SINGLETON(name_suffix) to use, similarly to how UNIQUE_ID is
used to construct names, but found nothing.

The .gnu.linkonce._mod_. works to eliminate all the extra headers in
each module, but a problem remains; it adds unneeded headers for
modules with zero pr-debugs.  So Im seeing ~1500 extra headers.

I tried several flavors of conditional linking, now think I want/need
a linker-script language extension:

  KEEP ( *(__dyndbg) AND *(.gnu.linkonce.dyndbg) )  # my need
  KEEP ( *(foo_tbl)   OR *(.gnu.linkonce.foo_alt) ) # for completeness

I've managed to alter ld's grammar, but its only compile tested, and
is missing the conditional linking pieces.  Since this patchset's
value proposition is a memory shrink, ~1500 extra headers is fatal,
and this patchset must be a slow-cook.

V4 todo:

Time to simplify, drop _mod_, and change _ddebug.module_index to
._index, indicating that its no longer keyed to module, but to the
whole compilation unit, which for the kernel includes all the builtin
modules.  loadable modules should get their own. tbt.

this could obsolete all the above problems.

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/i915/i915_drv.c   |   2 +
 include/asm-generic/vmlinux.lds.h |  27 +---
 include/linux/dynamic_debug.h | 100 +-
 lib/dynamic_debug.c   |  28 ++---
 scripts/Makefile.lib  |   2 +
 5 files changed, 139 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8e9cb44e66e5..51163e0d40cf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -91,6 +91,8 @@
 
 static const struct drm_driver driver;
 
+//DEFINE_DYNAMIC_DEBUG_TABLE();
+
 static int i915_get_bridge_dev(struct drm_i915_private *dev_priv)
 {
int domain = pci_domain_nr(dev_priv->drm.pdev->bus);
diff --git a/include/asm-generic/vmlinux.lds.h 
b/include/asm-generic/vmlinux.lds.h
index 4f2af9de2f03..482d94078785 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -335,6 +335,22 @@
KEEP(*(.dtb.init.rodata))

[RFC PATCH v3 16/18] dyndbg: prevent build bugs via -DNO_DYNAMIC_DEBUG_TABLE

2021-03-15 Thread Jim Cromie

The next patch adds DEFINE_DYNAMIC_DEBUG_TABLE(), which breaks some
subtrees with special compile constraints (efi etc).

Avoid this by adding a define to suppress the *remote declaration*
done by DEFINE_DYNAMIC_DEBUG_TABLE(), automatically, on behalf of all
possible users of pr_debug.

Signed-off-by: Jim Cromie 
---
 arch/x86/boot/compressed/Makefile | 1 +
 arch/x86/entry/vdso/Makefile  | 3 +++
 arch/x86/purgatory/Makefile   | 1 +
 drivers/firmware/efi/libstub/Makefile | 3 ++-
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/Makefile 
b/arch/x86/boot/compressed/Makefile
index e0bc3988c3fa..ada4eb960d95 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -31,6 +31,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 
vmlinux.bin.lzma \
 KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE
 cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
 KBUILD_CFLAGS += $(cflags-y)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 05c4abc2fdfd..619878f2c427 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -29,6 +29,9 @@ vobjs32-y := vdso32/note.o vdso32/system_call.o 
vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
 vobjs-$(CONFIG_X86_SGX)+= vsgx.o
 
+# avoid a x86_64_RELATIVE error
+KBUILD_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE
+
 # files to link into kernel
 obj-y  += vma.o extable.o
 KASAN_SANITIZE_vma.o   := y
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 95ea17a9d20c..95ba7b18410f 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -35,6 +35,7 @@ PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
 PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss 
-g0
 PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING
 PURGATORY_CFLAGS += -fno-stack-protector
+PURGATORY_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE
 
 # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
 # in turn leaves some undefined symbols like __fentry__ in purgatory and not
diff --git a/drivers/firmware/efi/libstub/Makefile 
b/drivers/firmware/efi/libstub/Makefile
index c23466e05e60..def8febefbd3 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -13,7 +13,8 @@ cflags-$(CONFIG_X86)  += -m$(BITS) -D__KERNEL__ \
   -Wno-pointer-sign \
   $(call cc-disable-warning, 
address-of-packed-member) \
   $(call cc-disable-warning, gnu) \
-  -fno-asynchronous-unwind-tables
+  -fno-asynchronous-unwind-tables \
+  -DNO_DYNAMIC_DEBUG_TABLE
 
 # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly
 # disable the stackleak plugin
-- 
2.29.2

[RFC PATCH v3 13/18] dyndbg+module: expose ddebug_sites to modules

2021-03-15 Thread Jim Cromie

In order to drop the pointer connecting _ddebug's to _ddebug_sites,
we need to elevate the latter; we need to track it in (internal)
ddebug_tables, and set it in ddebug_add_module.  That last part
exposes it by interface to module.c, so we add a field to load_info,
and adjust load_module to initialize it from the elf section.

Its possible that this closes a "hole" created when __dyndbg_sites
section was added, in that the section wasn't "managed" directly, and
could conceivably get lost later.  I never saw any misbehavior loading
i915.ko into a vm, but still..

TBD/RFC:

these 2 vectors should be in a single struct.  if this struct can have
ddebugs[], ie a flex-array, then container_of can get us to the
sites*, yielding &ddebug_sites[N], and allowing to drop _ddebug.site

The trouble with this is that ddebugs* now points to someone elses
memory, and we cant just steal it and stomp on the memory just in
front of it (for the sites ptr).

rename n to numdbgs

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h |  4 ++--
 kernel/module-internal.h  |  1 +
 kernel/module.c   |  9 ++---
 lib/dynamic_debug.c   | 18 +++---
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index d811273c8484..7d33475d226a 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -70,8 +70,8 @@ struct _ddebug {
 /* exported for module authors to exercise >control */
 int dynamic_debug_exec_queries(const char *query, const char *modname);
 
-int ddebug_add_module(struct _ddebug *tab, unsigned int n,
-   const char *modname);
+int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites,
+ unsigned int numdbgs, const char *modname);
 extern int ddebug_remove_module(const char *mod_name);
 extern __printf(2, 3)
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 33783abc377b..fb61eb2f8032 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -18,6 +18,7 @@ struct load_info {
char *secstrings, *strtab;
unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs;
struct _ddebug *debug;
+   struct _ddebug_site *sites;
unsigned int num_debug;
bool sig_ok;
 #ifdef CONFIG_KALLSYMS
diff --git a/kernel/module.c b/kernel/module.c
index 30479355ab85..792903230acd 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2780,11 +2780,12 @@ static void add_kallsyms(struct module *mod, const 
struct load_info *info)
 }
 #endif /* CONFIG_KALLSYMS */
 
-static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, 
unsigned int num)
+static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug,
+   struct _ddebug_site *sites, unsigned int num)
 {
if (!debug)
return;
-   ddebug_add_module(debug, num, mod->name);
+   ddebug_add_module(debug, sites, num, mod->name);
 }
 
 static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
@@ -,6 +3334,8 @@ static int find_module_sections(struct module *mod, 
struct load_info *info)
 
info->debug = section_objs(info, "__dyndbg",
   sizeof(*info->debug), &info->num_debug);
+   info->sites = section_objs(info, "__dyndbg_sites",
+  sizeof(*info->sites), &info->num_debug);
 
return 0;
 }
@@ -4004,7 +4007,7 @@ static int load_module(struct load_info *info, const char 
__user *uargs,
goto free_arch_cleanup;
}
 
-   dynamic_debug_setup(mod, info->debug, info->num_debug);
+   dynamic_debug_setup(mod, info->debug, info->sites, info->num_debug);
 
/* Ftrace init must be called in the MODULE_STATE_UNFORMED state */
ftrace_module_init(mod);
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index fdc2d0e15731..5529b17461ae 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -49,6 +49,7 @@ struct ddebug_table {
const char *mod_name;
unsigned int num_ddebugs;
struct _ddebug *ddebugs;
+   struct _ddebug_site *sites;
 };
 
 struct ddebug_query {
@@ -1014,8 +1015,8 @@ static const struct proc_ops proc_fops = {
  * Allocate a new ddebug_table for the given module
  * and add it to the global list.
  */
-int ddebug_add_module(struct _ddebug *tab, unsigned int n,
-const char *name)
+int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites,
+ unsigned int numdbgs, const char *modname)
 {
struct ddebug_table *dt;
 
@@ -1030,15 +1031,16 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int 
n,
 * member of struct module, which lives at least as long as
 * this struct ddebug_table.
 */
-   dt->mod_name

[RFC PATCH v3 09/18] dyndbg: optimize ddebug_emit_prefix

2021-03-15 Thread Jim Cromie

Add early return if no callsite info is specified in site-flags.
This avoids fetching site info that isn't going to be printed.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 9 +
 lib/dynamic_debug.c   | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index bc8027292c02..aa4a3f5d8d35 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -40,6 +40,15 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_FUNCNAME   (1<<2)
 #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
 #define _DPRINTK_FLAGS_INCL_TID(1<<4)
+
+#define _DPRINTK_FLAGS_INCL_ANYSITE\
+   (_DPRINTK_FLAGS_INCL_MODNAME\
+| _DPRINTK_FLAGS_INCL_FUNCNAME \
+| _DPRINTK_FLAGS_INCL_LINENO)
+#define _DPRINTK_FLAGS_INCL_ANY\
+   (_DPRINTK_FLAGS_INCL_ANYSITE\
+| _DPRINTK_FLAGS_INCL_TID)
+
 #if defined DEBUG
 #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
 #else
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 1f0cac4a66af..af9cf97f869b 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -632,6 +632,9 @@ static char *dynamic_emit_prefix(const struct _ddebug *dp, 
char *buf)
}
pos_after_tid = pos;
 
+   if (!(dp->flags & _DPRINTK_FLAGS_INCL_ANYSITE))
+   return buf;
+
if (desc) {
if (dp->flags & _DPRINTK_FLAGS_INCL_MODNAME)
pos += snprintf(buf + pos, remaining(pos), "%s:",
-- 
2.29.2

[RFC PATCH v3 14/18] dyndbg: add ddebug_site(_get|_put) abstraction

2021-03-15 Thread Jim Cromie

Replace direct ->site refs with _get(),_put() internal API.  Right
now, _get() just returns ->site and _put() does nothing.  Later we can
replace that implementation with one using ->module_index to fetch
then forget site data dynamically.

Several approaches are possible:

A: !site -> fill from backing store

1st try at this is/was using zram.  At init, it copied each callsite
into a zs-allocation, and all site-> refs afterward went thru
_get/_put to zs-map on demand, and zs-unmap the site info.  This
worked until I tried to keep callsites mapped while they're enabled,
when it gave lockdep warns/panics.  IIRC theres a zram patchset doing
something with locking; I need to retry this approach, even if other
options are better, this might be a validating use case.

B: block store

Another approach is to compress the new linker section, using some
algorithm thats good at indexed decompression.  I probed this
approach, using objcopy, unsuccessfully:

   objcopy --dump-section __dyndbg=dd \
   --dump-section __dyndbg_sites=ddsites $IMG

>From vmlinux.o dumps were mostly empty (pre-link/reloc data?)
and vmlinux didnt have the section.

C: callsite composed from __dyndbg[N] & __dyndbg_site[N]

We know _ddebug records are in a vector, either in the builtin
__dyndbg linker section, or the same from a modprobed one.  The
builtin section has all builtin module sub-sections catenated
dogether.

At init, we iterate over the section, and "parse it" by creating a
ddebug_table for each module with prdebugs.  ddebug_table.num_debugs
remembers the size of each modules' vector of prdebugs.

We need a few things:

- _ddebug.index field, which knows offset to start of this sub-vector.
  this new field will be "free" because the struct has padding.
  it can be initialized during init, then RO.

- a back-pointer at the beginning of the sub-vector, to the
  ddebug_table "owning" (but not containing) this sub-vector of
  prdebugs.

If we had both, we could get from the ddebug element to its vector
root, back up to the owning ddebug_table, then down to the _callsite
vector, and index to the right element.  While slower than a pointer
deref, this is a cold path, and it allows elimination of the
per-callsite pointer member, thus greater density of the sections, and
still can support sparse site info.

That back-pointer feels tricky.  It needs to be 1st in the sub-vector

D: (C1?) add a header record to each sub-vector

If we can insert a header record into each modules' __dyndbg* section
sub-vectors, we can simplify the cold path above; a single sites*
pointer in the header can give us access to __dyndbg_sites[N]

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 5529b17461ae..34329e323ed5 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -144,6 +144,14 @@ static void vpr_info_dq(const struct ddebug_query *query, 
const char *msg)
 query->first_lineno, query->last_lineno);
 }
 
+static struct _ddebug_site *ddebug_site_get(struct _ddebug *dp)
+{
+   return dp->site; /* passthru abstraction */
+}
+static inline void ddebug_site_put(struct _ddebug *dp)
+{
+}
+
 static int ddebug_match_site(const struct ddebug_query *query,
 const struct _ddebug *dp,
 const struct _ddebug_site *dc)
@@ -239,16 +247,18 @@ static int ddebug_change(const struct ddebug_query *query,
 
for (i = 0; i < dt->num_ddebugs; i++) {
struct _ddebug *dp = &dt->ddebugs[i];
-   struct _ddebug_site *dc = dp->site;
+   struct _ddebug_site *dc;
+
+   dc = ddebug_site_get(dp);
 
if (!ddebug_match_site(query, dp, dc))
-   continue;
+   goto skipsite;
 
nfound++;
 
newflags = (dp->flags & modifiers->mask) | 
modifiers->flags;
if (newflags == dp->flags)
-   continue;
+   goto skipsite;
 
ddebug_alter_site(dp, modifiers);
 
@@ -264,6 +274,9 @@ static int ddebug_change(const struct ddebug_query *query,
  dt->mod_name, dp->lineno,
  ddebug_describe_flags(dp->flags, 
&fbuf),
  dp->format);
+
+   skipsite:
+   ddebug_site_put(dp);
}
}
mutex_unlock(&ddebug_lock);
@@ -633,11 +646,11 @@ static int remaining(int wrote)
return 0;
 }
 
-static char *__dynamic_emit_prefix(const struct _ddebug *dp, char *buf)
+static char *__dynamic_emit_prefix(struct _ddebug *dp, char *buf)
 {
int pos_after_tid;
int pos = 0;
-   co

[RFC PATCH v3 15/18] dyndbg: add _index to struct _ddebug

2021-03-15 Thread Jim Cromie

We currently use dp->site to map: &__dyndbg[N] -> &__dyndbg_sites[N].
We want to drop site, new _ddebug._index provides the N.

The mapping is done in ddebug_site_get():

For builtin modules, a _ddebug *ptr is between __start___dyndbg and
__stop___dyndbg, and we can use &__start___dyndbg_sites[N] directly.
For loadable modules, we still need work, so we print rubbish, and
just return site pointer (which is correct).

ddebug_add_module() handles _index initialization:
Its new task is to number each module consecutively, so it gets new
base arg to pass the next starting index.

To actually drop site, We need both the module's __dyndbg* section
addys, and we need their relative placement to have a base-to-base
offset.

PLAN - a table header connecting 2 tables.

- ddebug_table points to both __dyndbgs & __dyndbg_sites.
  but *ddebugs & *sites are independent.
  no path from ddebugs[n] -> ddebug_sites[n]

If we have a header record in-situ, which keeps the site pointer we
seek to eliminate from _ddebug, and its in element[0] of both vectors,
we can go:

  ddebugs[n] -> ddebugs[0] -> containerof -> site[n]

  union ddebug_table_header {
struct ddebug_table *owner;
struct _ddebug item;
  }
  and
  struct ddebug_table_vector {
 struct ddebug_table *owner;
 struct _ddebug vector[];
  }

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h |  2 ++
 lib/dynamic_debug.c   | 43 +--
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 7d33475d226a..18689db0e2c0 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -29,6 +29,7 @@ struct _ddebug {
/* format is always needed, lineno shares word with flags */
const char *format;
const unsigned lineno:18;
+   unsigned _index:14;
/*
 * The flags field controls the behaviour at the callsite.
 * The bits here are changed dynamically when the user
@@ -56,6 +57,7 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_DEFAULT 0
 #endif
unsigned int flags:8;
+
 #ifdef CONFIG_JUMP_LABEL
union {
struct static_key_true dd_key_true;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 34329e323ed5..3b53035d63d6 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -123,6 +123,8 @@ do {
\
 #define vpr_info(fmt, ...) vnpr_info(1, fmt, ##__VA_ARGS__)
 #define v2pr_info(fmt, ...)vnpr_info(2, fmt, ##__VA_ARGS__)
 #define v3pr_info(fmt, ...)vnpr_info(3, fmt, ##__VA_ARGS__)
+#define v4pr_info(fmt, ...)vnpr_info(4, fmt, ##__VA_ARGS__)
+#define v5pr_info(fmt, ...)vnpr_info(5, fmt, ##__VA_ARGS__)
 
 static void vpr_info_dq(const struct ddebug_query *query, const char *msg)
 {
@@ -146,7 +148,17 @@ static void vpr_info_dq(const struct ddebug_query *query, 
const char *msg)
 
 static struct _ddebug_site *ddebug_site_get(struct _ddebug *dp)
 {
-   return dp->site; /* passthru abstraction */
+   v4pr_info("get %d: %s.%s.%d\n", dp->_index, dp->site->modname,
+ dp->site->function, dp->lineno);
+
+   if (dp >= __start___dyndbg && dp < __stop___dyndbg) {
+   v4pr_info(" is builtin: %d %ld\n", dp->_index, dp - 
__start___dyndbg);
+   return &__start___dyndbg_sites[ dp - __start___dyndbg ];
+   } else {
+   v3pr_info(" is loaded: %d %ld\n", dp->_index, dp - 
__start___dyndbg);
+   return dp->site;
+   }
+   return dp->site;
 }
 static inline void ddebug_site_put(struct _ddebug *dp)
 {
@@ -1034,14 +1046,16 @@ static const struct proc_ops proc_fops = {
  * Allocate a new ddebug_table for the given module
  * and add it to the global list.
  */
-int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites,
- unsigned int numdbgs, const char *modname)
+static int __ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites,
+  unsigned numdbgs, unsigned base,
+  const char *modname)
 {
struct ddebug_table *dt;
+   int i;
 
dt = kzalloc(sizeof(*dt), GFP_KERNEL);
if (dt == NULL) {
-   pr_err("error adding module: %s\n", name);
+   pr_err("error adding module: %s\n", modname);
return -ENOMEM;
}
/*
@@ -1055,6 +1069,13 @@ int ddebug_add_module(struct _ddebug *tab, struct 
_ddebug_site *sites,
dt->ddebugs = tab;
dt->sites = sites;
 
+   v3pr_info("add-module: %s\n", modname);
+   for (i = 0; i < numdbgs; i++) {
+   tab[i]._index = base++;
+   v3pr_info(" %d %d %s.%s.%d\n", i, tab[i]._index, modname,
+ tab[i].site->function, tab[i].lineno);
+   }
+
mutex_lock(&ddebug_lock);
list_add(&dt->link, &ddebug_tables);

[RFC PATCH v3 08/18] dyndbg: accept null site in ddebug_proc_show

2021-03-15 Thread Jim Cromie

Accept a ddebug record with a null site pointer, and write abbreviated
output for that record that doesn't include site info (but does
include line-number, since that can be used in >control queries).
Also add a 2nd header line with a template for the new output.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 25 +
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 0c535f3c2ba9..1f0cac4a66af 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -918,18 +918,27 @@ static int ddebug_proc_show(struct seq_file *m, void *p)
 
if (p == SEQ_START_TOKEN) {
seq_puts(m,
-"# filename:lineno [module]function flags format\n");
+"#: filename:lineno [module]function flags format\n"
+"#| [module]:lineno flags format\n"
+   );
return 0;
}
 
dc = dp->site;
-
-   seq_printf(m, "%s:%u [%s]%s =%s \"",
-  trim_prefix(dc->filename), dp->lineno,
-  iter->table->mod_name, dc->function,
-  ddebug_describe_flags(dp->flags, &flags));
-   seq_escape(m, dp->format, "\t\r\n\"");
-   seq_puts(m, "\"\n");
+   if (dc) {
+   seq_printf(m, "%s:%u [%s]%s =%s \"",
+  trim_prefix(dc->filename), dp->lineno,
+  iter->table->mod_name, dc->function,
+  ddebug_describe_flags(dp->flags, &flags));
+   seq_escape(m, dp->format, "\t\r\n\"");
+   seq_puts(m, "\"\n");
+   } else {
+   seq_printf(m, "[%s]:%u =%s \"",
+  iter->table->mod_name, dp->lineno,
+  ddebug_describe_flags(dp->flags, &flags));
+   seq_escape(m, dp->format, "\t\r\n\"");
+   seq_puts(m, "\"\n");
+   }
 
return 0;
 }
-- 
2.29.2

[RFC PATCH v3 12/18] dyndbg: allow deleting site info via control interface

2021-03-15 Thread Jim Cromie

Allow users & subsystems to selectively delete callsite info for
pr-debug callsites.  Hopefully, this can lead to actual recovery of
memory.

DRM is a potential user which would drop the sites:

- has distinct categories for logging, and can map them over to a
  format prefix, like: "drm:core:", "drm:kms:", etc.

- are happy with group control of all the callsites in a class/cateory.
  individual control is still possible using queries including line numbers

- don't need dynamic "module:function:line:" prefixes in log messages

- don't care about loss of context in /proc/dynamic_debug/control

before:

init/initramfs.c:485 [initramfs]unpack_to_rootfs =_ "Detected %s compressed 
data\012"
init/main.c:1337 [main]run_init_process =pm "%s\012"
init/main.c:1335 [main]run_init_process =pm "  with environment:\012"
init/main.c:1334 [main]run_init_process =pm "%s\012"
init/main.c:1332 [main]run_init_process =pm "  with arguments:\012"
init/main.c:1121 [main]initcall_blacklisted =pm "initcall %s blacklisted\012"
init/main.c:1082 [main]initcall_blacklist =pm "blacklisting initcall %s\012"

then:
  bash-5.0# echo file init/main.c +D > /proc/dynamic_debug/control

after:

init/initramfs.c:485 [initramfs]unpack_to_rootfs =_ "Detected %s compressed 
data\012"
[main]:1337 =pmD "%s\012"
[main]:1335 =pmD "  with environment:\012"
[main]:1334 =pmD "%s\012"
[main]:1332 =pmD "  with arguments:\012"
[main]:1121 =pmD "initcall %s blacklisted\012"
[main]:1082 =pmD "blacklisting initcall %s\012"

Notes:

If Drm adopted dyndbg, i915 + drm* would add ~1600 prdebugs, amdgpu +
drm* would add ~3200 callsites, so the additional memory costs are
substantial.  In trade, drm and drivers would avoid lots of calls to
drm_debug_enabled().  This patch should reduce the costs.

Using this interface, drm could drop site info for all categories /
prefixes controlled by bits in drm.debug, while preserving site info
and individual selectivity for any uncategorized prdebugs, and for all
other modules.

Lastly, because lineno field was not moved into _ddebug_callsite, it
can be used to modify a single[*] callsite even if drm has dropped all
the callsite data:

  echo module $mod format ^$prefix line $line +p >control

Dropping site info is a one-way, information losing operation, so
minor misuse is possible.  Worst case is maybe (depending upon
previous settings) some loss of logging context/decorations.

  echo +D > /proc/dynamic_debug/control

[*] amdgpu has some macros invoking clusters of pr_debugs; each use of
them creates a cluster of pr-debugs with the same line number.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 1 +
 lib/dynamic_debug.c   | 9 +
 2 files changed, 10 insertions(+)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index aa4a3f5d8d35..d811273c8484 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -40,6 +40,7 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_FUNCNAME   (1<<2)
 #define _DPRINTK_FLAGS_INCL_LINENO (1<<3)
 #define _DPRINTK_FLAGS_INCL_TID(1<<4)
+#define _DPRINTK_FLAGS_DELETE_SITE (1<<7) /* drop site info to save ram */
 
 #define _DPRINTK_FLAGS_INCL_ANYSITE\
(_DPRINTK_FLAGS_INCL_MODNAME\
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 6fbd9099c2fa..fdc2d0e15731 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -92,6 +92,7 @@ static struct { unsigned flag:8; char opt_char; } opt_array[] 
= {
{ _DPRINTK_FLAGS_INCL_LINENO, 'l' },
{ _DPRINTK_FLAGS_INCL_TID, 't' },
{ _DPRINTK_FLAGS_NONE, '_' },
+   { _DPRINTK_FLAGS_DELETE_SITE, 'D' },
 };
 
 struct flagsbuf { char buf[ARRAY_SIZE(opt_array)+1]; };
@@ -201,6 +202,14 @@ static void ddebug_alter_site(struct _ddebug *dp,
} else if (modifiers->flags & _DPRINTK_FLAGS_PRINT)
static_branch_enable(&dp->key.dd_key_true);
 #endif
+   /* delete site info for this callsite */
+   if (modifiers->flags & _DPRINTK_FLAGS_DELETE_SITE) {
+   if (dp->site) {
+   vpr_info("dropping site info %s.%s.%d\n", 
dp->site->filename,
+   dp->site->function, dp->lineno);
+   dp->site = NULL;
+   }
+   }
 }
 
 /*
-- 
2.29.2

[RFC PATCH v3 11/18] dyndbg: refactor ddebug_alter_site out of ddebug_change

2021-03-15 Thread Jim Cromie

Move the JUMP_LABEL/static-key code to a separate function.

no functional changes.

Signed-off-by: Jim Cromie 
---
 lib/dynamic_debug.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 2d011ac3308d..6fbd9099c2fa 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -191,6 +191,18 @@ static int ddebug_match_site(const struct ddebug_query 
*query,
return true;
 }
 
+static void ddebug_alter_site(struct _ddebug *dp,
+ struct flag_settings *modifiers)
+{
+#ifdef CONFIG_JUMP_LABEL
+   if (dp->flags & _DPRINTK_FLAGS_PRINT) {
+   if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT))
+   static_branch_disable(&dp->key.dd_key_true);
+   } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT)
+   static_branch_enable(&dp->key.dd_key_true);
+#endif
+}
+
 /*
  * Search the tables for _ddebug's which match the given `query' and
  * apply the `flags' and `mask' to them.  Returns number of matching
@@ -227,13 +239,9 @@ static int ddebug_change(const struct ddebug_query *query,
newflags = (dp->flags & modifiers->mask) | 
modifiers->flags;
if (newflags == dp->flags)
continue;
-#ifdef CONFIG_JUMP_LABEL
-   if (dp->flags & _DPRINTK_FLAGS_PRINT) {
-   if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT))
-   
static_branch_disable(&dp->key.dd_key_true);
-   } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT)
-   static_branch_enable(&dp->key.dd_key_true);
-#endif
+
+   ddebug_alter_site(dp, modifiers);
+
dp->flags = newflags;
 
if (dc)
-- 
2.29.2

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 2819 matches

Mail list logo