Re: [PATCH] smp: kernel/panic.c - silence warnings
Le 16/03/2021 à 07:35, heying (H) a écrit : Thanks for your reply. 在 2021/3/16 13:15, Randy Dunlap 写道: On 3/15/21 9:08 PM, He Ying wrote: We found these warnings in kernel/panic.c by using sparse tool: warning: symbol 'panic_smp_self_stop' was not declared. warning: symbol 'nmi_panic_self_stop' was not declared. warning: symbol 'crash_smp_send_stop' was not declared. To avoid them, add declarations for these three functions in include/linux/smp.h. Reported-by: Hulk Robot Signed-off-by: He Ying --- include/linux/smp.h | 8 1 file changed, 8 insertions(+) diff --git a/include/linux/smp.h b/include/linux/smp.h index 70c6f6284dcf..861a253cc179 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,6 +50,14 @@ extern unsigned int total_cpus; int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait); +/* + * Cpus stopping functions in panic. All have default weak definations. definitions. Sorry for my typo. + * Architecure dependent code may override them. Architecture-dependent Is that necessary? Sure you are missing a 't' Architecure ==> Architecture + */ +void panic_smp_self_stop(void); +void nmi_panic_self_stop(struct pt_regs *regs); +void crash_smp_send_stop(void); + /* * Call a function on all processors */
[PATCH v7 4/6] selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available
The SIGSTKSZ constant may not represent enough stack size in some architectures as the hardware state size grows. Use getauxval(AT_MINSIGSTKSZ) to increase the stack size. Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Cc: linux-kselft...@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- Changes from v5: * Added as a new patch. --- tools/testing/selftests/sigaltstack/sas.c | 20 +++- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index 8934a3766d20..c53b070755b6 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "../kselftest.h" @@ -24,6 +25,11 @@ #define SS_AUTODISARM (1U << 31) #endif +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 +#endif + +static unsigned int stack_size; static void *sstack, *ustack; static ucontext_t uc, sc; static const char *msg = "[OK]\tStack preserved"; @@ -47,7 +53,7 @@ void my_usr1(int sig, siginfo_t *si, void *u) #endif if (sp < (unsigned long)sstack || - sp >= (unsigned long)sstack + SIGSTKSZ) { + sp >= (unsigned long)sstack + stack_size) { ksft_exit_fail_msg("SP is not on sigaltstack\n"); } /* put some data on stack. other sighandler will try to overwrite it */ @@ -108,6 +114,10 @@ int main(void) stack_t stk; int err; + /* Make sure more than the required minimum. */ + stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ; + ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size); + ksft_print_header(); ksft_set_plan(3); @@ -117,7 +127,7 @@ int main(void) sigaction(SIGUSR1, &act, NULL); act.sa_sigaction = my_usr2; sigaction(SIGUSR2, &act, NULL); - sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (sstack == MAP_FAILED) { ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); @@ -139,7 +149,7 @@ int main(void) } stk.ss_sp = sstack; - stk.ss_size = SIGSTKSZ; + stk.ss_size = stack_size; stk.ss_flags = SS_ONSTACK | SS_AUTODISARM; err = sigaltstack(&stk, NULL); if (err) { @@ -161,7 +171,7 @@ int main(void) } } - ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, + ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (ustack == MAP_FAILED) { ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); @@ -170,7 +180,7 @@ int main(void) getcontext(&uc); uc.uc_link = NULL; uc.uc_stack.ss_sp = ustack; - uc.uc_stack.ss_size = SIGSTKSZ; + uc.uc_stack.ss_size = stack_size; makecontext(&uc, switch_fn, 0); raise(SIGUSR1); -- 2.17.1
[PATCH v7 6/6] selftest/x86/signal: Include test cases for validating sigaltstack
The test measures the kernel's signal delivery with different (enough vs. insufficient) stack sizes. Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Cc: x...@kernel.org Cc: linux-kselft...@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- Changes from v3: * Revised test messages again (Borislav Petkov) Changes from v2: * Revised test messages (Borislav Petkov) --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sigaltstack.c | 128 ++ 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/sigaltstack.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 333980375bc7..65bba2ae86ee 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \ - syscall_arg_fault fsgsbase_restore + syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c new file mode 100644 index ..f689af75e979 --- /dev/null +++ b/tools/testing/selftests/x86/sigaltstack.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* sigaltstack()-enforced minimum stack */ +#define ENFORCED_MINSIGSTKSZ 2048 + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static int nerrs; + +static bool sigalrm_expected; + +static unsigned long at_minstack_size; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static int setup_altstack(void *start, unsigned long size) +{ + stack_t ss; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = start; + + return sigaltstack(&ss, NULL); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + if (sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM)."); + nerrs++; + } else { + printf("[OK]\tSIGSEGV signal delivered.\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigalrm(int sig, siginfo_t *info, void *ctx_void) +{ + if (!sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV)."); + nerrs++; + } else { + printf("[OK]\tSIGALRM signal delivered.\n"); + } +} + +static void test_sigaltstack(void *altstack, unsigned long size) +{ + if (setup_altstack(altstack, size)) + err(1, "sigaltstack()"); + + sigalrm_expected = (size > at_minstack_size) ? true : false; + + sethandler(SIGSEGV, sigsegv, 0); + sethandler(SIGALRM, sigalrm, SA_ONSTACK); + + if (!sigsetjmp(jmpbuf, 1)) { + printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n", + sigalrm_expected ? "" : "in"); + printf("\tRaise SIGALRM. %s is expected to be delivered.\n", + sigalrm_expected ? "It" : "SIGSEGV"); + raise(SIGALRM); + } + + clearhandler(SIGALRM); + clearhandler(SIGSEGV); +} + +int main(void) +{ + void *altstack; + + at_minstack_size = getauxval(AT_MINSIGSTKSZ); + + altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (altstack == MAP_FAILED) + err(1, "mmap()"); + + if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size) + test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1); + + test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ); + + return nerrs == 0 ? 0 : 1; +} -- 2.17.1
Re: [PATCH] powerpc: arch/powerpc/kernel/setup_64.c - cleanup warnings
Le 16/03/2021 à 05:11, He Ying a écrit : warning: symbol 'rfi_flush' was not declared. warning: symbol 'entry_flush' was not declared. warning: symbol 'uaccess_flush' was not declared. We found warnings above in arch/powerpc/kernel/setup_64.c by using sparse tool. Define 'entry_flush' and 'uaccess_flush' as static because they are not referenced outside the file. Include asm/security_features.h in which 'rfi_flush' is declared. Reported-by: Hulk Robot Signed-off-by: He Ying Reviewed-by: Christophe Leroy --- arch/powerpc/kernel/setup_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 560ed8b975e7..f92d72a7e7ce 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,6 +68,7 @@ #include #include #include +#include #include "setup.h" @@ -949,8 +950,8 @@ static bool no_rfi_flush; static bool no_entry_flush; static bool no_uaccess_flush; bool rfi_flush; -bool entry_flush; -bool uaccess_flush; +static bool entry_flush; +static bool uaccess_flush; DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); EXPORT_SYMBOL(uaccess_flush_key);
[PATCH v7 3/6] x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ
Historically, signal.h defines MINSIGSTKSZ (2KB) and SIGSTKSZ (8KB), for use by all architectures with sigaltstack(2). Over time, the hardware state size grew, but these constants did not evolve. Today, literal use of these constants on several architectures may result in signal stack overflow, and thus user data corruption. A few years ago, the ARM team addressed this issue by establishing getauxval(AT_MINSIGSTKSZ). This enables the kernel to supply at runtime value that is an appropriate replacement on the current and future hardware. Add getauxval(AT_MINSIGSTKSZ) support to x86, analogous to the support added for ARM in commit 94b07c1f8c39 ("arm64: signal: Report signal frame size to userspace via auxv"). Also, include a documentation to describe x86-specific auxiliary vectors. Reported-by: Florian Weimer Fixes: c2bc11f10a39 ("x86, AVX-512: Enable AVX-512 States Context Switch") Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Cc: H.J. Lu Cc: Fenghua Yu Cc: Dave Martin Cc: Michael Ellerman Cc: x...@kernel.org Cc: libc-al...@sourceware.org Cc: linux-a...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=153531 --- Changes from v6: * Revised the documentation and fixed the build issue. (Borislav Petkov) * Fixed the vertical alignment of '\'. (Borislav Petkov) Changes from v5: * Added a documentation. --- Documentation/x86/elf_auxvec.rst | 53 ++ Documentation/x86/index.rst| 1 + arch/x86/include/asm/elf.h | 4 +++ arch/x86/include/uapi/asm/auxvec.h | 4 +-- arch/x86/kernel/signal.c | 5 +++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 Documentation/x86/elf_auxvec.rst diff --git a/Documentation/x86/elf_auxvec.rst b/Documentation/x86/elf_auxvec.rst new file mode 100644 index ..6c75b26f5efb --- /dev/null +++ b/Documentation/x86/elf_auxvec.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +== +x86-specific ELF Auxiliary Vectors +== + +This document describes the semantics of the x86 auxiliary vectors. + +Introduction + + +ELF Auxiliary vectors enable the kernel to efficiently provide +configuration specific parameters to userspace. In this example, a program +allocates an alternate stack based on the kernel-provided size:: + + #include + #include + #include + #include + #include + #include + + #ifndef AT_MINSIGSTKSZ + #define AT_MINSIGSTKSZ 51 + #endif + + + stack_t ss; + + ss.ss_sp = malloc(ss.ss_size); + assert(ss.ss_sp); + + ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ; + ss.ss_flags = 0; + + if (sigaltstack(&ss, NULL)) +err(1, "sigaltstack"); + + +The exposed auxiliary vectors += + +AT_SYSINFO is used for locating the vsyscall entry point. It is not +exported on 64-bit mode. + +AT_SYSINFO_EHDR is the start address of the page containing the vDSO. + +AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to +deliver a signal to user-space. AT_MINSIGSTKSZ comprehends the space +consumed by the kernel to accommodate the user context for the current +hardware configuration. It does not comprehend subsequent user-space stack +consumption, which must be added by the user. (e.g. Above, user-space adds +SIGSTKSZ to AT_MINSIGSTKSZ.) diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index 4693e192b447..d58614d5cde6 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -35,3 +35,4 @@ x86-specific Documentation sva sgx features + elf_auxvec diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9224d40cdefe..18d9b1117871 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -312,6 +312,7 @@ do { \ NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY);\ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\ } \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \ } while (0) /* @@ -328,6 +329,7 @@ extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); +extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 @@ -349,6 +351,7 @@ do { \ if (vdso64_enabled) \ NEW_AUX_ENT(AT_SYSINFO_EHDR,\ (unsigned long __force)current->mm->context.v
Re: [PATCH v1 3/5] dt-bindings: arm: Add cpu-idle-states to Tegra194 CPU nodes
On Fri, Mar 12, 2021 at 02:27:30PM -0800, Sowjanya Komatineni wrote: > Hi Sudeep, > > To make our driver PSCI compliant below are few updates to be done > > 1. Standardize passing residency time run-time thru PSCI to ATF. > Yes that was my initial understanding, but your previous response was confusing. I should have read this first. > From PSCI specification I only see PSCI_STAT_RESIDENCY and > PSCI_STAT_COUNT optional functions for PSCI1.0/PSCI1.1 > Indeed, we don't have any support to pass run-time residency hints in PSCI today. > Can you please help add right people to explore on possible ways to add > PSCI function for passing corresponding state residency time to ATF? > Before we jump to implementation in TF-A we need to get agreement to get this added to the specification to support in OSPM/Linux. TF-A is just one implementation of PSCI and may not be only one. Formally you can raise any specification related queries to https://developer.arm.com/support or supp...@arm.com. I will ask the author of PSCI specification internally to take a look at this thread and chime in if they can. > 2. Tegra CPU Idle support definitely need to pass deepest cluster state and > threshold to MCE firmware from DT and looks like we can move this > implementation to ATF > Yes, I just asked the same question as response to your earlier email. Thanks for confirming that it can be moved out of OSPM/Linux and DT > With both of the above implementation changes Tegra194 driver will be > PSCI compliant. > We still need to get agreement on the specification front 😉. -- Regards, Sudeep
[PATCH v7 5/6] x86/signal: Detect and prevent an alternate signal stack overflow
The kernel pushes context on to the userspace stack to prepare for the user's signal handler. When the user has supplied an alternate signal stack, via sigaltstack(2), it is easy for the kernel to verify that the stack size is sufficient for the current hardware context. Check if writing the hardware context to the alternate stack will exceed it's size. If yes, then instead of corrupting user-data and proceeding with the original signal handler, an immediate SIGSEGV signal is delivered. Instead of calling on_sig_stack(), directly check the new stack pointer whether in the bounds. While the kernel allows new source code to discover and use a sufficient alternate signal stack size, this check is still necessary to protect binaries with insufficient alternate signal stack size from data corruption. Suggested-by: Jann Horn Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Reviewed-by: Jann Horn Cc: Andy Lutomirski Cc: Jann Horn Cc: x...@kernel.org Cc: linux-kernel@vger.kernel.org --- Changes from v5: * Fixed the overflow check. (Andy Lutomirski) * Updated the changelog. Changes from v3: * Updated the changelog (Borislav Petkov) Changes from v2: * Simplified the implementation (Jann Horn) --- arch/x86/kernel/signal.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 0d24f64d0145..9a62604fbf63 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -242,7 +242,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long math_size = 0; unsigned long sp = regs->sp; unsigned long buf_fx = 0; - int onsigstack = on_sig_stack(sp); + bool onsigstack = on_sig_stack(sp); int ret; /* redzone */ @@ -251,8 +251,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) + if (sas_ss_flags(sp) == 0) { sp = current->sas_ss_sp + current->sas_ss_size; + /* On the alternate signal stack */ + onsigstack = true; + } } else if (IS_ENABLED(CONFIG_X86_32) && !onsigstack && regs->ss != __USER_DS && @@ -272,7 +275,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, * If we are on the alternate signal stack and would overflow it, don't. * Return an always-bogus address instead so we will die with SIGSEGV. */ - if (onsigstack && !likely(on_sig_stack(sp))) + if (onsigstack && unlikely(sp <= current->sas_ss_sp || + sp - current->sas_ss_sp > current->sas_ss_size)) return (void __user *)-1L; /* save i387 and extended state */ -- 2.17.1
[PATCH v7 2/6] x86/signal: Introduce helpers to get the maximum signal frame size
Signal frames do not have a fixed format and can vary in size when a number of things change: support XSAVE features, 32 vs. 64-bit apps. Add the code to support a runtime method for userspace to dynamically discover how large a signal stack needs to be. Introduce a new variable, max_frame_size, and helper functions for the calculation to be used in a new user interface. Set max_frame_size to a system-wide worst-case value, instead of storing multiple app-specific values. Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Acked-by: H.J. Lu Cc: x...@kernel.org Cc: linux-kernel@vger.kernel.org --- Changes from v2: * Renamed the fpstate size helper with cleanup (Borislav Petkov) * Moved the sigframe struct size defines to where used (Borislav Petkov) * Removed unneeded sentence in the changelog (Borislav Petkov) Change from v1: * Took stack alignment into account for sigframe size (Dave Martin) --- arch/x86/include/asm/fpu/signal.h | 2 ++ arch/x86/include/asm/sigframe.h | 2 ++ arch/x86/kernel/cpu/common.c | 3 ++ arch/x86/kernel/fpu/signal.c | 19 +++ arch/x86/kernel/signal.c | 57 +-- 5 files changed, 81 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 7fb516b6893a..8b6631dffefd 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -29,6 +29,8 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); +unsigned long fpu__get_fpstate_size(void); + extern void fpu__init_prepare_fx_sw_frame(void); #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h index 84eab2724875..5b1ed650b124 100644 --- a/arch/x86/include/asm/sigframe.h +++ b/arch/x86/include/asm/sigframe.h @@ -85,4 +85,6 @@ struct rt_sigframe_x32 { #endif /* CONFIG_X86_64 */ +void __init init_sigframe_size(void); + #endif /* _ASM_X86_SIGFRAME_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ab640abe26b6..c49ef3ad34dc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -58,6 +58,7 @@ #include #include #include +#include #include "cpu.h" @@ -1334,6 +1335,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) fpu__init_system(c); + init_sigframe_size(); + #ifdef CONFIG_X86_32 /* * Regardless of whether PCID is enumerated, the SDM says diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index a4ec65317a7f..dbb304e48f16 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -507,6 +507,25 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, return sp; } + +unsigned long fpu__get_fpstate_size(void) +{ + unsigned long ret = xstate_sigframe_size(); + + /* +* This space is needed on (most) 32-bit kernels, or when a 32-bit +* app is running on a 64-bit kernel. To keep things simple, just +* assume the worst case and always include space for 'freg_state', +* even for 64-bit apps on 64-bit kernels. This wastes a bit of +* space, but keeps the code simple. +*/ + if ((IS_ENABLED(CONFIG_IA32_EMULATION) || +IS_ENABLED(CONFIG_X86_32)) && use_fxsr()) + ret += sizeof(struct fregs_state); + + return ret; +} + /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ea794a083c44..800243afd1ef 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -212,6 +212,11 @@ do { \ * Set up a signal frame. */ +/* x86 ABI requires 16-byte alignment */ +#define FRAME_ALIGNMENT16UL + +#define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1) + /* * Determine which stack to use.. */ @@ -222,9 +227,9 @@ static unsigned long align_sigframe(unsigned long sp) * Align the stack pointer according to the i386 ABI, * i.e. so that on function entry ((sp + 4) & 15) == 0. */ - sp = ((sp + 4) & -16ul) - 4; + sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4; #else /* !CONFIG_X86_32 */ - sp = round_down(sp, 16) - 8; + sp = round_down(sp, FRAME_ALIGNMENT) - 8; #endif return sp; } @@ -663,6 +668,54 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; } +/* + * There are four different struct types for signal frame: sigframe_ia32, + * rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case + * -- the largest size. It means the size for 64-bit apps is a bit more + * than needed, but this keeps the code simple. + */ +#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
Re: [PATCH] Insert SFENCE.VMA in function set_pte_at for RISCV
> As per my understanding, we don't need to explicitly invalidate local TLB > in set_pte() or set_pet_at() because generic Linux page table management > (/mm/*) will call the appropriate flush_tlb_xyz() function after page > table updates. I witnessed this bug in our micro-architecture: set_pte instruction is still in the store buffer, no functions are inserting SFENCE.VMA in the stack below, so TLB cannot witness this modification. Here is my call stack: set_pte set_pte_at map_vm_area __vmalloc_area_node __vmalloc_node_range __vmalloc_node __vmalloc_node_flags vzalloc n_tty_open I think this is an architecture specific code, so /mm/* should not be modified. And spec requires SFENCE.VMA to be inserted on each modification to TLB. So I added code here. > Also, just local TLB flush is generally not sufficient because > a lot of page tables will be used across on multiple HARTs. Yes, this is the biggest issue, in RISC-V Volume 2, Privileged Spec v. 20190608 page 67 gave a solution: Consequently, other harts must be notified separately when the memory-management data structures have been modified. One approach is to use 1) a local data fence to ensure local writes are visible globally, then 2) an interprocessor interrupt to the other thread, then 3) a local SFENCE.VMA in the interrupt handler of the remote thread, and finally 4) signal back to originating thread that operation is complete. This is, of course, the RISC-V analog to a TLB shootdown. In general, this patch didn't handle the G bit in PTE, kernel trap it to sbi_remote_sfence_vma. do you think I should use flush_tlb_all? Jiuyang arch/arm/mm/mmu.c void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { unsigned long ext = 0; if (addr < TASK_SIZE && pte_valid_user(pteval)) { if (!pte_special(pteval)) __sync_icache_dcache(pteval); ext |= PTE_EXT_NG; } set_pte_ext(ptep, pteval, ext); } arch/mips/include/asm/pgtable.h static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { if (!pte_present(pteval)) goto cache_sync_done; if (pte_present(*ptep) && (pte_pfn(*ptep) == pte_pfn(pteval))) goto cache_sync_done; __update_cache(addr, pteval); cache_sync_done: set_pte(ptep, pteval); } Also, just local TLB flush is generally not sufficient because > a lot of page tables will be used accross on multiple HARTs. On Tue, Mar 16, 2021 at 5:05 AM Anup Patel wrote: > > +Alex > > On Tue, Mar 16, 2021 at 9:20 AM Jiuyang Liu wrote: > > > > This patch inserts SFENCE.VMA after modifying PTE based on RISC-V > > specification. > > > > arch/riscv/include/asm/pgtable.h: > > 1. implement pte_user, pte_global and pte_leaf to check correspond > > attribute of a pte_t. > > Adding pte_user(), pte_global(), and pte_leaf() is fine. > > > > > 2. insert SFENCE.VMA in set_pte_at based on RISC-V Volume 2, Privileged > > Spec v. 20190608 page 66 and 67: > > If software modifies a non-leaf PTE, it should execute SFENCE.VMA with > > rs1=x0. If any PTE along the traversal path had its G bit set, rs2 must > > be x0; otherwise, rs2 should be set to the ASID for which the > > translation is being modified. > > If software modifies a leaf PTE, it should execute SFENCE.VMA with rs1 > > set to a virtual address within the page. If any PTE along the traversal > > path had its G bit set, rs2 must be x0; otherwise, rs2 should be set to > > the ASID for which the translation is being modified. > > > > arch/riscv/include/asm/tlbflush.h: > > 1. implement get_current_asid to get current program asid. > > 2. implement local_flush_tlb_asid to flush tlb with asid. > > As per my understanding, we don't need to explicitly invalidate local TLB > in set_pte() or set_pet_at() because generic Linux page table management > (/mm/*) will call the appropriate flush_tlb_xyz() function after page > table updates. Also, just local TLB flush is generally not sufficient because > a lot of page tables will be used accross on multiple HARTs. > > > > > Signed-off-by: Jiuyang Liu > > --- > > arch/riscv/include/asm/pgtable.h | 27 +++ > > arch/riscv/include/asm/tlbflush.h | 12 > > 2 files changed, 39 insertions(+) > > > > diff --git a/arch/riscv/include/asm/pgtable.h > > b/arch/riscv/include/asm/pgtable.h > > index ebf817c1bdf4..5a47c60372c1 100644 > > --- a/arch/riscv/include/asm/pgtable.h > > +++ b/arch/riscv/include/asm/pgtable.h > > @@ -222,6 +222,16 @@ static inline int pte_write(pte_t pte) > > return pte_val(pte) & _PAGE_WRITE; > > } > > > > +static inline int pte_user(pte_t pte) > > +{ > > + return pte_val(pte) & _PAGE_USER; > > +} > > + > > +static inline int pte_global(pte_t pte) > > +{ > > + return pte_val(pte) & _PAGE_GLOBAL; > > +} > > + > > static inlin
[PATCH v7 1/6] uapi: Define the aux vector AT_MINSIGSTKSZ
Define the AT_MINSIGSTKSZ in generic Linux. It is already used as generic ABI in glibc's generic elf.h, and this define will prevent future namespace conflicts. In particular, x86 is also using this generic definition. Signed-off-by: Chang S. Bae Reviewed-by: Len Brown Cc: Carlos O'Donell Cc: Dave Martin Cc: libc-al...@sourceware.org Cc: linux-a...@vger.kernel.org Cc: linux-...@vger.kernel.org Cc: linux-arm-ker...@lists.infradead.org Cc: linux-kernel@vger.kernel.org --- Change from v6: * Revised the comment. (Borislav Petkov) Change from v5: * Reverted the arm64 change. (Dave Martin and Will Deacon) * Massaged the changelog. Change from v4: * Added as a new patch (Carlos O'Donell) --- include/uapi/linux/auxvec.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h index abe5f2b6581b..c7e502bf5a6f 100644 --- a/include/uapi/linux/auxvec.h +++ b/include/uapi/linux/auxvec.h @@ -33,5 +33,8 @@ #define AT_EXECFN 31 /* filename of program */ +#ifndef AT_MINSIGSTKSZ +#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */ +#endif #endif /* _UAPI_LINUX_AUXVEC_H */ -- 2.17.1
[PATCH v7 0/6] x86: Improve Minimum Alternate Stack Size
During signal entry, the kernel pushes data onto the normal userspace stack. On x86, the data pushed onto the user stack includes XSAVE state, which has grown over time as new features and larger registers have been added to the architecture. MINSIGSTKSZ is a constant provided in the kernel signal.h headers and typically distributed in lib-dev(el) packages, e.g. [1]. Its value is compiled into programs and is part of the user/kernel ABI. The MINSIGSTKSZ constant indicates to userspace how much data the kernel expects to push on the user stack, [2][3]. However, this constant is much too small and does not reflect recent additions to the architecture. For instance, when AVX-512 states are in use, the signal frame size can be 3.5KB while MINSIGSTKSZ remains 2KB. The bug report [4] explains this as an ABI issue. The small MINSIGSTKSZ can cause user stack overflow when delivering a signal. In this series, we suggest a couple of things: 1. Provide a variable minimum stack size to userspace, as a similar approach to [5]. 2. Avoid using a too-small alternate stack. Changes from v6 [11]: * Updated and fixed the documentation. (Borislav Petkov) * Revised the AT_MINSIGSTKSZ comment. (Borislav Petkov) Changes form v5 [10]: * Fixed the overflow detection. (Andy Lutomirski) * Reverted the AT_MINSIGSTKSZ removal on arm64. (Dave Martin) * Added a documentation about the x86 AT_MINSIGSTKSZ. * Supported the existing sigaltstack test to use the new aux vector. Changes from v4 [9]: * Moved the aux vector define to the generic header. (Carlos O'Donell) Changes from v3 [8]: * Updated the changelog. (Borislav Petkov) * Revised the test messages again. (Borislav Petkov) Changes from v2 [7]: * Simplified the sigaltstack overflow prevention. (Jann Horn) * Renamed fpstate size helper with cleanup. (Borislav Petkov) * Cleaned up the signframe struct size defines. (Borislav Petkov) * Revised the selftest messages. (Borislav Petkov) * Revised a changelog. (Borislav Petkov) Changes from v1 [6]: * Took stack alignment into account for sigframe size. (Dave Martin) [1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/bits/sigstack.h;h=b9dca794da093dc4d41d39db9851d444e1b54d9b;hb=HEAD [2]: https://www.gnu.org/software/libc/manual/html_node/Signal-Stack.html [3]: https://man7.org/linux/man-pages/man2/sigaltstack.2.html [4]: https://bugzilla.kernel.org/show_bug.cgi?id=153531 [5]: https://blog.linuxplumbersconf.org/2017/ocw/system/presentations/4671/original/plumbers-dm-2017.pdf [6]: https://lore.kernel.org/lkml/20200929205746.6763-1-chang.seok@intel.com/ [7]: https://lore.kernel.org/lkml/20201119190237.626-1-chang.seok@intel.com/ [8]: https://lore.kernel.org/lkml/20201223015312.4882-1-chang.seok@intel.com/ [9]: https://lore.kernel.org/lkml/20210115211038.2072-1-chang.seok@intel.com/ [10]: https://lore.kernel.org/lkml/20210203172242.29644-1-chang.seok@intel.com/ [11]: https://lore.kernel.org/lkml/20210227165911.32757-1-chang.seok@intel.com/ Chang S. Bae (6): uapi: Define the aux vector AT_MINSIGSTKSZ x86/signal: Introduce helpers to get the maximum signal frame size x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ selftest/sigaltstack: Use the AT_MINSIGSTKSZ aux vector if available x86/signal: Detect and prevent an alternate signal stack overflow selftest/x86/signal: Include test cases for validating sigaltstack Documentation/x86/elf_auxvec.rst | 53 + Documentation/x86/index.rst | 1 + arch/x86/include/asm/elf.h| 4 + arch/x86/include/asm/fpu/signal.h | 2 + arch/x86/include/asm/sigframe.h | 2 + arch/x86/include/uapi/asm/auxvec.h| 4 +- arch/x86/kernel/cpu/common.c | 3 + arch/x86/kernel/fpu/signal.c | 19 arch/x86/kernel/signal.c | 72 +++- include/uapi/linux/auxvec.h | 3 + tools/testing/selftests/sigaltstack/sas.c | 20 +++- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sigaltstack.c | 128 ++ 13 files changed, 300 insertions(+), 13 deletions(-) create mode 100644 Documentation/x86/elf_auxvec.rst create mode 100644 tools/testing/selftests/x86/sigaltstack.c base-commit: 1e28eed17697bcf343c6743f0028cc3b5dd88bf0 -- 2.17.1
[PATCH] ALSA: hda/realtek: fix mute/micmute LEDs for HP 840 G8
* The HP EliteBook 840 G8 Notebook PC is using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b47504fa8dfd..e568bf460bae 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8048,6 +8048,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- 2.30.1
[PATCH] power-supply: use kobj_to_dev()
From: dongjian Use kobj_to_dev() instead of open-coding it Signed-off-by: dongjian --- drivers/power/supply/ds2781_battery.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/power/supply/ds2781_battery.c b/drivers/power/supply/ds2781_battery.c index 3df3c82..05b859b 100644 --- a/drivers/power/supply/ds2781_battery.c +++ b/drivers/power/supply/ds2781_battery.c @@ -626,7 +626,7 @@ static ssize_t ds2781_read_param_eeprom_bin(struct file *filp, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct power_supply *psy = to_power_supply(dev); struct ds2781_device_info *dev_info = to_ds2781_device_info(psy); @@ -639,7 +639,7 @@ static ssize_t ds2781_write_param_eeprom_bin(struct file *filp, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct power_supply *psy = to_power_supply(dev); struct ds2781_device_info *dev_info = to_ds2781_device_info(psy); int ret; @@ -671,7 +671,7 @@ static ssize_t ds2781_read_user_eeprom_bin(struct file *filp, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct power_supply *psy = to_power_supply(dev); struct ds2781_device_info *dev_info = to_ds2781_device_info(psy); @@ -685,7 +685,7 @@ static ssize_t ds2781_write_user_eeprom_bin(struct file *filp, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct power_supply *psy = to_power_supply(dev); struct ds2781_device_info *dev_info = to_ds2781_device_info(psy); int ret; -- 1.9.1
Re: [PATCH v1 10/14] mm: multigenerational lru: core
Yu Zhao writes: > On Tue, Mar 16, 2021 at 10:08:51AM +0800, Huang, Ying wrote: >> Yu Zhao writes: >> [snip] >> >> > +/* Main function used by foreground, background and user-triggered aging. >> > */ >> > +static bool walk_mm_list(struct lruvec *lruvec, unsigned long next_seq, >> > + struct scan_control *sc, int swappiness) >> > +{ >> > + bool last; >> > + struct mm_struct *mm = NULL; >> > + int nid = lruvec_pgdat(lruvec)->node_id; >> > + struct mem_cgroup *memcg = lruvec_memcg(lruvec); >> > + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); >> > + >> > + VM_BUG_ON(next_seq > READ_ONCE(lruvec->evictable.max_seq)); >> > + >> > + /* >> > + * For each walk of the mm list of a memcg, we decrement the priority >> > + * of its lruvec. For each walk of memcgs in kswapd, we increment the >> > + * priorities of all lruvecs. >> > + * >> > + * So if this lruvec has a higher priority (smaller value), it means >> > + * other concurrent reclaimers (global or memcg reclaim) have walked >> > + * its mm list. Skip it for this priority to balance the pressure on >> > + * all memcgs. >> > + */ >> > +#ifdef CONFIG_MEMCG >> > + if (!mem_cgroup_disabled() && !cgroup_reclaim(sc) && >> > + sc->priority > atomic_read(&lruvec->evictable.priority)) >> > + return false; >> > +#endif >> > + >> > + do { >> > + last = get_next_mm(lruvec, next_seq, swappiness, &mm); >> > + if (mm) >> > + walk_mm(lruvec, mm, swappiness); >> > + >> > + cond_resched(); >> > + } while (mm); >> >> It appears that we need to scan the whole address space of multiple >> processes in this loop? >> >> If so, I have some concerns about the duration of the function. Do you >> have some number of the distribution of the duration of the function? >> And may be the number of mm_struct and the number of pages scanned. >> >> In comparison, in the traditional LRU algorithm, for each round, only a >> small subset of the whole physical memory is scanned. > > Reasonable concerns, and insightful too. We are sensitive to direct > reclaim latency, and we tuned another path carefully so that direct > reclaims virtually don't hit this path :) > > Some numbers from the cover letter first: > In addition, direct reclaim latency is reduced by 22% at 99th > percentile and the number of refaults is reduced 7%. These metrics are > important to phones and laptops as they are correlated to user > experience. > > And "another path" is the background aging in kswapd: > age_active_anon() > age_lru_gens() > try_walk_mm_list() > /* try to spread pages out across spread+1 generations */ > if (old_and_young[0] >= old_and_young[1] * spread && > min_nr_gens(max_seq, min_seq, swappiness) > max(spread, > MIN_NR_GENS)) > return; > > walk_mm_list(lruvec, max_seq, sc, swappiness); > > By default, spread = 2, which makes kswapd slight more aggressive > than direct reclaim for our use cases. This can be entirely disabled > by setting spread to 0, for worloads that don't care about direct > reclaim latency, or larger values, they are more sensitive than > ours. OK, I see. That can avoid the long latency in direct reclaim path. > It's worth noting that walk_mm_list() is multithreaded -- reclaiming > threads can work on different mm_structs on the same list > concurrently. We do occasionally see this function in direct reclaims, > on over-overcommitted systems, i.e., kswapd CPU usage is 100%. Under > the same condition, we saw the current page reclaim live locked and > triggered hardware watchdog timeouts (our hardware watchdog is set to > 2 hours) many times. Just to confirm, in the current page reclaim, kswapd will keep running until watchdog? This is avoided in your algorithm mainly via multi-threading? Or via direct vs. reversing page table scanning? Best Regards, Huang, Ying
Re: slow boot with 7fef431be9c9 ("mm/page_alloc: place pages to tail in __free_pages_core()")
Hi Leo, On Tue, Mar 16, 2021 at 12:36:29AM +, Liang, Liang (Leo) wrote: > > Hi David, > > Sorry for late. If revert 7fef431be9c9 (without 7fef431be9c9), the dmesg > attached. And looks the exception as below: > [ +0.027833] [0x7800 - 0x783f] 20925 MB/s / 25405 > MB/s > [ +1.363596] [0x0001 - 0x0001003f] 222 MB/s / 222 MB/s > [ +1.562192] [0x00010040 - 0x0001007f] 222 MB/s / 222 MB/s > [ +1.881332] [0x00010080 - 0x000100bf] 195 MB/s / 159 MB/s > [ +1.383388] [0x000100c0 - 0x000100ff] 219 MB/s / 221 MB/s > [ +0.029342] [0x00010100 - 0x0001013f] 19807 MB/s / 24125 > MB/s > > What is the problem here? Do you want to check the acpi tables? As it seems the first 16M at 0x0001 are two orders of magnitude slower than the rest of the memory as if there is a different memory device there. This would explain why with 7fef431be9c9 everything gets slower as we allocate the first (and probably quite critical) data from those 16M. No idea how this could be related to ACPI and why ACPI initialization causes the huge slowdown on its own. Can you please try booting with 7fef431be9c9 still applied and with this patch (not even compile tested): diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d883176ef2ce..780f11ca14c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -778,6 +778,7 @@ void __init setup_arch(char **cmdline_p) * L1TF its contents can be leaked to user processes. */ memblock_reserve(0, PAGE_SIZE); + memblock_reserve(0x0001, SZ_16M); early_reserve_initrd(); > BRs, > Leo > -Original Message- > From: David Hildenbrand > Sent: Monday, March 15, 2021 9:04 PM > To: Mike Rapoport > Cc: Liang, Liang (Leo) ; Deucher, Alexander > ; linux-kernel@vger.kernel.org; amd-gfx list > ; Andrew Morton ; > Huang, Ray ; Koenig, Christian ; > Rafael J. Wysocki ; George Kennedy > > Subject: Re: slow boot with 7fef431be9c9 ("mm/page_alloc: place pages to tail > in __free_pages_core()") > > On 13.03.21 14:48, Mike Rapoport wrote: > > Hi, > > > > On Sat, Mar 13, 2021 at 10:05:23AM +0100, David Hildenbrand wrote: > >>> Am 13.03.2021 um 05:04 schrieb Liang, Liang (Leo) : > >>> > >>> Hi David, > >>> > >>> Which benchmark tool you prefer? Memtest86+ or else? > >> > >> Hi Leo, > >> > >> I think you want something that runs under Linux natively. > >> > >> I'm planning on coding up a kernel module to walk all 4MB pages in > >> the freelists and perform a stream benchmark individually. Then we > >> might be able to identify the problematic range - if there is a > >> problematic range :) > > > > My wild guess would be that the pages that are now at the head of free > > lists have wrong caching enabled. Might be worth checking in your test > > module. > > I hacked something up real quick: > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fdavidhildenbrand%2Fkstream&data=04%7C01%7Cliang.liang%40amd.com%7C61fb103eeb7647f5228408d8e7b2d7d3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637514102622932303%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=ufUYQRtdSHvEkR61LiJZtsVdYZbtdGbKlzZHOQdct78%3D&reserved=0 > > Only briefly tested inside a VM. The output looks something like > > [...] > [ 8396.432225] [0x4580 - 0x45bf] 25322 MB/s / > 38948 MB/s > [ 8396.448749] [0x45c0 - 0x45ff] 24481 MB/s / > 38946 MB/s > [ 8396.465197] [0x4600 - 0x463f] 24892 MB/s / > 39170 MB/s > [ 8396.481552] [0x4640 - 0x467f] 25222 MB/s / > 39156 MB/s > [ 8396.498012] [0x4680 - 0x46bf] 24416 MB/s / > 39159 MB/s > [ 8396.514397] [0x46c0 - 0x46ff] 25469 MB/s / > 38940 MB/s > [ 8396.530849] [0x4700 - 0x473f] 24885 MB/s / > 38734 MB/s > [ 8396.547195] [0x4740 - 0x477f] 25458 MB/s / > 38941 MB/s > [...] > > The benchmark allocates one 4 MiB chunk at a time and runs a simplified > STREAM benchmark a) without flushing caches b) flushing caches before every > memory access. > > It would be great if you could run that with the *old behavior* kernel (IOW, > without 7fef431be9c9), so we might still be lucky to catch the problematic > area in the freelist. > > Let's see if that will indicate anything. > > -- > Thanks, > > David / dhildenb -- Sincerely yours, Mike.
Re: [PATCH v2 5/5] mm/hugetlb: avoid calculating fault_mutex_hash in truncate_op case
On 2021/3/16 11:07, Mike Kravetz wrote: > On 3/15/21 7:27 PM, Miaohe Lin wrote: >> The fault_mutex hashing overhead can be avoided in truncate_op case >> because page faults can not race with truncation in this routine. So >> calculate hash for fault_mutex only in !truncate_op case to save some cpu >> cycles. >> >> Reviewed-by: Mike Kravetz >> Signed-off-by: Miaohe Lin >> Cc: Mike Kravetz >> --- >> v1->v2: >> remove unnecessary initialization for variable hash >> collect Reviewed-by tag from Mike Kravetz > > My apologies for not replying sooner and any misunderstanding from my > previous comments. > That's all right. > If the compiler is going to produce a warning because the variable is > not initialized, then we will need to keep the initialization. > Otherwise, this will show up as a build regression. Ideally, there > would be a modifier which could be used to tell the compiler the > variable will used. I do not know if such a modifier exists. > I do not know if such a modifier exists too. But maybe not all compilers are intelligent enough to not produce a warning. It would be safe to keep the initialization... > The patch can not produce a new warning. So, if you need to initialize So just drop this version of the patch? Or should I send a new version with your Reviewed-by tag and keep the initialization? > the variable then do it. My Reviewed-by still applies. >
Re: [PATCH v2] staging: rtl8192u: remove extra lines
On Tue, Mar 16, 2021 at 10:44:10AM +0800, zhaoxiao wrote: > Remove extra lines in many functions in r8192U_wx.c. > > Signed-off-by: zhaoxiao > --- > drivers/staging/rtl8192u/r8192U_wx.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) The commit message says you're removing lines but you're also adding them. :P regards, dan carpenter
[syzbot] general protection fault in scatterwalk_copychunks (4)
Hello, syzbot found the following issue on: HEAD commit:47142ed6 net: dsa: bcm_sf2: Qualify phydev->dev_flags base.. git tree: net console output: https://syzkaller.appspot.com/x/log.txt?x=17fb9376d0 kernel config: https://syzkaller.appspot.com/x/.config?x=eec733599e95cd87 dashboard link: https://syzkaller.appspot.com/bug?extid=66e3ea42c4b176748b9c Unfortunately, I don't have any reproducer for this issue yet. IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+66e3ea42c4b176748...@syzkaller.appspotmail.com general protection fault, probably for non-canonical address 0xdc01: [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0008-0x000f] CPU: 1 PID: 25 Comm: kworker/u4:1 Not tainted 5.12.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: pencrypt_parallel padata_parallel_worker RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:68 [inline] RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:93 [inline] RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:77 [inline] RIP: 0010:scatterwalk_copychunks+0x4db/0x6a0 crypto/scatterwalk.c:50 Code: ff df 80 3c 02 00 0f 85 b4 01 00 00 49 8d 44 24 08 4d 89 26 48 89 c2 48 89 44 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 77 01 00 00 48 b8 00 00 00 00 RSP: 0018:c9dff620 EFLAGS: 00010202 RAX: dc00 RBX: RCX: RDX: 0001 RSI: 83c45a33 RDI: 0003 RBP: R08: R09: 88801ba09d1b R10: 83c459e3 R11: d9e6 R12: R13: 0001 R14: c9dff880 R15: FS: () GS:8880b9d0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 00540198 CR3: 18d08000 CR4: 001506e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 Call Trace: skcipher_next_slow crypto/skcipher.c:278 [inline] skcipher_walk_next+0x7af/0x1680 crypto/skcipher.c:363 skcipher_walk_first+0xf8/0x3c0 crypto/skcipher.c:446 skcipher_walk_aead_common+0x7a5/0xbc0 crypto/skcipher.c:539 gcmaes_crypt_by_sg+0x323/0x8a0 arch/x86/crypto/aesni-intel_glue.c:658 Modules linked in: ---[ end trace 15593fd836276143 ]--- RIP: 0010:scatterwalk_start include/crypto/scatterwalk.h:68 [inline] RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:93 [inline] RIP: 0010:scatterwalk_pagedone include/crypto/scatterwalk.h:77 [inline] RIP: 0010:scatterwalk_copychunks+0x4db/0x6a0 crypto/scatterwalk.c:50 Code: ff df 80 3c 02 00 0f 85 b4 01 00 00 49 8d 44 24 08 4d 89 26 48 89 c2 48 89 44 24 18 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 77 01 00 00 48 b8 00 00 00 00 RSP: 0018:c9dff620 EFLAGS: 00010202 RAX: dc00 RBX: RCX: RDX: 0001 RSI: 83c45a33 RDI: 0003 RBP: R08: R09: 88801ba09d1b R10: 83c459e3 R11: d9e6 R12: R13: 0001 R14: c9dff880 R15: FS: () GS:8880b9d0() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 00540198 CR3: 18d08000 CR4: 001506e0 DR0: DR1: DR2: DR3: DR6: fffe0ff0 DR7: 0400 --- This report is generated by a bot. It may contain errors. See https://goo.gl/tpsmEJ for more information about syzbot. syzbot engineers can be reached at syzkal...@googlegroups.com. syzbot will keep track of this issue. See: https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
Re: [PATCH v1 09/14] mm: multigenerational lru: mm_struct list
Yu Zhao writes: > On Tue, Mar 16, 2021 at 10:07:36AM +0800, Huang, Ying wrote: >> Rik van Riel writes: >> >> > On Sat, 2021-03-13 at 00:57 -0700, Yu Zhao wrote: >> > >> >> +/* >> >> + * After pages are faulted in, they become the youngest generation. >> >> They must >> >> + * go through aging process twice before they can be evicted. After >> >> first scan, >> >> + * their accessed bit set during initial faults are cleared and they >> >> become the >> >> + * second youngest generation. And second scan makes sure they >> >> haven't been used >> >> + * since the first. >> >> + */ >> > >> > I have to wonder if the reductions in OOM kills and >> > low-memory tab discards is due to this aging policy >> > change, rather than from the switch to virtual scanning. > > There are no policy changes per se. The current page reclaim also > scans a faulted-in page at least twice before it can reclaim it. > That said, the new aging yields a better overall result because it > discovers every page that has been referenced since the last scan, > in addition to what Ying has mentioned. The current page scan stops > stops once it finds enough candidates, which may seem more > efficiently, but actually pays the price for not finding the best. > >> If my understanding were correct, the temperature of the processes is >> considered in addition to that of the individual pages. That is, the >> pages of the processes that haven't been scheduled after the previous >> scanning will not be scanned. I guess that this helps OOM kills? > > Yes, that's correct. > >> If so, how about just take advantage of that information for OOM killing >> and page reclaiming? For example, if a process hasn't been scheduled >> for long time, just reclaim its private pages. > > This is how it works. Pages that haven't been scanned grow older > automatically because those that have been scanned will be tagged with > younger generation numbers. Eviction does bucket sort based on > generation numbers and attacks the oldest. Sorry, my original words are misleading. What I wanted to say was that is it good enough that - Do not change the core algorithm of current page reclaiming. - Add some new logic to reclaim the process private pages regardless of the Accessed bits if the processes are not scheduled for some long enough time. This can be done before the normal page reclaiming. So this is an one small step improvement to the current page reclaiming algorithm via taking advantage of the scheduler information. It's clearly not sophisticated as your new algorithm, for example, the cold pages in the hot processes will not be reclaimed in this stage. But it can reduce the overhead of scanning too. All in all, some of your ideas may help the original LRU algorithm too. Or some can be experimented without replacing the original algorithm. But from another point of view, your solution can be seen as a kind of improvement on top of the original LRU algorithm too. It moves the recently accessed pages to kind of multiple active lists based on scanning page tables directly (instead of reversely). Best Regards, Huang, Ying
RE: [EXT] [PATCH 0/2] enable flexspi support on imx8mp
Hi Heiko, > -Original Message- > From: Heiko Schocher > Sent: Tuesday, March 16, 2021 10:34 AM > To: linux-...@vger.kernel.org > Cc: Heiko Schocher ; linux-arm-ker...@lists.infradead.org; > Ashish Kumar ; Kuldeep Singh > ; Mark Brown ; Rob Herring > ; Shawn Guo ; Yogesh Gaur > ; devicet...@vger.kernel.org; linux- > ker...@vger.kernel.org > Subject: [EXT] [PATCH 0/2] enable flexspi support on imx8mp > > Caution: EXT Email > > add compatible entry in nxp_fspi driver for imx8mp > > new in v3: > seperate spi changes from series: > https://eur01.safelinks.protection.outlook.com/?url=http%3A%2F%2Flists.inf > radead.org%2Fpipermail%2Flinux-arm-kernel%2F2021- > March%2F643289.html&data=04%7C01%7Ckuldeep.singh%40nxp.com% > 7C5da0c3da3dbe410baaf508d8e83903f4%7C686ea1d3bc2b4c6fa92cd99c5c3 > 01635%7C0%7C0%7C637514678868305498%7CUnknown%7CTWFpbGZsb3d8 > eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3 > D%7C1000&sdata=2uy0EKUh4Nt0BceSQbIkCZDfakid3wx5uwebw0DhEIQ > %3D&reserved=0 > > into own series as Kuldeep suggested and rebased against > git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next > 144c79ef33536 ("Merge tag 'perf-tools-fixes-for-v5.12-2020-03-07' of > git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux") The changes are not on on top of spi tree git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next and therefore may not apply seamlessly. I recently added driver support for imx8dxl which is accepted in spi tree And these patches will cause conflict with it. Kindly rebase these patches on top of the tree. Regards Kuldeep
Re: [PATCH] mm,hwpoison: return -EBUSY when page already poisoned
On Fri, Mar 12, 2021 at 11:48:31PM +, Luck, Tony wrote: > >> will memory_failure() find it and unmap it? if succeed, then the current > >> will be > >> signaled with correct vaddr and shift? > > > > That's a very good question. I didn't see a SIGBUS when I first wrote this > > code, > > hence all the p->mce_vaddr. But now I'm > > a) not sure why there wasn't a signal We have a recent change around this behavior, which might be an answer for you. See commit 30c9cf492704 ("mm,hwpoison: send SIGBUS to PF_MCE_EARLY processes on action required events"). > > b) if we are to fix the problems noted by AndyL, need to make sure that > > there isn't a SIGBUS > > Tests on upstream kernel today show that memory_failure() is both unmapping > the page > and sending a SIGBUS. Sorry if I misunderstood the exact problem, but if the point is to allow user processes to find poisoned virtual address without SIGBUS, one possible way is to expose hwpoison entry via /proc/pid/pagemap (attached a draft patch below). With this patch, processes easily find hwpoison entries without any new interface. > > My biggest issue with the KERNEL_COPYIN recovery path is that we don't have > code > to mark the page not present while we are still in do_machine_check(). It sounds to me that even if we have such code, it just narrows down the race window between multiple MCEs on different CPUs. Or does it completely prevent the race? (Another thread could touch the poisoned page just before the first thread marks the page non-present?) > That's resulted > in recovery working for simple cases where there is a single get_user() call > followed by > an error return if that failed. But more complex cases require more machine > checks and > a touching faith that the kernel will eventually give up trying (spoiler: it > sometimes doesn't). > > Thanks to the decode of the instruction we do have the virtual address. So we > just need > a safe walk of pgd->p4d->pud->pmd->pte (truncated if we hit a huge page) with > a write > of a "not-present" value. Maybe a different poison type from the one we get > from > memory_failure() so that the #PF code can recognize this as a special case > and do any > other work that we avoided because we were in #MC context. As you answered in another email, p->mce_vaddr is set only on KERNEL_COPYIN case, then if p->mce_vaddr is available also for generic SRAR MCE (I'm assuming that we are talking about issues on race between generic SRAR MCE not only for KERNEL_COPYIN case), that might be helpful, although it might be hard to implement. And I'm afraid that walking page table could find the wrong virtual address if a process have multiple entries to the single page. We could send multiple SIGBUSs for such case, but maybe that's not an optimal solution. Thanks, Naoya Horiguchi >From 147449a97e2ea3420ac3523f13523f5d30a13614 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 16 Mar 2021 14:22:21 +0900 Subject: [PATCH] pagemap: expose hwpoison entry not-signed-off-by-yet: Naoya Horiguchi --- fs/proc/task_mmu.c | 6 ++ include/linux/swapops.h | 12 tools/vm/page-types.c | 5 - 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 602e3a52884d..08cea209bae7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1300,6 +1300,7 @@ struct pagemapread { #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) +#define PM_HWPOISONBIT_ULL(60) #define PM_FILEBIT_ULL(61) #define PM_SWAPBIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -1385,6 +1386,11 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); + if (is_hwpoison_entry(entry)) { + page = hwpoison_entry_to_page(entry); + flags |= PM_HWPOISON; + } + if (is_device_private_entry(entry)) page = device_private_entry_to_page(entry); } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index d9b7c9132c2f..1b9dedbd06ab 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -323,6 +323,13 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } +static inline struct page *hwpoison_entry_to_page(swp_entry_t entry) +{ + struct page *p = pfn_to_page(swp_offset(entry)); + WARN_ON(!PageHWPoison(p)); + return p; +} + static inline void num_poisoned_pages_inc(void) { atomic_long_inc(&num_poisoned_pages); @@ -345,6 +352,11 @@ static inline int is_hwpoison_entry(swp_entry_t swp) return 0; } +static inline struct page *hwpoison_
Re: [PATCH] KCOV: Introduced tracing unique covered PCs
On Mon, Mar 15, 2021 at 10:43 PM Alexander Lochmann wrote: > On 15.03.21 09:02, Dmitry Vyukov wrote: > static notrace unsigned long canonicalize_ip(unsigned long ip) > @@ -191,18 +192,26 @@ void notrace __sanitizer_cov_trace_pc(void) > struct task_struct *t; > unsigned long *area; > unsigned long ip = canonicalize_ip(_RET_IP_); > - unsigned long pos; > + unsigned long pos, idx; > > t = current; > - if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) > + if (!check_kcov_mode(KCOV_MODE_TRACE_PC | KCOV_MODE_UNIQUE_PC, > t)) > return; > > area = t->kcov_area; > - /* The first 64-bit word is the number of subsequent PCs. */ > - pos = READ_ONCE(area[0]) + 1; > - if (likely(pos < t->kcov_size)) { > - area[pos] = ip; > - WRITE_ONCE(area[0], pos); > + if (likely(t->kcov_mode == KCOV_MODE_TRACE_PC)) { > >>> > >>> Does this introduce an additional real of t->kcov_mode? > >>> If yes, please reuse the value read in check_kcov_mode. > >> Okay. How do I get that value from check_kcov_mode() to the caller? > >> Shall I add an additional parameter to check_kcov_mode()? > > > > Yes, I would try to add an additional pointer parameter for mode. I > > think after inlining the compiler should be able to regestrize it. > > > Should kcov->mode be written directly to that ptr? > Otherwise, it must be written to the already present variable mode, and > than copied to the ptr (if not NULL). I would expect that after inlining it won't make difference in generated code. Is so, both options are fine. Whatever leads to a cleaner code.
Re: [PATCH] smp: kernel/panic.c - silence warnings
Thanks for your reply. 在 2021/3/16 13:15, Randy Dunlap 写道: On 3/15/21 9:08 PM, He Ying wrote: We found these warnings in kernel/panic.c by using sparse tool: warning: symbol 'panic_smp_self_stop' was not declared. warning: symbol 'nmi_panic_self_stop' was not declared. warning: symbol 'crash_smp_send_stop' was not declared. To avoid them, add declarations for these three functions in include/linux/smp.h. Reported-by: Hulk Robot Signed-off-by: He Ying --- include/linux/smp.h | 8 1 file changed, 8 insertions(+) diff --git a/include/linux/smp.h b/include/linux/smp.h index 70c6f6284dcf..861a253cc179 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,6 +50,14 @@ extern unsigned int total_cpus; int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait); +/* + * Cpus stopping functions in panic. All have default weak definations. definitions. Sorry for my typo. + * Architecure dependent code may override them. Architecture-dependent Is that necessary? + */ +void panic_smp_self_stop(void); +void nmi_panic_self_stop(struct pt_regs *regs); +void crash_smp_send_stop(void); + /* * Call a function on all processors */
Re: [PATCH 4.14 00/95] 4.14.226-rc1 review
On 2021/3/15 21:56, gre...@linuxfoundation.org wrote: From: Greg Kroah-Hartman This is the start of the stable review cycle for the 4.14.226 release. There are 95 patches in this series, all will be posted as a response to this one. If anyone has any issues with these being applied, please let me know. Responses should be made by Wed, 17 Mar 2021 13:57:24 +. Anything received after that time might be too late. The whole patch series can be found in one patch at: https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.226-rc1.gz or in the git tree and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.14.y and the diffstat can be found below. thanks, greg k-h Tested on x86 for 4.14.226-rc1, Kernel repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git Branch: linux-4.14.y Version: 4.14.226-rc1 Commit: 57cc62fb2d2b8e81c02cb9197e303c7782dee4cd Compiler: gcc version 7.3.0 (GCC) x86 (No kernel failures) Testcase Result Summary: total_num: 4728 succeed_num: 4727 failed_num: 1 timeout_num: 0 Tested-by: Hulk Robot
Re: [PATCH v2] task_work: kasan: record task_work_add() call stack
On Tue, Mar 16, 2021 at 3:44 AM Walter Wu wrote: > > Why record task_work_add() call stack? > Syzbot reports many use-after-free issues for task_work, see [1]. > After see the free stack and the current auxiliary stack, we think > they are useless, we don't know where register the work, this work > may be the free call stack, so that we miss the root cause and > don't solve the use-after-free. > > Add task_work_add() call stack into KASAN auxiliary stack in > order to improve KASAN report. It is useful for programmers > to solve use-after-free issues. > > [1]: > https://groups.google.com/g/syzkaller-bugs/search?q=kasan%20use-after-free%20task_work_run > > Signed-off-by: Walter Wu > Suggested-by: Dmitry Vyukov > Cc: Andrey Konovalov > Cc: Andrey Ryabinin > Cc: Dmitry Vyukov > Cc: Alexander Potapenko > Cc: Andrew Morton > Cc: Matthias Brugger > Cc: Jens Axboe > Cc: Oleg Nesterov > --- > > v2: Fix kasan_record_aux_stack() calling sequence issue. > Thanks for Dmitry's suggestion Reviewed-by: Dmitry Vyukov > --- > kernel/task_work.c | 3 +++ > mm/kasan/kasan.h | 2 +- > 2 files changed, 4 insertions(+), 1 deletion(-) > > diff --git a/kernel/task_work.c b/kernel/task_work.c > index 9cde961875c0..3d4852891fa8 100644 > --- a/kernel/task_work.c > +++ b/kernel/task_work.c > @@ -34,6 +34,9 @@ int task_work_add(struct task_struct *task, struct > callback_head *work, > { > struct callback_head *head; > > + /* record the work call stack in order to print it in KASAN reports */ > + kasan_record_aux_stack(work); > + > do { > head = READ_ONCE(task->task_works); > if (unlikely(head == &work_exited)) > diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h > index 3436c6bf7c0c..e4629a971a3c 100644 > --- a/mm/kasan/kasan.h > +++ b/mm/kasan/kasan.h > @@ -146,7 +146,7 @@ struct kasan_alloc_meta { > struct kasan_track alloc_track; > #ifdef CONFIG_KASAN_GENERIC > /* > -* call_rcu() call stack is stored into struct kasan_alloc_meta. > +* The auxiliary stack is stored into struct kasan_alloc_meta. > * The free stack is stored into struct kasan_free_meta. > */ > depot_stack_handle_t aux_stack[2]; > -- > 2.18.0 > > -- > You received this message because you are subscribed to the Google Groups > "kasan-dev" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to kasan-dev+unsubscr...@googlegroups.com. > To view this discussion on the web visit > https://groups.google.com/d/msgid/kasan-dev/20210316024410.19967-1-walter-zh.wu%40mediatek.com.
Re: [syzbot] KASAN: invalid-free in sg_finish_scsi_blk_rq
On 2021-03-15 9:59 p.m., syzbot wrote: Hello, syzbot found the following issue on: HEAD commit:d98f554b Add linux-next specific files for 20210312 git tree: linux-next console output: https://syzkaller.appspot.com/x/log.txt?x=1189318ad0 kernel config: https://syzkaller.appspot.com/x/.config?x=e362835d2e58cef6 dashboard link: https://syzkaller.appspot.com/bug?extid=0a0e8ecea895d38332e6 Unfortunately, I don't have any reproducer for this issue yet. No need, I think I can see how it happens. A particular type of resource error from the block layer, together with a 32 byte (or larger) SCSI command. I'm testing a patch. Doug Gilbert IMPORTANT: if you fix the issue, please add the following tag to the commit: Reported-by: syzbot+0a0e8ecea895d3833...@syzkaller.appspotmail.com == BUG: KASAN: double-free or invalid-free in slab_free mm/slub.c:3161 [inline] BUG: KASAN: double-free or invalid-free in kfree+0xe5/0x7f0 mm/slub.c:4215 CPU: 0 PID: 10481 Comm: syz-executor.5 Not tainted 5.12.0-rc2-next-20210312-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x141/0x1d7 lib/dump_stack.c:120 print_address_description.constprop.0.cold+0x5b/0x2f8 mm/kasan/report.c:232 kasan_report_invalid_free+0x51/0x80 mm/kasan/report.c:357 kasan_slab_free mm/kasan/common.c:340 [inline] __kasan_slab_free+0x118/0x130 mm/kasan/common.c:367 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1562 [inline] slab_free_freelist_hook+0x92/0x210 mm/slub.c:1600 slab_free mm/slub.c:3161 [inline] kfree+0xe5/0x7f0 mm/slub.c:4215 scsi_req_free_cmd include/scsi/scsi_request.h:28 [inline] sg_finish_scsi_blk_rq+0x690/0x810 drivers/scsi/sg.c:3224 sg_common_write+0xa07/0xe70 drivers/scsi/sg.c:1132 sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797 sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline] sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014 sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x465f69 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:7f8413efa188 EFLAGS: 0246 ORIG_RAX: 0010 RAX: ffda RBX: 0056bf60 RCX: 00465f69 RDX: 20001780 RSI: 2285 RDI: 0003 RBP: 004bfa8f R08: R09: R10: R11: 0246 R12: 0056bf60 R13: 7ffe20e16e2f R14: 7f8413efa300 R15: 00022000 Allocated by task 10481: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:46 [inline] set_alloc_info mm/kasan/common.c:427 [inline] kasan_kmalloc mm/kasan/common.c:506 [inline] kasan_kmalloc mm/kasan/common.c:465 [inline] __kasan_kmalloc+0x99/0xc0 mm/kasan/common.c:515 kmalloc include/linux/slab.h:561 [inline] kzalloc include/linux/slab.h:686 [inline] sg_start_req+0x16f/0x24e0 drivers/scsi/sg.c:3044 sg_common_write+0x5fd/0xe70 drivers/scsi/sg.c:1109 sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797 sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline] sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014 sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xae Freed by task 10481: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:38 kasan_set_track+0x1c/0x30 mm/kasan/common.c:46 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:357 kasan_slab_free mm/kasan/common.c:360 [inline] kasan_slab_free mm/kasan/common.c:325 [inline] __kasan_slab_free+0xf5/0x130 mm/kasan/common.c:367 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1562 [inline] slab_free_freelist_hook+0x92/0x210 mm/slub.c:1600 slab_free mm/slub.c:3161 [inline] kfree+0xe5/0x7f0 mm/slub.c:4215 sg_start_req+0x1b33/0x24e0 drivers/scsi/sg.c:3106 sg_common_write+0x5fd/0xe70 drivers/scsi/sg.c:1109 sg_v3_submit+0x3b1/0x530 drivers/scsi/sg.c:797 sg_ctl_sg_io drivers/scsi/sg.c:1785 [inline] sg_ioctl_common+0x3c86/0x97f0 drivers/scsi/sg.c:2014 sg_ioctl+0x7c/0x110 drivers/scsi/sg.c:2229 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:7
Re: [PATCH -tip v2 00/10] kprobes: Fix stacktrace with kretprobes
On Mon, 15 Mar 2021 21:30:03 -0500 Josh Poimboeuf wrote: > On Fri, Mar 12, 2021 at 03:41:44PM +0900, Masami Hiramatsu wrote: > > Hello, > > > > Here is the 2nd version of the series to fix the stacktrace with kretprobe. > > > > The 1st series is here; > > > > https://lore.kernel.org/bpf/161495873696.346821.10161501768906432924.stgit@devnote2/ > > > > In this version I merged the ORC unwinder fix for kretprobe which discussed > > in the > > previous thread. [3/10] is updated according to the Miroslav's comment. > > [4/10] is > > updated for simplify the code. [5/10]-[9/10] are discussed in the previsous > > tread > > and are introduced to the series. > > > > Daniel, can you also test this again? I and Josh discussed a bit different > > method and I've implemented it on this version. > > > > This actually changes the kretprobe behavisor a bit, now the instraction > > pointer in > > the pt_regs passed to kretprobe user handler is correctly set the real > > return > > address. So user handlers can get it via instruction_pointer() API. > > When I add WARN_ON(1) to a test kretprobe, it doesn't unwind properly. > > show_trace_log_lvl() reads the entire stack in lockstep with calls to > the unwinder so that it can decide which addresses get prefixed with > '?'. So it expects to find an actual return address on the stack. > Instead it finds %rsp. So it never syncs up with unwind_next_frame() > and shows all remaining addresses as unreliable. > > Call Trace: >__kretprobe_trampoline_handler+0xca/0x1a0 >trampoline_handler+0x3d/0x50 >kretprobe_trampoline+0x25/0x50 >? init_test_probes.cold+0x323/0x387 >? init_kprobes+0x144/0x14c >? init_optprobes+0x15/0x15 >? do_one_initcall+0x5b/0x300 >? lock_is_held_type+0xe8/0x140 >? kernel_init_freeable+0x174/0x2cd >? rest_init+0x233/0x233 >? kernel_init+0xa/0x11d >? ret_from_fork+0x22/0x30 > > How about pushing 'kretprobe_trampoline' instead of %rsp for the return > address placeholder. That fixes the above test, and removes the need > for the weird 'state->ip == sp' check: > > Call Trace: >__kretprobe_trampoline_handler+0xca/0x150 >trampoline_handler+0x3d/0x50 >kretprobe_trampoline+0x29/0x50 >? init_test_probes.cold+0x323/0x387 >elfcorehdr_read+0x10/0x10 >init_kprobes+0x144/0x14c >? init_optprobes+0x15/0x15 >do_one_initcall+0x72/0x280 >kernel_init_freeable+0x174/0x2cd >? rest_init+0x122/0x122 >kernel_init+0xa/0x10e >ret_from_fork+0x22/0x30 > > Though, init_test_probes.cold() (the real return address) is still > listed as unreliable. Maybe we need a call to kretprobe_find_ret_addr() > in show_trace_log_lvl(), similar to the ftrace_graph_ret_addr() call > there. Thanks for the test! OK, that could be acceptable. However, push %sp still needed for accessing stack address from kretprobe handler via pt_regs. (regs->sp must point the stack address) Anyway, with int3, it pushes one more entry for emulating call, so I think it is OK. Let me update the series! Thank you! > > > > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c > index 06f33bfebc50..70188fdd288e 100644 > --- a/arch/x86/kernel/kprobes/core.c > +++ b/arch/x86/kernel/kprobes/core.c > @@ -766,19 +766,19 @@ asm( > "kretprobe_trampoline:\n" > /* We don't bother saving the ss register */ > #ifdef CONFIG_X86_64 > - " pushq %rsp\n" > + /* Push fake return address to tell the unwinder it's a kretprobe */ > + " pushq $kretprobe_trampoline\n" > UNWIND_HINT_FUNC > " pushfq\n" > SAVE_REGS_STRING > " movq %rsp, %rdi\n" > " call trampoline_handler\n" > - /* Replace saved sp with true return address. */ > + /* Replace the fake return address with the real one. */ > " movq %rax, 19*8(%rsp)\n" > RESTORE_REGS_STRING > " popfq\n" > #else > " pushl %esp\n" > - UNWIND_HINT_FUNC > " pushfl\n" > SAVE_REGS_STRING > " movl %esp, %eax\n" > diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c > index 1d1b9388a1b1..1d3de84d2410 100644 > --- a/arch/x86/kernel/unwind_orc.c > +++ b/arch/x86/kernel/unwind_orc.c > @@ -548,8 +548,7 @@ bool unwind_next_frame(struct unwind_state *state) >* In those cases, find the correct return address from >* task->kretprobe_instances list. >*/ > - if (state->ip == sp || > - is_kretprobe_trampoline(state->ip)) > + if (is_kretprobe_trampoline(state->ip)) > state->ip = kretprobe_find_ret_addr(state->task, > &state->kr_iter); > > > -- Masami Hiramatsu
Re: [mm, net-next v2] mm: net: memcg accounting for TCP rx zerocopy
On Mon, Mar 15, 2021 at 11:22 PM Arjun Roy wrote: > > On Mon, Mar 15, 2021 at 9:29 PM Shakeel Butt wrote: > > > > On Mon, Mar 15, 2021 at 9:20 PM Arjun Roy wrote: > > > > > [...] > > > > > > > > > > Apologies for the spam - looks like I forgot to rebase the first time > > > I sent this out. > > > > > > Actually, on a related note, it's not 100% clear to me whether this > > > patch (which in its current form, applies to net-next) should instead > > > be based on the mm branch - but the most recent (clean) checkout of mm > > > fails to build for me so net-next for now. > > > > > > > It is due to "mm: page-writeback: simplify memcg handling in > > test_clear_page_writeback()" patch in the mm tree. You would need to > > reintroduce the lock_page_memcg() which returns the memcg and make > > __unlock_page_memcg() non-static. > > To clarify, Shakeel - the tag "tag: v5.12-rc2-mmots-2021-03-11-21-49" > fails to build on a clean checkout, without this patch, due to a > compilation failure in mm/shmem.c, for reference: > https://pastebin.com/raw/12eSGdGD > (and that's why I'm basing this patch off of net-next in this email). > > -Arjun Another seeming anomaly - the patch sent out passes scripts/checkpatch.pl but netdev/checkpatch finds plenty of actionable fixes here: https://patchwork.kernel.org/project/netdevbpf/patch/20210316041645.144249-1-arjunroy.k...@gmail.com/ Is netdev using some other automated checker instead of scripts/checkpatch.pl? -Arjun
Re: [mm, net-next v2] mm: net: memcg accounting for TCP rx zerocopy
On Mon, Mar 15, 2021 at 9:29 PM Shakeel Butt wrote: > > On Mon, Mar 15, 2021 at 9:20 PM Arjun Roy wrote: > > > [...] > > > > > > > Apologies for the spam - looks like I forgot to rebase the first time > > I sent this out. > > > > Actually, on a related note, it's not 100% clear to me whether this > > patch (which in its current form, applies to net-next) should instead > > be based on the mm branch - but the most recent (clean) checkout of mm > > fails to build for me so net-next for now. > > > > It is due to "mm: page-writeback: simplify memcg handling in > test_clear_page_writeback()" patch in the mm tree. You would need to > reintroduce the lock_page_memcg() which returns the memcg and make > __unlock_page_memcg() non-static. To clarify, Shakeel - the tag "tag: v5.12-rc2-mmots-2021-03-11-21-49" fails to build on a clean checkout, without this patch, due to a compilation failure in mm/shmem.c, for reference: https://pastebin.com/raw/12eSGdGD (and that's why I'm basing this patch off of net-next in this email). -Arjun
Re: [PATCH v3] bus: mhi: core: Check state before processing power_down
On Wed, Mar 10, 2021 at 01:49:25PM -0700, Jeffrey Hugo wrote: > We cannot process a power_down if the power state is DISABLED. There is > no valid mhi_ctxt in that case, so attepting to process the power_down > will likely result in a null pointer dereference. If the power state is > DISABLED, there is nothing to do anyways, so just bail early. > > Signed-off-by: Jeffrey Hugo Applied to mhi-next! Thanks, Mani > --- > > v3: Move the pm_lock use up > v2: Fix subject and tweak the locking to avoid needless lock/unlock/relock > > drivers/bus/mhi/core/pm.c | 12 ++-- > 1 file changed, 10 insertions(+), 2 deletions(-) > > diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c > index 414da4f..ea62549 100644 > --- a/drivers/bus/mhi/core/pm.c > +++ b/drivers/bus/mhi/core/pm.c > @@ -1149,6 +1149,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) > mhi_deinit_dev_ctxt(mhi_cntrl); > > error_dev_ctxt: > + mhi_cntrl->pm_state = MHI_PM_DISABLE; > mutex_unlock(&mhi_cntrl->pm_mutex); > > return ret; > @@ -1160,12 +1161,19 @@ void mhi_power_down(struct mhi_controller *mhi_cntrl, > bool graceful) > enum mhi_pm_state cur_state, transition_state; > struct device *dev = &mhi_cntrl->mhi_dev->dev; > > + mutex_lock(&mhi_cntrl->pm_mutex); > + write_lock_irq(&mhi_cntrl->pm_lock); > + cur_state = mhi_cntrl->pm_state; > + if (cur_state == MHI_PM_DISABLE) { > + write_unlock_irq(&mhi_cntrl->pm_lock); > + mutex_unlock(&mhi_cntrl->pm_mutex); > + return; /* Already powered down */ > + } > + > /* If it's not a graceful shutdown, force MHI to linkdown state */ > transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS : > MHI_PM_LD_ERR_FATAL_DETECT; > > - mutex_lock(&mhi_cntrl->pm_mutex); > - write_lock_irq(&mhi_cntrl->pm_lock); > cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state); > if (cur_state != transition_state) { > dev_err(dev, "Failed to move to state: %s from: %s\n", > -- > Qualcomm Technologies, Inc. is a member of the > Code Aurora Forum, a Linux Foundation Collaborative Project. >
Re: [PATCH v3] bus: mhi: core: Check state before processing power_down
On Wed, Mar 10, 2021 at 01:49:25PM -0700, Jeffrey Hugo wrote: > We cannot process a power_down if the power state is DISABLED. There is > no valid mhi_ctxt in that case, so attepting to process the power_down > will likely result in a null pointer dereference. If the power state is > DISABLED, there is nothing to do anyways, so just bail early. > > Signed-off-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Thanks, Mani > --- > > v3: Move the pm_lock use up > v2: Fix subject and tweak the locking to avoid needless lock/unlock/relock > > drivers/bus/mhi/core/pm.c | 12 ++-- > 1 file changed, 10 insertions(+), 2 deletions(-) > > diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c > index 414da4f..ea62549 100644 > --- a/drivers/bus/mhi/core/pm.c > +++ b/drivers/bus/mhi/core/pm.c > @@ -1149,6 +1149,7 @@ int mhi_async_power_up(struct mhi_controller *mhi_cntrl) > mhi_deinit_dev_ctxt(mhi_cntrl); > > error_dev_ctxt: > + mhi_cntrl->pm_state = MHI_PM_DISABLE; > mutex_unlock(&mhi_cntrl->pm_mutex); > > return ret; > @@ -1160,12 +1161,19 @@ void mhi_power_down(struct mhi_controller *mhi_cntrl, > bool graceful) > enum mhi_pm_state cur_state, transition_state; > struct device *dev = &mhi_cntrl->mhi_dev->dev; > > + mutex_lock(&mhi_cntrl->pm_mutex); > + write_lock_irq(&mhi_cntrl->pm_lock); > + cur_state = mhi_cntrl->pm_state; > + if (cur_state == MHI_PM_DISABLE) { > + write_unlock_irq(&mhi_cntrl->pm_lock); > + mutex_unlock(&mhi_cntrl->pm_mutex); > + return; /* Already powered down */ > + } > + > /* If it's not a graceful shutdown, force MHI to linkdown state */ > transition_state = (graceful) ? MHI_PM_SHUTDOWN_PROCESS : > MHI_PM_LD_ERR_FATAL_DETECT; > > - mutex_lock(&mhi_cntrl->pm_mutex); > - write_lock_irq(&mhi_cntrl->pm_lock); > cur_state = mhi_tryset_pm_state(mhi_cntrl, transition_state); > if (cur_state != transition_state) { > dev_err(dev, "Failed to move to state: %s from: %s\n", > -- > Qualcomm Technologies, Inc. is a member of the > Code Aurora Forum, a Linux Foundation Collaborative Project. >
Re: [PATCH 3/3] arm64: dts: qcom: sm8150: add i2c nodes
On 10-03-21, 16:31, Caleb Connolly wrote: > Tested on the OnePlus 7 Pro (including DMA). Lgtm: Reviewed-by: Vinod Koul But missing enabling nodes in board dts ..? > > Signed-off-by: Caleb Connolly > --- > arch/arm64/boot/dts/qcom/sm8150.dtsi | 521 +++ > 1 file changed, 521 insertions(+) > > diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi > b/arch/arm64/boot/dts/qcom/sm8150.dtsi > index 543417d74216..0a38ad54c715 100644 > --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi > +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi > @@ -588,6 +588,111 @@ qupv3_id_0: geniqup@8c { > #size-cells = <2>; > ranges; > status = "disabled"; > + > + i2c0: i2c@88 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x0088 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S0_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c0_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c1: i2c@884000 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x00884000 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S1_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c1_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c2: i2c@888000 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x00888000 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S2_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c2_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c3: i2c@88c000 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x0088c000 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S3_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c3_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c4: i2c@89 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x0089 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S4_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c4_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c5: i2c@894000 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x00894000 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S5_CLK>; > + pinctrl-names = "default"; > + pinctrl-0 = <&qup_i2c5_default>; > + interrupts = ; > + #address-cells = <1>; > + #size-cells = <0>; > + status = "disabled"; > + }; > + > + i2c6: i2c@898000 { > + compatible = "qcom,geni-i2c"; > + reg = <0 0x00898000 0 0x4000>; > + clock-names = "se"; > + clocks = <&gcc GCC_QUPV3_WRAP0_S6_CLK>; > + pinctrl-names = "default"; > +
Re: [PATCH 2/3] arm64: dts: qcom: sm8150: add iommus to qups
On 10-03-21, 16:31, Caleb Connolly wrote: > Hook up the SMMU for doing DMA over i2c. Some peripherals like > touchscreens easily exceed 32-bytes per transfer, causing errors and > lockups without this. Why not squash this to patch 1..? > > Signed-off-by: Caleb Connolly > --- > Fixes i2c on the OnePlus 7, without this touching the screen with more > than 4 fingers causes the device to lock up and reboot. > --- > arch/arm64/boot/dts/qcom/sm8150.dtsi | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi > b/arch/arm64/boot/dts/qcom/sm8150.dtsi > index 03e05d98daf2..543417d74216 100644 > --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi > +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi > @@ -583,6 +583,7 @@ qupv3_id_0: geniqup@8c { > clock-names = "m-ahb", "s-ahb"; > clocks = <&gcc GCC_QUPV3_WRAP_0_M_AHB_CLK>, ><&gcc GCC_QUPV3_WRAP_0_S_AHB_CLK>; > + iommus = <&apps_smmu 0xc3 0x0>; > #address-cells = <2>; > #size-cells = <2>; > ranges; > @@ -595,6 +596,7 @@ qupv3_id_1: geniqup@ac { > clock-names = "m-ahb", "s-ahb"; > clocks = <&gcc GCC_QUPV3_WRAP_1_M_AHB_CLK>, ><&gcc GCC_QUPV3_WRAP_1_S_AHB_CLK>; > + iommus = <&apps_smmu 0x603 0x0>; > #address-cells = <2>; > #size-cells = <2>; > ranges; > @@ -617,6 +619,7 @@ qupv3_id_2: geniqup@cc { > clock-names = "m-ahb", "s-ahb"; > clocks = <&gcc GCC_QUPV3_WRAP_2_M_AHB_CLK>, ><&gcc GCC_QUPV3_WRAP_2_S_AHB_CLK>; > + iommus = <&apps_smmu 0x7a3 0x0>; > #address-cells = <2>; > #size-cells = <2>; > ranges; > -- > 2.29.2 > -- ~Vinod
Re: [PATCH v2 RESEND] bus: mhi: core: Wait for ready state after reset
On Wed, Mar 10, 2021 at 01:41:58PM -0700, Jeffrey Hugo wrote: > After the device has signaled the end of reset by clearing the reset bit, > it will automatically reinit MHI and the internal device structures. Once > That is done, the device will signal it has entered the ready state. > > Signaling the ready state involves sending an interrupt (MSI) to the host > which might cause IOMMU faults if it occurs at the wrong time. > > If the controller is being powered down, and possibly removed, then the > reset flow would only wait for the end of reset. At which point, the host > and device would start a race. The host may complete its reset work, and > remove the interrupt handler, which would cause the interrupt to be > disabled in the IOMMU. If that occurs before the device signals the ready > state, then the IOMMU will fault since it blocked an interrupt. While > harmless, the fault would appear like a serious issue has occurred so let's > silence it by making sure the device hits the ready state before the host > completes its reset processing. > > Signed-off-by: Jeffrey Hugo > --- > drivers/bus/mhi/core/pm.c | 17 - > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c > index adb0e80..414da4f 100644 > --- a/drivers/bus/mhi/core/pm.c > +++ b/drivers/bus/mhi/core/pm.c > @@ -467,7 +467,7 @@ static void mhi_pm_disable_transition(struct > mhi_controller *mhi_cntrl) > > /* Trigger MHI RESET so that the device will not access host memory */ > if (!MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) { > - u32 in_reset = -1; > + u32 in_reset = -1, ready = 0; > unsigned long timeout = msecs_to_jiffies(mhi_cntrl->timeout_ms); > > dev_dbg(dev, "Triggering MHI Reset in device\n"); > @@ -490,6 +490,21 @@ static void mhi_pm_disable_transition(struct > mhi_controller *mhi_cntrl) >* hence re-program it >*/ > mhi_write_reg(mhi_cntrl, mhi_cntrl->bhi, BHI_INTVEC, 0); > + > + if (!MHI_IN_PBL(mhi_get_exec_env(mhi_cntrl))) { > + /* wait for ready to be set */ > + ret = wait_event_timeout(mhi_cntrl->state_event, > + mhi_read_reg_field(mhi_cntrl, > + mhi_cntrl->regs, > + MHISTATUS, > + MHISTATUS_READY_MASK, > + MHISTATUS_READY_SHIFT, > + &ready) > + || ready, timeout); > + if (!ret || !ready) > + dev_warn(dev, > + "Device failed to enter READY state\n"); Wouldn't dev_err be more appropriate here provided that we might get IOMMU fault anytime soon? Thanks, Mani > + } > } > > dev_dbg(dev, > -- > Qualcomm Technologies, Inc. is a member of the > Code Aurora Forum, a Linux Foundation Collaborative Project. >
Re: [PATCH V4 4/7] vDPA/ifcvf: remove the version number string
在 2021/3/15 下午3:44, Zhu Lingshan 写道: This commit removes the version number string, using kernel version is enough. Signed-off-by: Zhu Lingshan Reviewed-by: Leon Romanovsky Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index fd5befc5cbcc..c34e1eec6b6c 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -14,7 +14,6 @@ #include #include "ifcvf_base.h" -#define VERSION_STRING "0.1" #define DRIVER_AUTHOR "Intel Corporation" #define IFCVF_DRIVER_NAME "ifcvf" @@ -503,4 +502,3 @@ static struct pci_driver ifcvf_driver = { module_pci_driver(ifcvf_driver); MODULE_LICENSE("GPL v2"); -MODULE_VERSION(VERSION_STRING);
Re: [PATCH V4 3/7] vDPA/ifcvf: rename original IFCVF dev ids to N3000 ids
在 2021/3/15 下午3:44, Zhu Lingshan 写道: IFCVF driver probes multiple types of devices now, to distinguish the original device driven by IFCVF from others, it is renamed as "N3000". Signed-off-by: Zhu Lingshan Acked-by: Jason Wang If you want to have a general driver, you probaby need to rename the driver. Thanks --- drivers/vdpa/ifcvf/ifcvf_base.h | 8 drivers/vdpa/ifcvf/ifcvf_main.c | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 75d9a8052039..794d1505d857 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -18,10 +18,10 @@ #include #include -#define IFCVF_VENDOR_ID 0x1AF4 -#define IFCVF_DEVICE_ID0x1041 -#define IFCVF_SUBSYS_VENDOR_ID 0x8086 -#define IFCVF_SUBSYS_DEVICE_ID 0x001A +#define N3000_VENDOR_ID0x1AF4 +#define N3000_DEVICE_ID0x1041 +#define N3000_SUBSYS_VENDOR_ID 0x8086 +#define N3000_SUBSYS_DEVICE_ID 0x001A #define C5000X_PL_VENDOR_ID 0x1AF4 #define C5000X_PL_DEVICE_ID 0x1000 diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 26a2dab7ca66..fd5befc5cbcc 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -480,10 +480,10 @@ static void ifcvf_remove(struct pci_dev *pdev) } static struct pci_device_id ifcvf_pci_ids[] = { - { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, - IFCVF_DEVICE_ID, - IFCVF_SUBSYS_VENDOR_ID, - IFCVF_SUBSYS_DEVICE_ID) }, + { PCI_DEVICE_SUB(N3000_VENDOR_ID, +N3000_DEVICE_ID, +N3000_SUBSYS_VENDOR_ID, +N3000_SUBSYS_DEVICE_ID) }, { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID, C5000X_PL_DEVICE_ID, C5000X_PL_SUBSYS_VENDOR_ID,
Re: [PATCH] pinctrl: add lock in mtk_rmw function.
On Tue, 2021-03-16 at 13:05 +0800, Sean Wang wrote: > Hi Zhiyong, > > On Fri, Mar 12, 2021 at 2:35 PM Zhiyong Tao wrote: > > > > When multiple threads operate on the same register resource > > which include multiple pin, It will make the register resource > > wrong to control. So we add lock to avoid the case. > > > > Signed-off-by: Zhiyong Tao > > --- > > drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 4 > > drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h | 2 ++ > > drivers/pinctrl/mediatek/pinctrl-paris.c | 2 ++ > > 3 files changed, 8 insertions(+) > > > > diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c > > b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c > > index 72f17f26acd8..fcf7c34a 100644 > > --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c > > +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c > > @@ -58,10 +58,14 @@ void mtk_rmw(struct mtk_pinctrl *pctl, u8 i, u32 reg, > > u32 mask, u32 set) > > { > > u32 val; > > > > + mutex_lock(&pctl->lock); > > + > > val = mtk_r32(pctl, i, reg); > > val &= ~mask; > > val |= set; > > mtk_w32(pctl, i, reg, val); > > + > > + mutex_unlock(&pctl->lock); > > } > > > > static int mtk_hw_pin_field_lookup(struct mtk_pinctrl *hw, > > diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h > > b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h > > index e2aae285b5fc..65eac708a3b3 100644 > > --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h > > +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.h > > @@ -251,6 +251,8 @@ struct mtk_pinctrl { > > struct mtk_eint *eint; > > struct mtk_pinctrl_group*groups; > > const char **grp_names; > > + /* lock pin's register resource to avoid multiple threads issue*/ > > + struct mutex lock; > > }; > > > > void mtk_rmw(struct mtk_pinctrl *pctl, u8 i, u32 reg, u32 mask, u32 set); > > diff --git a/drivers/pinctrl/mediatek/pinctrl-paris.c > > b/drivers/pinctrl/mediatek/pinctrl-paris.c > > index da1f19288aa6..48e823f6d293 100644 > > --- a/drivers/pinctrl/mediatek/pinctrl-paris.c > > +++ b/drivers/pinctrl/mediatek/pinctrl-paris.c > > @@ -970,6 +970,8 @@ int mtk_paris_pinctrl_probe(struct platform_device > > *pdev, > > > > hw->nbase = hw->soc->nbase_names; > > > > + mutex_init(&hw->lock); > > + > > Could you help add the mutex initialization into pinctrl-moore.c too? > and then the patch would look good to me. ==>Hi sean, We will add add the mutex initialization into pinctrl-moore.c too in v2. Thanks. > > > err = mtk_pctrl_build_state(pdev); > > if (err) { > > dev_err(&pdev->dev, "build state failed: %d\n", err); > > -- > > 2.18.0 > >
Re: [PATCH V4 2/7] vDPA/ifcvf: enable Intel C5000X-PL virtio-net for vDPA
在 2021/3/15 下午3:44, Zhu Lingshan 写道: This commit enabled Intel FPGA SmartNIC C5000X-PL virtio-net for vDPA Signed-off-by: Zhu Lingshan Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_base.h | 5 + drivers/vdpa/ifcvf/ifcvf_main.c | 5 + 2 files changed, 10 insertions(+) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 64696d63fe07..75d9a8052039 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -23,6 +23,11 @@ #define IFCVF_SUBSYS_VENDOR_ID0x8086 #define IFCVF_SUBSYS_DEVICE_ID0x001A +#define C5000X_PL_VENDOR_ID 0x1AF4 +#define C5000X_PL_DEVICE_ID0x1000 +#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086 +#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001 + #define IFCVF_SUPPORTED_FEATURES \ ((1ULL << VIRTIO_NET_F_MAC) | \ (1ULL << VIRTIO_F_ANY_LAYOUT)| \ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index e501ee07de17..26a2dab7ca66 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -484,6 +484,11 @@ static struct pci_device_id ifcvf_pci_ids[] = { IFCVF_DEVICE_ID, IFCVF_SUBSYS_VENDOR_ID, IFCVF_SUBSYS_DEVICE_ID) }, + { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID, +C5000X_PL_DEVICE_ID, +C5000X_PL_SUBSYS_VENDOR_ID, +C5000X_PL_SUBSYS_DEVICE_ID) }, + { 0 }, }; MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
Re: [PATCH V4 1/7] vDPA/ifcvf: get_vendor_id returns a device specific vendor id
在 2021/3/15 下午3:44, Zhu Lingshan 写道: In this commit, ifcvf_get_vendor_id() will return a device specific vendor id of the probed pci device than a hard code. Signed-off-by: Zhu Lingshan Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index fa1af301cf55..e501ee07de17 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -324,7 +324,10 @@ static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) { - return IFCVF_SUBSYS_VENDOR_ID; + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); + struct pci_dev *pdev = adapter->pdev; + + return pdev->subsystem_vendor; } static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
RE: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json schema
Hi Vladimir, > -Original Message- > From: Vladimir Oltean > Sent: Tuesday, March 16, 2021 2:25 AM > To: Kuldeep Singh > Cc: linux-...@vger.kernel.org; devicet...@vger.kernel.org; linux- > ker...@vger.kernel.org; Mark Brown ; Rob Herring > > Subject: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json > schema > > Caution: EXT Email > > Hi Kuldeep, > > On Mon, Mar 15, 2021 at 05:45:18PM +0530, Kuldeep Singh wrote: > > Convert the Freescale DSPI binding to DT schema format using json-schema. > > > > Signed-off-by: Kuldeep Singh > > --- [...] > > + > > +allOf: > > + - $ref: "spi-controller.yaml#" > > + > > +properties: > > + compatible: > > +oneOf: > > + - enum: > > + - fsl,vf610-dspi > > + - fsl,ls1021a-v1.0-dspi > > + - fsl,ls1028a-dspi > > + - fsl,ls2085a-dspi > > + - fsl,lx2160a-dspi > > + - items: > > + - enum: > > + - fsl,ls1012a-dspi > > + - fsl,ls1028a-dspi > > + - fsl,ls1043a-dspi > > + - fsl,ls1046a-dspi > > + - fsl,ls1088a-dspi > > + - const: fsl,ls1021a-v1.0-dspi > > + - items: > > + - enum: > > + - fsl,ls2080a-dspi > > + - fsl,lx2160a-dspi > > + - const: fsl,ls2085a-dspi > > Can this simply be: > compatible: > oneOf: > - enum: > - fsl,vf610-dspi > - fsl,ls1021a-v1.0-dspi > - fsl,ls1012a-dspi > - fsl,ls1028a-dspi > - fsl,ls1043a-dspi > - fsl,ls1046a-dspi > - fsl,ls1088a-dspi > - fsl,ls2080a-dspi > - fsl,ls2085a-dspi > - fsl,lx2160a-dspi > ? Compatible entries in conjugation require enum and const pair. For example, ls1012a.dtsi uses compatible = "fsl,ls1012a-dspi","fsl,ls1021a-v1.0-dspi"; Same goes for LS1028 as well. Therefore, can't mention the compatible entry as single entity otherwise it may fail "make dt_binding_check" and "make dtbs_check". > > > +examples: > > + - | > > +#include > > +#include > > + > > +soc { > > +#address-cells = <2>; > > +#size-cells = <2>; > > + > > +spi@210 { > > +compatible = "fsl,ls1028a-dspi", "fsl,ls1021a-v1.0-dspi"; > > This doesn't need the "fsl,ls1021a-v1.0-dspi" compatible, can you please > remove > it? I have taken this example from LS1028a.dtsi and it uses these compatibles in conjugation. If "fsl,ls1021a-v1.0-dspi" is not required, then it should also be removed from device-tree As well as from bindings both. > > > +#address-cells = <1>; > > +#size-cells = <0>; > > +reg = <0x0 0x210 0x0 0x1>; > > +interrupts = ; > > +clock-names = "dspi"; > > +clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL > QORIQ_CLK_PLL_DIV(2)>; > > +dmas = <&edma0 0 62>, <&edma0 0 60>; > > +dma-names = "tx", "rx"; > > +spi-num-chipselects = <4>; > > +little-endian; > > + > > +flash@0 { > > +compatible = "jedec,spi-nor"; > > +spi-max-frequency = <1000>; > > +reg = <0>; > > +}; > > +}; > > +}; > > (...) > > > -Optional property: > > -- big-endian: If present the dspi device's registers are implemented > > - in big endian mode. > > I don't see "big-endian" being covered in any common yaml, could you please > not > delete it? The driver calls of_device_is_big_endian. Thanks for mentioning. Will consider this in next version after receiving feedback on other thread. Regards Kuldeep
[tip:master] BUILD SUCCESS 4aabf1d2f23189cabd453e10473ed5d6cce6bf5f
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git master branch HEAD: 4aabf1d2f23189cabd453e10473ed5d6cce6bf5f Merge 'x86/cpu' into master elapsed time: 724m configs tested: 134 configs skipped: 2 The following configs have been built successfully. More configs may be tested in the coming days. gcc tested configs: arm defconfig arm64allyesconfig arm64 defconfig arm allyesconfig arm allmodconfig x86_64 allyesconfig riscvallmodconfig h8300 edosk2674_defconfig powerpcamigaone_defconfig powerpc linkstation_defconfig m68km5407c3_defconfig arc nsimosci_hs_smp_defconfig openrisc simple_smp_defconfig arm mainstone_defconfig m68kmvme16x_defconfig arm mv78xx0_defconfig mips rbtx49xx_defconfig h8300h8300h-sim_defconfig h8300 defconfig armrealview_defconfig s390 alldefconfig powerpc stx_gp3_defconfig mipsmalta_kvm_guest_defconfig sh ap325rxa_defconfig mips cu1000-neo_defconfig mips xway_defconfig arm versatile_defconfig arm lpc32xx_defconfig shshmin_defconfig mips ip32_defconfig parisc alldefconfig ia64defconfig sh se7712_defconfig arm lubbock_defconfig x86_64 defconfig sh rts7751r2dplus_defconfig powerpc tqm8xx_defconfig xtensa virt_defconfig sh se7722_defconfig arm eseries_pxa_defconfig m68k bvme6000_defconfig powerpc rainier_defconfig pariscgeneric-32bit_defconfig armmini2440_defconfig i386defconfig powerpc mpc5200_defconfig mips fuloong2e_defconfig pariscgeneric-64bit_defconfig armhisi_defconfig powerpc mpc836x_mds_defconfig mips rt305x_defconfig arc haps_hs_smp_defconfig powerpc mpc512x_defconfig ia64 alldefconfig cskydefconfig powerpc mpc834x_itxgp_defconfig powerpc cm5200_defconfig armxcep_defconfig sh sh7785lcr_32bit_defconfig powerpc motionpro_defconfig arm h3600_defconfig powerpc mpc8313_rdb_defconfig powerpc g5_defconfig arm orion5x_defconfig powerpc mpc837x_rdb_defconfig powerpc arches_defconfig arm pcm027_defconfig xtensa alldefconfig mips ath79_defconfig mips lemote2f_defconfig ia64 allmodconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nds32 defconfig nios2allyesconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig arc defconfig sh allmodconfig parisc defconfig s390 allyesconfig s390 allmodconfig parisc allyesconfig s390defconfig i386 allyesconfig sparcallyesconfig sparc defconfig i386 tinyconfig nios2 defconfig arc allyesconfig nds32 allnoconfig mips allyesconfig mips allmodconfig powerpc allyesconfig powerpc allmodconfig powerpc allnoconfig x86_64 randconfig-a006-20210
[tip:x86/cpu] BUILD SUCCESS WITH WARNING 301cddc21a157a3072d789a3097857202e550a24
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/cpu branch HEAD: 301cddc21a157a3072d789a3097857202e550a24 objtool/x86: Use asm/nops.h Warning reports: https://lore.kernel.org/lkml/202103160701.3uxlwiwm-...@intel.com Warning in current branch: arch/x86/kernel/alternative.c:96:10: warning: Undefined behaviour, pointer arithmetic 'x86nops+10' is out of bounds. [pointerOutOfBounds] arch/x86/kernel/ftrace.c:304:7: warning: union member 'ftrace_op_code_union::code' is never used. [unusedStructMember] Warning ids grouped by kconfigs: gcc_recent_errors `-- i386-randconfig-p002-20210315 |-- arch-x86-kernel-alternative.c:warning:Undefined-behaviour-pointer-arithmetic-x86nops-is-out-of-bounds.-pointerOutOfBounds `-- arch-x86-kernel-ftrace.c:warning:union-member-ftrace_op_code_union::code-is-never-used.-unusedStructMember elapsed time: 723m configs tested: 135 configs skipped: 2 gcc tested configs: arm defconfig arm64allyesconfig arm64 defconfig arm allyesconfig arm allmodconfig x86_64 allyesconfig riscvallmodconfig mips malta_kvm_defconfig arm exynos_defconfig ia64 bigsur_defconfig armdove_defconfig arm mainstone_defconfig m68kmvme16x_defconfig arm mv78xx0_defconfig mips rbtx49xx_defconfig h8300h8300h-sim_defconfig h8300 defconfig armzeus_defconfig powerpcsocrates_defconfig arc tb10x_defconfig powerpc chrp32_defconfig openrisc alldefconfig arm am200epdkit_defconfig arm palmz72_defconfig arm versatile_defconfig openrisc simple_smp_defconfig armrealview_defconfig s390 alldefconfig powerpc stx_gp3_defconfig mipsmalta_kvm_guest_defconfig sh ap325rxa_defconfig mips cu1000-neo_defconfig mips xway_defconfig arm lpc32xx_defconfig shshmin_defconfig mips ip32_defconfig parisc alldefconfig ia64defconfig sh se7712_defconfig arm lubbock_defconfig x86_64 defconfig sh rts7751r2dplus_defconfig powerpc tqm8xx_defconfig xtensa virt_defconfig sh se7722_defconfig arm eseries_pxa_defconfig m68k bvme6000_defconfig powerpc rainier_defconfig pariscgeneric-32bit_defconfig armmini2440_defconfig i386defconfig mips rt305x_defconfig arc haps_hs_smp_defconfig powerpc mpc512x_defconfig ia64 alldefconfig cskydefconfig powerpc mpc834x_itxgp_defconfig powerpc cm5200_defconfig armxcep_defconfig sh sh7785lcr_32bit_defconfig powerpc motionpro_defconfig arm h3600_defconfig powerpc mpc8313_rdb_defconfig powerpc g5_defconfig arm orion5x_defconfig powerpc mpc837x_rdb_defconfig powerpc arches_defconfig arm pcm027_defconfig xtensa alldefconfig mips ath79_defconfig mips lemote2f_defconfig ia64 allmodconfig ia64 allyesconfig m68k allmodconfig m68kdefconfig m68k allyesconfig nios2 defconfig arc allyesconfig nds32 allnoconfig nds32 defconfig nios2allyesconfig alpha defconfig alphaallyesconfig xtensa allyesconfig h8300allyesconfig arc defconfig sh
Re: [PATCH v3 RESEND] bus: mhi: core: Return EAGAIN if MHI ring is full
On Wed, Mar 10, 2021 at 01:40:03PM -0700, Jeffrey Hugo wrote: > From: Fan Wu > > Currently ENOMEM is returned when MHI ring is full. This error code is > very misleading. Change to EAGAIN instead. > > Signed-off-by: Fan Wu > Signed-off-by: Jeffrey Hugo > Reviewed-by: Hemant Kumar > Reviewed-by: Manivannan Sadhasivam Applied to mhi-next! Thanks, Mani > --- > drivers/bus/mhi/core/main.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c > index 2c61dfd..a7811fb 100644 > --- a/drivers/bus/mhi/core/main.c > +++ b/drivers/bus/mhi/core/main.c > @@ -1020,7 +1020,7 @@ static int mhi_queue(struct mhi_device *mhi_dev, struct > mhi_buf_info *buf_info, > > ret = mhi_is_ring_full(mhi_cntrl, tre_ring); > if (unlikely(ret)) { > - ret = -ENOMEM; > + ret = -EAGAIN; > goto exit_unlock; > } > > -- > Qualcomm Technologies, Inc. is a member of the > Code Aurora Forum, a Linux Foundation Collaborative Project. >
drivers/md/raid5.c:2539 resize_stripes() warn: inconsistent indenting
tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 1a4431a5db2bf800c647ee0ed87f2727b8d6c29c commit: f16acaf328c5615fdaea74f9bd0b4019544532d6 md/raid5: resize stripe_head when reshape array date: 6 months ago config: openrisc-randconfig-m031-20210316 (attached as .config) compiler: or1k-linux-gcc (GCC) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot New smatch warnings: drivers/md/raid5.c:2539 resize_stripes() warn: inconsistent indenting Old smatch warnings: drivers/md/raid5.c:2840 raid5_end_write_request() error: uninitialized symbol 'rdev'. drivers/md/raid5.c:2845 raid5_end_write_request() error: uninitialized symbol 'rdev'. drivers/md/raid5.c:6950 alloc_thread_groups() warn: double check that we're allocating correct size: 108 vs 1 vim +2539 drivers/md/raid5.c 2459 2460 static int resize_stripes(struct r5conf *conf, int newsize) 2461 { 2462 /* Make all the stripes able to hold 'newsize' devices. 2463 * New slots in each stripe get 'page' set to a new page. 2464 * 2465 * This happens in stages: 2466 * 1/ create a new kmem_cache and allocate the required number of 2467 *stripe_heads. 2468 * 2/ gather all the old stripe_heads and transfer the pages across 2469 *to the new stripe_heads. This will have the side effect of 2470 *freezing the array as once all stripe_heads have been collected, 2471 *no IO will be possible. Old stripe heads are freed once their 2472 *pages have been transferred over, and the old kmem_cache is 2473 *freed when all stripes are done. 2474 * 3/ reallocate conf->disks to be suitable bigger. If this fails, 2475 *we simple return a failure status - no need to clean anything up. 2476 * 4/ allocate new pages for the new slots in the new stripe_heads. 2477 *If this fails, we don't bother trying the shrink the 2478 *stripe_heads down again, we just leave them as they are. 2479 *As each stripe_head is processed the new one is released into 2480 *active service. 2481 * 2482 * Once step2 is started, we cannot afford to wait for a write, 2483 * so we use GFP_NOIO allocations. 2484 */ 2485 struct stripe_head *osh, *nsh; 2486 LIST_HEAD(newstripes); 2487 struct disk_info *ndisks; 2488 int err = 0; 2489 struct kmem_cache *sc; 2490 int i; 2491 int hash, cnt; 2492 2493 md_allow_write(conf->mddev); 2494 2495 /* Step 1 */ 2496 sc = kmem_cache_create(conf->cache_name[1-conf->active_name], 2497 sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), 2498 0, 0, NULL); 2499 if (!sc) 2500 return -ENOMEM; 2501 2502 /* Need to ensure auto-resizing doesn't interfere */ 2503 mutex_lock(&conf->cache_size_mutex); 2504 2505 for (i = conf->max_nr_stripes; i; i--) { 2506 nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf); 2507 if (!nsh) 2508 break; 2509 2510 list_add(&nsh->lru, &newstripes); 2511 } 2512 if (i) { 2513 /* didn't get enough, give up */ 2514 while (!list_empty(&newstripes)) { 2515 nsh = list_entry(newstripes.next, struct stripe_head, lru); 2516 list_del(&nsh->lru); 2517 free_stripe(sc, nsh); 2518 } 2519 kmem_cache_destroy(sc); 2520 mutex_unlock(&conf->cache_size_mutex); 2521 return -ENOMEM; 2522 } 2523 /* Step 2 - Must use GFP_NOIO now. 2524 * OK, we have enough stripes, start collecting inactive 2525 * stripes and copying them over 2526 */ 2527 hash = 0; 2528 cnt = 0; 2529 list_for_each_entry(nsh, &newstripes, lru) { 2530 lock_device_hash_lock(conf, hash); 2531 wait_event_cmd(conf->wait_for_stripe, 2532 !list_empty(conf->inactive_list + hash), 2533 unlock_device_hash_lock(conf, hash), 2534 lock_device_hash_lock(conf, hash)); 2535 osh = get_free_stripe(conf, hash); 2536 unlock_device_hash_lock(conf, hash); 2537 2538 #if PAGE_SIZE != DEFAULT_STRIPE_SIZE > 2539 for (i = 0; i < osh->nr_pages; i++) { 2540
Re: [PATCH 0/2] x86: Remove ideal_nops[]
On Mon, Mar 15, 2021 at 11:14 PM Peter Zijlstra wrote: > > On Mon, Mar 15, 2021 at 07:23:29PM +0100, Sedat Dilek wrote: > > > You mean something like that ^^? > > > > - Sedat - > > > > [1] > > https://git.zx2c4.com/laptop-kernel/commit/?id=116badbe0a18bc36ba90acb8b80cff41f9ab0686 > > *shudder*, I was more thinking you'd simply add it to you CFLAGS when > building. I don't see any point in having that in Kconfig. Simply adding the CFLAGS to arch/x86/Makefile. If I forgot to mention: Tested-by: Sedat Dilek . # LLVM/Clang v12.0.0-rc3 - Sedat -
Re: [PATCH v2 3/3] iio: adc: add ADC driver for the TI TSC2046 controller
Hi, On Sat, Mar 13, 2021 at 04:11:19PM +, Jonathan Cameron wrote: > On Fri, 12 Mar 2021 11:55:15 +0100 > Oleksij Rempel wrote: > > > Basically the TI TSC2046 touchscreen controller is 8 channel ADC optimized > > for > > the touchscreen use case. By implementing it as IIO ADC device, we can > > make use of resistive-adc-touch and iio-hwmon drivers. > > > > So far, this driver was tested with custom version of resistive-adc-touch > > driver, > > since it need to be extended to make use of Z1 and Z2 channels. The X/Y > > are working without additional changes. > > > > Signed-off-by: Oleksij Rempel > > So the flow in here is still rather non obvious and touchscreen specific. > Please add some documentation on what the trigger actually is. It seems > to be some combination of an hrtimer and a interrupt driven trigger. > I 'think' the idea is to ensure you get one 'no touch' measurement once > touch is removed? Yes, this IRQ line is just a voltage level converter integrated in to controller. As soon voltage level reaches some threshold the IRQ level is changed. It means, the IRQ is inactive as soon as nothing is pressing on the screen, or the channel is changed. Since we need more sample and not so frequent, I made this construction of rate limited IRQ with extra triggering on the end. Potentially we can use hrtimer trigger, especially if the IRQ line is not attached or can't be used for some reason. But for most use cases this trigger will provide better power efficiency. I can imagine, that in case of field board diagnostic, we may need some extra functionality. So the workflow will look like: - unbind the touchscreen driver - disable oversampling - disable settling time - attach sysfs trigger - enable one of channels - start grabbing data over IIO char dev But this functionality will need some more work and currently has lowest prio. > I guess we don't need the performance but I was a bit surprised that I didn't > see this doing overlapping of the previous read out with the config for the > next sample. I can't follow here. Can you please explain. > Anyhow, overall this looks pretty good to me. Add that a bit of documentation > for the trigger to the comments at the top of the file and tidy up the last > few things inline. ok > It's a bit unusual as ADC drivers go, but not so strange that it worries > me that much. :) > Thanks, > > Jonathan > > > --- > > MAINTAINERS | 8 + > > drivers/iio/adc/Kconfig | 12 + > > drivers/iio/adc/Makefile | 1 + > > drivers/iio/adc/ti-tsc2046.c | 713 +++ > > 4 files changed, 734 insertions(+) > > create mode 100644 drivers/iio/adc/ti-tsc2046.c > > > > diff --git a/MAINTAINERS b/MAINTAINERS > > index 3fea1a934b32..2d33c6442a55 100644 > > --- a/MAINTAINERS > > +++ b/MAINTAINERS > > @@ -17852,6 +17852,14 @@ S: Supported > > F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt > > F: drivers/nfc/trf7970a.c > > > > +TI TSC2046 ADC DRIVER > > +M: Oleksij Rempel > > +R: ker...@pengutronix.de > > +L: linux-...@vger.kernel.org > > +S: Maintained > > +F: Documentation/devicetree/bindings/iio/adc/ti,tsc2046.yaml > > +F: drivers/iio/adc/ti-tsc2046.c > > + > > TI TWL4030 SERIES SOC CODEC DRIVER > > M: Peter Ujfalusi > > L: alsa-de...@alsa-project.org (moderated for non-subscribers) > > diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig > > index 15587a1bc80d..6ad6f04dfd20 100644 > > --- a/drivers/iio/adc/Kconfig > > +++ b/drivers/iio/adc/Kconfig > > @@ -1175,6 +1175,18 @@ config TI_TLC4541 > > This driver can also be built as a module. If so, the module will be > > called ti-tlc4541. > > > > +config TI_TSC2046 > > + tristate "Texas Instruments TSC2046 ADC driver" > > + depends on SPI > > + select IIO_BUFFER > > + select IIO_TRIGGERED_BUFFER > > + help > > + Say yes here to build support for ADC functionality of Texas > > + Instruments TSC2046 touch screen controller. > > + > > + This driver can also be built as a module. If so, the module will be > > + called ti-tsc2046. > > + > > config TWL4030_MADC > > tristate "TWL4030 MADC (Monitoring A/D Converter)" > > depends on TWL4030_CORE > > diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile > > index 5fca90ada0ec..440e18ac6780 100644 > > --- a/drivers/iio/adc/Makefile > > +++ b/drivers/iio/adc/Makefile > > @@ -105,6 +105,7 @@ obj-$(CONFIG_TI_ADS8688) += ti-ads8688.o > > obj-$(CONFIG_TI_ADS124S08) += ti-ads124s08.o > > obj-$(CONFIG_TI_AM335X_ADC) += ti_am335x_adc.o > > obj-$(CONFIG_TI_TLC4541) += ti-tlc4541.o > > +obj-$(CONFIG_TI_TSC2046) += ti-tsc2046.o > > obj-$(CONFIG_TWL4030_MADC) += twl4030-madc.o > > obj-$(CONFIG_TWL6030_GPADC) += twl6030-gpadc.o > > obj-$(CONFIG_VF610_ADC) += vf610_adc.o > > diff --git a/drivers/iio/adc/ti-tsc2046.c b/drivers/iio/adc/ti-tsc2046.c > > new file mode 100644 > > index ..7c3ae9181164 > > --- /dev/null > > +++ b/d
RE: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json schema
> -Original Message- > From: Pratyush Yadav > Sent: Tuesday, March 16, 2021 12:01 AM > To: Kuldeep Singh > Cc: linux-...@vger.kernel.org; devicet...@vger.kernel.org; linux- > ker...@vger.kernel.org; Mark Brown ; Rob Herring > ; Vladimir Oltean ; linux- > m...@lists.infradead.org > Subject: [EXT] Re: [PATCH] dt-bindings: spi: Convert Freescale DSPI to json > schema > > Caution: EXT Email > > +Cc mtd list > > Hi, > > On 15/03/21 05:45PM, Kuldeep Singh wrote: > > Convert the Freescale DSPI binding to DT schema format using json-schema. > > > > Signed-off-by: Kuldeep Singh > > --- > > Hi Rob, > > This patch is checked with following commands with no warnings observed. > > make distclean; make allmodconfig; > > make dt_binding_check > > DT_SCHEMA_FILES=Documentation/devicetree/bindings/spi/fsl,spi-fsl-dspi > > .yaml; make dtbs_check > > DT_SCHEMA_FILES=Documentation/devicetree/bindings/spi/fsl,spi-fsl-dspi > > .yaml > > When I add the "fsl,spi-cs-sck-delay" property under the flash@0 node in the > example and run dt_binding_check, I see the below error: > > /home/pratyush/src/linux/Documentation/devicetree/bindings/spi/fsl,spi-fsl- > dspi.example.dt.yaml: flash@0: 'fsl,spi-cs-sck-delay' does not match any of > the > regexes: '^partition@', 'pinctrl-[0-9]+' > From schema: > /home/pratyush/src/lin/Documentation/devicetree/bindings/mtd/jedec,spi- > nor.yaml Hi Pratyush, Thanks for mentioning, I just noticed the same error after adding fsl,spi-cs-sck-delay property. Since my example is not using the property, the error went unnoticed. Taking example of nvidia qspi bindings i.e https://lore.kernel.org/linux-devicetree/1608585459-17250-3-git-send-email-skomatin...@nvidia.com/ I constructed other properties in similar fashion and later noticed that example in nvidia bindings uses compatibes as "spi-nor" instead of "jedec,spi-nor" and therefore passes "make dt_binding_check". > I am trying to solve a similar problem for the Cadence QSPI controller > binding and > I wonder what the best solution for this is. The obvious one would be to add > these properties to jedec,spi-nor.yaml. I haven't managed to come up with any > other solution to this problem. I agree with the solution to add properties in jedec,spi-nor.yaml and adding properties particular to specific controllers for flashes in generic jedec,spi-nor.yaml may not be a good solution though. Please let me know your views. Other approach is to add these properties in same binding itself (if possible) so as to limit the scope of these properties. Looking forward for more suggestions. Regards Kuldeep
Re: [PATCH 6/6] clk: actions: Add NIC and ETHERNET clock support for Actions S500 SoC
On Mon, Mar 08, 2021 at 07:18:31PM +0200, Cristian Ciocaltea wrote: > Add support for the missing NIC and ETHERNET clocks in the Actions Semi > Owl S500 SoC clock driver. > > Additionally, change APB clock parent from AHB to the newly added NIC. > > Signed-off-by: Cristian Ciocaltea > --- > drivers/clk/actions/owl-s500.c | 17 - > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c > index b9e434173b4f..0ccc9619b302 100644 > --- a/drivers/clk/actions/owl-s500.c > +++ b/drivers/clk/actions/owl-s500.c > @@ -112,6 +112,7 @@ static const char * const bisp_clk_mux_p[] = { > "display_pll_clk", "dev_clk" }; > static const char * const sensor_clk_mux_p[] = { "hosc", "bisp_clk" }; > static const char * const sd_clk_mux_p[] = { "dev_clk", "nand_pll_clk" }; > static const char * const pwm_clk_mux_p[] = { "losc", "hosc" }; > +static const char * const nic_clk_mux_p[] = { "dev_clk", "display_pll_clk", > "nand_pll_clk", "ddr_pll_clk" }; As per the reg field order, this should come after "ahbprediv_clk_mux_p" Rest looks good. Thanks, Mani > static const char * const ahbprediv_clk_mux_p[] = { "dev_clk", > "display_pll_clk", "nand_pll_clk", "ddr_pll_clk" }; > static const char * const uart_clk_mux_p[] = { "hosc", "dev_pll_clk" }; > static const char * const de_clk_mux_p[] = { "display_pll_clk", "dev_clk" }; > @@ -197,7 +198,7 @@ static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", > CMU_DEVCLKEN1, 3, 0, 0); > > /* divider clocks */ > static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 2, 2, > h_div_table, 0, 0); > -static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, > 0, 0); > +static OWL_DIVIDER(apb_clk, "apb_clk", "nic_clk", CMU_BUSCLK1, 14, 2, NULL, > 0, 0); > static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", > CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0); > > /* factor clocks */ > @@ -205,6 +206,12 @@ static OWL_FACTOR(de1_clk, "de_clk1", "de_clk", > CMU_DECLK, 0, 4, de_factor_table > static OWL_FACTOR(de2_clk, "de_clk2", "de_clk", CMU_DECLK, 4, 4, > de_factor_table, 0, 0); > > /* composite clocks */ > +static OWL_COMP_DIV(nic_clk, "nic_clk", nic_clk_mux_p, > + OWL_MUX_HW(CMU_BUSCLK1, 4, 3), > + { 0 }, > + OWL_DIVIDER_HW(CMU_BUSCLK1, 16, 2, 0, NULL), > + 0); > + > static OWL_COMP_DIV(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, > OWL_MUX_HW(CMU_BUSCLK1, 8, 3), > { 0 }, > @@ -320,6 +327,10 @@ static OWL_COMP_FIXED_FACTOR(i2c3_clk, "i2c3_clk", > "ethernet_pll_clk", > OWL_GATE_HW(CMU_DEVCLKEN1, 31, 0), > 1, 5, 0); > > +static OWL_COMP_FIXED_FACTOR(ethernet_clk, "ethernet_clk", > "ethernet_pll_clk", > + OWL_GATE_HW(CMU_DEVCLKEN1, 22, 0), > + 1, 20, 0); > + > static OWL_COMP_DIV(uart0_clk, "uart0_clk", uart_clk_mux_p, > OWL_MUX_HW(CMU_UART0CLK, 16, 1), > OWL_GATE_HW(CMU_DEVCLKEN1, 6, 0), > @@ -454,6 +465,8 @@ static struct owl_clk_common *s500_clks[] = { > &apb_clk.common, > &dmac_clk.common, > &gpio_clk.common, > + &nic_clk.common, > + ðernet_clk.common, > }; > > static struct clk_hw_onecell_data s500_hw_clks = { > @@ -513,6 +526,8 @@ static struct clk_hw_onecell_data s500_hw_clks = { > [CLK_APB] = &apb_clk.common.hw, > [CLK_DMAC] = &dmac_clk.common.hw, > [CLK_GPIO] = &gpio_clk.common.hw, > + [CLK_NIC] = &nic_clk.common.hw, > + [CLK_ETHERNET] = ðernet_clk.common.hw, > }, > .num = CLK_NR_CLKS, > }; > -- > 2.30.1 >
Re: [PATCH 5.10 113/290] net: dsa: implement a central TX reallocation procedure
On Mon, Mar 15, 2021 at 05:06:16PM -0400, Sasha Levin wrote: > On Mon, Mar 15, 2021 at 07:56:02PM +, Vladimir Oltean wrote: > > > Signed-off-by: Vladimir Oltean > > > Tested-by: Christian Eggers # For tail taggers only > > > Tested-by: Kurt Kanzenbach > > > Reviewed-by: Florian Fainelli > > > Signed-off-by: Jakub Kicinski > > > Signed-off-by: Sasha Levin > > > --- > > > > For context, Sasha explains here: > > https://www.spinics.net/lists/stable-commits/msg190151.html > > (the conversation is somewhat truncated, unfortunately, because > > stable-comm...@vger.kernel.org ate my replies) > > that 13 patches were backported to get the unrelated commit 9200f515c41f > > ("net: dsa: tag_mtk: fix 802.1ad VLAN egress") to apply cleanly with git-am. > > > > I am not strictly against this, even though I would have liked to know > > that the maintainers were explicitly informed about it. > > > > Greg, could you make your stable backporting emails include the output > > of ./get_maintainer.pl into the list of recipients? Thanks. > > Did it not happen here? I've looked at Greg's script[1] and it seemed to > me like it does go through get_maintainer.pl. > > > [1] > https://github.com/gregkh/gregkh-linux/blob/master/scripts/generate_cc_list That's just a script I use for "normal" kernel development when creating patches, not for stable stuff. thanks, greg k-h
Re: [PATCH V2] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c
On 3/15/21 10:42 PM, Bhaskar Chowdhury wrote: > > s/kernal/kernel/ > > A punctuation added too. > > Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap > --- > Changes from V1: > Punctuation missed, added as per Randy's finding > > arch/x86/events/intel/bts.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c > index 731dd8d0dbb1..1ba93c40fc54 100644 > --- a/arch/x86/events/intel/bts.c > +++ b/arch/x86/events/intel/bts.c > @@ -594,7 +594,7 @@ static __init int bts_init(void) >* we cannot use the user mapping since it will not be available >* if we're not running the owning process. >* > - * With PTI we can't use the kernal map either, because its not > + * With PTI we can't use the kernel map either, because it's not >* there when we run userspace. >* >* For now, disable this driver when using PTI. > -- -- ~Randy
Re: [PATCH] platform/x86: pmc_atom: use callback for all dmi quirk entries
Hoi Hans, on a slighly different but also related topic. Did you ever come across SMSC SCH5347? Seems to be pretty similar to 56xx, only with spec non public ... and probably less often in use Maybe you happen to have code, or know the differences. We already have it working with a modified copy of sch56xx but that is still rough and i thought i ask before we potentially duplicate work. groetjes, Henning Am Mon, 15 Mar 2021 19:01:13 +0100 schrieb Hans de Goede : > Hi, > > On 3/15/21 6:00 PM, Henning Schild wrote: > > Am Mon, 15 Mar 2021 17:31:49 +0100 > > schrieb Hans de Goede : > > > >> Hi, > >> > >> On 3/15/21 3:58 PM, Henning Schild wrote: > >>> Introduce a global variable to remember the matching entry for > >>> later printing. Also having a callback allows to stop matching > >>> after the first hit. > >>> > >>> Signed-off-by: Henning Schild > >>> --- > >>> drivers/platform/x86/pmc_atom.c | 26 -- > >>> 1 file changed, 20 insertions(+), 6 deletions(-) > >>> > >>> diff --git a/drivers/platform/x86/pmc_atom.c > >>> b/drivers/platform/x86/pmc_atom.c index 38542d547f29..d0f74856cd8b > >>> 100644 --- a/drivers/platform/x86/pmc_atom.c > >>> +++ b/drivers/platform/x86/pmc_atom.c > >>> @@ -364,8 +364,16 @@ static void pmc_dbgfs_register(struct pmc_dev > >>> *pmc) #endif /* CONFIG_DEBUG_FS */ > >>> > >>> static bool pmc_clk_is_critical = true; > >>> +static const struct dmi_system_id *dmi_critical; > >>> > >>> -static int siemens_clk_is_critical(const struct dmi_system_id *d) > >>> +static int dmi_callback(const struct dmi_system_id *d) > >>> +{ > >>> + dmi_critical = d; > >> > >> Don't introduce a global variable for this please. Instead just > >> directly print the ident of the matching dmi_system_id here. > > > > Sorry, missed that part. Result looks nice and clean, thanks. I > > think i will squash it into 4/4 in v3 and not follow up here for > > now. > > Ack, that sounds good to me. > > Regards, > > Hans > > > >>> + > >>> + return 1; > >>> +} > >>> + > >>> +static int dmi_callback_siemens(const struct dmi_system_id *d) > >>> { > >>> u32 st_id; > >>> > >>> @@ -373,7 +381,7 @@ static int siemens_clk_is_critical(const > >>> struct dmi_system_id *d) goto out; > >>> > >>> if (st_id == SIMATIC_IPC_IPC227E || st_id == > >>> SIMATIC_IPC_IPC277E) > >>> - return 1; > >>> + return dmi_callback(d); > >>> > >>> out: > >>> pmc_clk_is_critical = false; > >>> @@ -388,6 +396,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk0 is used for an external HSIC USB > >>> HUB */ .ident = "MPL CEC1x", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"), > >>> DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 > >>> Family"), @@ -396,6 +405,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk0 - 3 are used for the 4 ethernet > >>> controllers */ .ident = "Lex 3I380D", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "Lex > >>> BayTrail"), DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), > >>> @@ -404,6 +414,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk* - are used for ethernet > >>> controllers */ .ident = "Lex 2I385SW", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "Lex > >>> BayTrail"), DMI_MATCH(DMI_PRODUCT_NAME, "2I385SW"), > >>> @@ -412,6 +423,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk* - are used for ethernet > >>> controllers */ .ident = "Beckhoff CB3163", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff > >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB3163"), > >>> @@ -420,6 +432,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk* - are used for ethernet > >>> controllers */ .ident = "Beckhoff CB4063", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff > >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB4063"), > >>> @@ -428,6 +441,7 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk* - are used for ethernet > >>> controllers */ .ident = "Beckhoff CB6263", > >>> + .callback = dmi_callback, > >>> .matches = { > >>> DMI_MATCH(DMI_SYS_VENDOR, "Beckhoff > >>> Automation"), DMI_MATCH(DMI_BOARD_NAME, "CB6263"), > >>> @@ -436,13 +450,14 @@ static const struct dmi_system_id > >>> critclk_systems[] = { { > >>> /* pmc_plt_clk* - are used for ethernet > >>> controllers */ .ident = "Beckhoff CB6363", > >>> + .callback = dmi_callback,
[PATCH v2 12/12] docs: path-lookup: update symlink description
instead of lookup_real()/vfs_create(), i_op->lookup() and i_op->create() will be called directly. update vfs_open() logic should_follow_link is merged into lookup_last() or open_last_lookup() which returns symlink name instead of an integer. Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index eef6e9f68fba..adbc714740c2 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1202,16 +1202,15 @@ the code. it. If the file was found in the dcache, then ``vfs_open()`` is used for this. If not, then ``lookup_open()`` will either call ``atomic_open()`` (if the filesystem provides it) to combine the final lookup with the open, or - will perform the separate ``lookup_real()`` and ``vfs_create()`` steps + will perform the separate ``i_op->lookup()`` and ``i_op->create()`` steps directly. In the later case the actual "open" of this newly found or created file will be performed by ``vfs_open()``, just as if the name were found in the dcache. 2. ``vfs_open()`` can fail with ``-EOPENSTALE`` if the cached information - wasn't quite current enough. Rather than restarting the lookup from - the top with ``LOOKUP_REVAL`` set, ``lookup_open()`` is called instead, - giving the filesystem a chance to resolve small inconsistencies. - If that doesn't work, only then is the lookup restarted from the top. + wasn't quite current enough. If it's in RCU-walk -ECHILD will be returned + otherwise will return -ESTALE. When -ESTALE is returned, the caller may + retry with LOOKUP_REVAL flag set. 3. An open with O_CREAT **does** follow a symlink in the final component, unlike other creation system calls (like ``mkdir``). So the sequence:: @@ -1221,8 +1220,8 @@ the code. will create a file called ``/tmp/bar``. This is not permitted if ``O_EXCL`` is set but otherwise is handled for an O_CREAT open much - like for a non-creating open: ``should_follow_link()`` returns ``1``, and - so does ``do_last()`` so that ``trailing_symlink()`` gets called and the + like for a non-creating open: ``lookup_last()`` or ``open_last_lookup()`` + returns a non ``Null`` value, and ``link_path_walk()`` gets called and the open process continues on the symlink that was found. Updating the access time -- 2.30.2
[PATCH v2 10/12] docs: path-lookup: update WALK_GET, WALK_PUT desc
WALK_GET is changed to WALK_TRAILING with a different meaning. Here it should be WALK_NOFOLLOW. WALK_PUT dosn't exist, we have WALK_MORE. WALK_PUT == !WALK_MORE And there is not should_follow_link(). Related commits: commit 8c4efe22e7c4 ("namei: invert the meaning of WALK_FOLLOW") commit 1c4ff1a87e46 ("namei: invert WALK_PUT logics") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index 0d41c61f7e4f..abd0153e2415 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1123,13 +1123,11 @@ stack in ``walk_component()`` immediately when the symlink is found; old symlink as it walks that last component. So it is quite convenient for ``walk_component()`` to release the old symlink and pop the references just before pushing the reference information for the -new symlink. It is guided in this by two flags; ``WALK_GET``, which -gives it permission to follow a symlink if it finds one, and -``WALK_PUT``, which tells it to release the current symlink after it has been -followed. ``WALK_PUT`` is tested first, leading to a call to -``put_link()``. ``WALK_GET`` is tested subsequently (by -``should_follow_link()``) leading to a call to ``pick_link()`` which sets -up the stack frame. +new symlink. It is guided in this by two flags; ``WALK_NOFOLLOW``, which +suggests whether to follow a symlink if it finds one, and +``WALK_MORE``, which tells whether to release the current symlink after it has +been followed. ``WALK_MORE`` is tested first, leading to a call to +``put_link()``. Symlinks with no final component -- 2.30.2
[PATCH v2 09/12] docs: path-lookup: no get_link()
no get_link() anymore. we have step_into() and pick_link(). walk_component() will call step_into(), in turn call pick_link, and return symlink name. Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index 8ab95dd9046e..0d41c61f7e4f 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1103,12 +1103,10 @@ doesn't need to notice. Getting this ``name`` variable on and off the stack is very straightforward; pushing and popping the references is a little more complex. -When a symlink is found, ``walk_component()`` returns the value ``1`` -(``0`` is returned for any other sort of success, and a negative number -is, as usual, an error indicator). This causes ``get_link()`` to be -called; it then gets the link from the filesystem. Providing that -operation is successful, the old path ``name`` is placed on the stack, -and the new value is used as the ``name`` for a while. When the end of +When a symlink is found, ``walk_component()`` calls ``pick_link()``, +it then gets the link from the filesystem returning new path ``name``. +Providing that operation is successful, the old path ``name`` is placed on the +stack, and the new value is used as the ``name`` for a while. When the end of the path is found (i.e. ``*name`` is ``'\0'``) the old ``name`` is restored off the stack and path walking continues. -- 2.30.2
[PATCH v2 11/12] docs: path-lookup: update get_link() ->follow_link description
get_link() is merged into pick_link(). i_op->follow_link is replaced with i_op->get_link(). get_link() can return ERR_PTR(0) which equals NULL. Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index abd0153e2415..eef6e9f68fba 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1134,10 +1134,10 @@ Symlinks with no final component A pair of special-case symlinks deserve a little further explanation. Both result in a new ``struct path`` (with mount and dentry) being set -up in the ``nameidata``, and result in ``get_link()`` returning ``NULL``. +up in the ``nameidata``, and result in ``pick_link()`` returning ``NULL``. The more obvious case is a symlink to "``/``". All symlinks starting -with "``/``" are detected in ``get_link()`` which resets the ``nameidata`` +with "``/``" are detected in ``pick_link()`` which resets the ``nameidata`` to point to the effective filesystem root. If the symlink only contains "``/``" then there is nothing more to do, no components at all, so ``NULL`` is returned to indicate that the symlink can be released and @@ -1154,12 +1154,11 @@ something that looks like a symlink. It is really a reference to the target file, not just the name of it. When you ``readlink`` these objects you get a name that might refer to the same file - unless it has been unlinked or mounted over. When ``walk_component()`` follows -one of these, the ``->follow_link()`` method in "procfs" doesn't return +one of these, the ``->get_link()`` method in "procfs" doesn't return a string name, but instead calls ``nd_jump_link()`` which updates the -``nameidata`` in place to point to that target. ``->follow_link()`` then -returns ``NULL``. Again there is no final component and ``get_link()`` -reports this by leaving the ``last_type`` field of ``nameidata`` as -``LAST_BIND``. +``nameidata`` in place to point to that target. ``->get_link()`` then +returns ``0``. Again there is no final component and ``pick_link()`` +returns NULL. Following the symlink in the final component -- 2.30.2
[PATCH v2 06/12] docs: path-lookup: Add macro name to symlink limit description
Add macro name MAXSYMLINKS to the symlink limit description, so that it is consistent with path name length description above. Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index 66697db74955..af5c20fecfef 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -992,8 +992,8 @@ is 4096. There are a number of reasons for this limit; not letting the kernel spend too much time on just one path is one of them. With symbolic links you can effectively generate much longer paths so some sort of limit is needed for the same reason. Linux imposes a limit of -at most 40 symlinks in any one path lookup. It previously imposed a -further limit of eight on the maximum depth of recursion, but that was +at most 40 (MAXSYMLINKS) symlinks in any one path lookup. It previously imposed +a further limit of eight on the maximum depth of recursion, but that was raised to 40 when a separate stack was implemented, so there is now just the one limit. -- 2.30.2
[PATCH v2 07/12] docs: path-lookup: i_op->follow_link replaced with i_op->get_link
follow_link has been replaced by get_link() which can be called in RCU mode. see commit: commit 6b2553918d8b ("replace ->follow_link() with new method that could stay in RCU mode") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index af5c20fecfef..e6b6c43ff0f6 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1060,13 +1060,11 @@ filesystem cannot successfully get a reference in RCU-walk mode, it must return ``-ECHILD`` and ``unlazy_walk()`` will be called to return to REF-walk mode in which the filesystem is allowed to sleep. -The place for all this to happen is the ``i_op->follow_link()`` inode -method. In the present mainline code this is never actually called in -RCU-walk mode as the rewrite is not quite complete. It is likely that -in a future release this method will be passed an ``inode`` pointer when -called in RCU-walk mode so it both (1) knows to be careful, and (2) has the -validated pointer. Much like the ``i_op->permission()`` method we -looked at previously, ``->follow_link()`` would need to be careful that +The place for all this to happen is the ``i_op->get_link()`` inode +method. This is called both in RCU-walk and REF-walk. In RCU-walk the +``dentry*`` argument is NULL, ``->get_link()`` can return -ECHILD to drop out of +RCU-walk. Much like the ``i_op->permission()`` method we +looked at previously, ``->get_link()`` would need to be careful that all the data structures it references are safe to be accessed while holding no counted reference, only the RCU lock. Though getting a reference with ``->follow_link()`` is not yet done in RCU-walk mode, the -- 2.30.2
[PATCH v2 08/12] docs: path-lookup: update i_op->put_link and cookie description
No inode->put_link operation anymore. We use delayed_call to deal with link destruction. Cookie has been replaced with struct delayed_call. Related commit: commit fceef393a538 ("switch ->get_link() to delayed_call, kill ->put_link()") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 30 ++- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index e6b6c43ff0f6..8ab95dd9046e 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -1066,34 +1066,20 @@ method. This is called both in RCU-walk and REF-walk. In RCU-walk the RCU-walk. Much like the ``i_op->permission()`` method we looked at previously, ``->get_link()`` would need to be careful that all the data structures it references are safe to be accessed while -holding no counted reference, only the RCU lock. Though getting a -reference with ``->follow_link()`` is not yet done in RCU-walk mode, the -code is ready to release the reference when that does happen. - -This need to drop the reference to a symlink adds significant -complexity. It requires a reference to the inode so that the -``i_op->put_link()`` inode operation can be called. In REF-walk, that -reference is kept implicitly through a reference to the dentry, so -keeping the ``struct path`` of the symlink is easiest. For RCU-walk, -the pointer to the inode is kept separately. To allow switching from -RCU-walk back to REF-walk in the middle of processing nested symlinks -we also need the seq number for the dentry so we can confirm that -switching back was safe. - -Finally, when providing a reference to a symlink, the filesystem also -provides an opaque "cookie" that must be passed to ``->put_link()`` so that it -knows what to free. This might be the allocated memory area, or a -pointer to the ``struct page`` in the page cache, or something else -completely. Only the filesystem knows what it is. +holding no counted reference, only the RCU lock. A callback +``struct delayed_called`` will be passed to get_link, +file systems can set their own put_link function and argument through +``set_delayed_call``. Later on, when vfs wants to put link, it will call +``do_delayed_call`` to invoke that callback function with the argument. In order for the reference to each symlink to be dropped when the walk completes, whether in RCU-walk or REF-walk, the symlink stack needs to contain, along with the path remnants: -- the ``struct path`` to provide a reference to the inode in REF-walk -- the ``struct inode *`` to provide a reference to the inode in RCU-walk +- the ``struct path`` to provide a reference to the previous path +- the ``const char *`` to provide a reference to the to previous name - the ``seq`` to allow the path to be safely switched from RCU-walk to REF-walk -- the ``cookie`` that tells ``->put_path()`` what to put. +- the ``struct delayed_call`` for later invocation. This means that each entry in the symlink stack needs to hold five pointers and an integer instead of just one pointer (the path -- 2.30.2
[PATCH v2 05/12] docs: path-lookup: remove filename_mountpoint
No filename_mountpoint any more see commit: commit 161aff1d93ab ("LOOKUP_MOUNTPOINT: fold path_mountpointat() into path_lookupat()") Without filename_mountpoint and path_mountpoint(), the numbers should be four & three: "These four correspond roughly to the three path_*() functions" Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index a65cb477d524..66697db74955 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -652,9 +652,9 @@ restarts from the top with REF-walk. This pattern of "try RCU-walk, if that fails try REF-walk" can be clearly seen in functions like ``filename_lookup()``, -``filename_parentat()``, ``filename_mountpoint()``, -``do_filp_open()``, and ``do_file_open_root()``. These five -correspond roughly to the four ``path_*()`` functions we met earlier, +``filename_parentat()``, +``do_filp_open()``, and ``do_file_open_root()``. These four +correspond roughly to the three ``path_*()`` functions we met earlier, each of which calls ``link_path_walk()``. The ``path_*()`` functions are called using different mode flags until a mode is found which works. They are first called with ``LOOKUP_RCU`` set to request "RCU-walk". If -- 2.30.2
[PATCH v2 04/12] docs: path-lookup: update do_last() part
traling_symlink() was merged into lookup_last, do_last(). do_last() has later been split into open_last_lookups() and do_open(). see related commit: commit c5971b8c6354 ("take post-lookup part of do_last() out of loop") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 35 --- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index b6a301b78121..a65cb477d524 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -495,11 +495,11 @@ This is important when unmounting a filesystem that is inaccessible, such as one provided by a dead NFS server. Finally ``path_openat()`` is used for the ``open()`` system call; it -contains, in support functions starting with "``do_last()``", all the +contains, in support functions starting with "``open_last_lookups()``", all the complexity needed to handle the different subtleties of O_CREAT (with or without O_EXCL), final "``/``" characters, and trailing symbolic links. We will revisit this in the final part of this series, which -focuses on those symbolic links. "``do_last()``" will sometimes, but +focuses on those symbolic links. "``open_last_lookups()``" will sometimes, but not always, take ``i_rwsem``, depending on what it finds. Each of these, or the functions which call them, need to be alert to @@ -1199,26 +1199,27 @@ symlink. This case is handled by the relevant caller of ``link_path_walk()``, such as ``path_lookupat()`` using a loop that calls ``link_path_walk()``, and then handles the final component. If the final component is a symlink -that needs to be followed, then ``trailing_symlink()`` is called to set -things up properly and the loop repeats, calling ``link_path_walk()`` -again. This could loop as many as 40 times if the last component of -each symlink is another symlink. +that needs to be followed, then ``open_last_lookups()`` is +called to set things up properly and the loop repeats, calling +``link_path_walk()`` again. This could loop as many as 40 times if the last +component of each symlink is another symlink. The various functions that examine the final component and possibly -report that it is a symlink are ``lookup_last()``, ``mountpoint_last()`` -and ``do_last()``, each of which use the same convention as -``walk_component()`` of returning ``1`` if a symlink was found that needs -to be followed. +report that it is a symlink are ``lookup_last()``, ``open_last_lookups()`` +, each of which use the same convention as +``walk_component()`` of returning ``char *name`` if a symlink was found that +needs to be followed. -Of these, ``do_last()`` is the most interesting as it is used for -opening a file. Part of ``do_last()`` runs with ``i_rwsem`` held and this -part is in a separate function: ``lookup_open()``. +Of these, ``open_last_lookups()`` is the most interesting as it works in tandem +with ``do_open()`` for opening a file. Part of ``open_last_lookups()`` runs +with ``i_rwsem`` held and this part is in a separate function: ``lookup_open()``. -Explaining ``do_last()`` completely is beyond the scope of this article, -but a few highlights should help those interested in exploring the -code. +Explaining ``open_last_lookups()`` and ``do_open()`` completely is beyond the scope +of this article, but a few highlights should help those interested in exploring +the code. -1. Rather than just finding the target file, ``do_last()`` needs to open +1. Rather than just finding the target file, ``do_open()`` is used after + ``open_last_lookup()`` to open it. If the file was found in the dcache, then ``vfs_open()`` is used for this. If not, then ``lookup_open()`` will either call ``atomic_open()`` (if the filesystem provides it) to combine the final lookup with the open, or -- 2.30.2
[PATCH v2 02/12] docs: path-lookup: update path_to_nameidata() part
No path_to_namei() anymore, step_into() will be called. Related commit: commit c99687a03a78 ("fold path_to_nameidata() into its only remaining caller") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index d07766375e13..a29d714431a3 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -455,9 +455,10 @@ directly from walk_component() or from handle_dots(). It calls ``handle_mount()``, to check and handle mount points, in which a new ``struct path`` containing a counted reference to the new dentry and a reference to the new ``vfsmount`` which is only counted if it is -different from the previous ``vfsmount``. It then calls -``path_to_nameidata()`` to install the new ``struct path`` in the -``struct nameidata`` and drop the unneeded references. +different from the previous ``vfsmount`` will be created. Then if there is +symbolic link, ``step_into()`` calls ``pick_link()`` to deal with it, otherwise +installs the new ``struct path`` in the ``struct nameidata`` and drop the +unneeded references. This "hand-over-hand" sequencing of getting a reference to the new dentry before dropping the reference to the previous dentry may -- 2.30.2
[PATCH v2 01/12] docs: path-lookup: update follow_managed() part
No follow_managed() anymore, handle_mounts(), traverse_mounts(), will do the job. see commit 9deed3ebca24 ("new helper: traverse_mounts()") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index c482e1619e77..d07766375e13 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -448,10 +448,11 @@ described. If it finds a ``LAST_NORM`` component it first calls filesystem to revalidate the result if it is that sort of filesystem. If that doesn't get a good result, it calls "``lookup_slow()``" which takes ``i_rwsem``, rechecks the cache, and then asks the filesystem -to find a definitive answer. Each of these will call -``follow_managed()`` (as described below) to handle any mount points. +to find a definitive answer. -In the absence of symbolic links, ``walk_component()`` creates a new +As the last step of ``walk_component()``, ``step_into()`` will be called either +directly from walk_component() or from handle_dots(). It calls +``handle_mount()``, to check and handle mount points, in which a new ``struct path`` containing a counted reference to the new dentry and a reference to the new ``vfsmount`` which is only counted if it is different from the previous ``vfsmount``. It then calls @@ -535,8 +536,7 @@ covered in greater detail in autofs.txt in the Linux documentation tree, but a few notes specifically related to path lookup are in order here. -The Linux VFS has a concept of "managed" dentries which is reflected -in function names such as "``follow_managed()``". There are three +The Linux VFS has a concept of "managed" dentries. There are three potentially interesting things about these dentries corresponding to three different flags that might be set in ``dentry->d_flags``: -- 2.30.2
[PATCH v2 03/12] docs: path-lookup: update path_mountpoint() part
path_mountpoint() doesn't exist anymore. Have been folded into path_lookup_at when flag is set with LOOKUP_MOUNTPOINT. Check commit: commit 161aff1d93abf0e ("LOOKUP_MOUNTPOINT: fold path_mountpointat() into path_lookupat()") Signed-off-by: Fox Chen --- Documentation/filesystems/path-lookup.rst | 12 +--- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst index a29d714431a3..b6a301b78121 100644 --- a/Documentation/filesystems/path-lookup.rst +++ b/Documentation/filesystems/path-lookup.rst @@ -472,7 +472,7 @@ Handling the final component ``nd->last_type`` to refer to the final component of the path. It does not call ``walk_component()`` that last time. Handling that final component remains for the caller to sort out. Those callers are -``path_lookupat()``, ``path_parentat()``, ``path_mountpoint()`` and +``path_lookupat()``, ``path_parentat()`` and ``path_openat()`` each of which handles the differing requirements of different system calls. @@ -488,12 +488,10 @@ perform their operation. object is wanted such as by ``stat()`` or ``chmod()``. It essentially just calls ``walk_component()`` on the final component through a call to ``lookup_last()``. ``path_lookupat()`` returns just the final dentry. - -``path_mountpoint()`` handles the special case of unmounting which must -not try to revalidate the mounted filesystem. It effectively -contains, through a call to ``mountpoint_last()``, an alternate -implementation of ``lookup_slow()`` which skips that step. This is -important when unmounting a filesystem that is inaccessible, such as +It is worth noting that when flag ``LOOKUP_MOUNTPOINT`` is set, +``path_lookupat()`` will unset LOOKUP_JUMPED in nameidata so that in the further +path traversal ``d_weak_revalidate()`` won't be called. +This is important when unmounting a filesystem that is inaccessible, such as one provided by a dead NFS server. Finally ``path_openat()`` is used for the ``open()`` system call; it -- 2.30.2
[PATCH v2 00/12] docs: path-lookup: Update pathlookup docs
The Path lookup is a very complex subject in VFS. The path-lookup document provides a very detailed guidance to help people understand how path lookup works in the kernel. This document was originally written based on three lwn articles five years ago. As times goes by, some of the content is outdated. This patchset is intended to update the document to make it more relevant to current codebase. --- v1: https://lore.kernel.org/lkml/20210126072443.33066-1-foxhlc...@gmail.com/ v2: - Fix problems in v1 reviewed by Neil: 1. In Patch 01 and 02 rewrite a new paragrah to describe step_into() 2. In Patch 01 instead of changing it to traverse_mounts, remove follow_managed() 3. In Patch 03 re-telling the story rather than adding notes 4. In Patch 04 do_open() should be outside of loop, fix it and fix other problems in following paragrah 5. In Patch 07 use "drop out of RCU-walk" 6. In Patch 08 "latter" should be "later", fix it and restructure the next paragrah removing "Finally" To help review, I've put a compiled html version here: http://linux-docs.54fox.com/linux_docs/filesystems/path-lookup-v2.html Fox Chen (12): docs: path-lookup: update follow_managed() part docs: path-lookup: update path_to_nameidata() part docs: path-lookup: update path_mountpoint() part docs: path-lookup: update do_last() part docs: path-lookup: remove filename_mountpoint docs: path-lookup: Add macro name to symlink limit description docs: path-lookup: i_op->follow_link replaced with i_op->get_link docs: path-lookup: update i_op->put_link and cookie description docs: path-lookup: no get_link() docs: path-lookup: update WALK_GET, WALK_PUT desc docs: path-lookup: update get_link() ->follow_link description docs: path-lookup: update symlink description Documentation/filesystems/path-lookup.rst | 164 ++ 1 file changed, 71 insertions(+), 93 deletions(-) -- 2.30.2
Re: [PATCH 5.10 113/290] net: dsa: implement a central TX reallocation procedure
On Mon, Mar 15, 2021 at 07:56:02PM +, Vladimir Oltean wrote: > +Andrew, Vivien, > > On Mon, Mar 15, 2021 at 02:53:26PM +0100, gre...@linuxfoundation.org wrote: > > From: Greg Kroah-Hartman > > > > From: Vladimir Oltean > > > > [ Upstream commit a3b0b6479700a5b0af2c631cb2ec0fb7a0d978f2 ] > > > > At the moment, taggers are left with the task of ensuring that the skb > > headers are writable (which they aren't, if the frames were cloned for > > TX timestamping, for flooding by the bridge, etc), and that there is > > enough space in the skb data area for the DSA tag to be pushed. > > > > Moreover, the life of tail taggers is even harder, because they need to > > ensure that short frames have enough padding, a problem that normal > > taggers don't have. > > > > The principle of the DSA framework is that everything except for the > > most intimate hardware specifics (like in this case, the actual packing > > of the DSA tag bits) should be done inside the core, to avoid having > > code paths that are very rarely tested. > > > > So provide a TX reallocation procedure that should cover the known needs > > of DSA today. > > > > Note that this patch also gives the network stack a good hint about the > > headroom/tailroom it's going to need. Up till now it wasn't doing that. > > So the reallocation procedure should really be there only for the > > exceptional cases, and for cloned packets which need to be unshared. > > > > Signed-off-by: Vladimir Oltean > > Tested-by: Christian Eggers # For tail taggers only > > Tested-by: Kurt Kanzenbach > > Reviewed-by: Florian Fainelli > > Signed-off-by: Jakub Kicinski > > Signed-off-by: Sasha Levin > > --- > > For context, Sasha explains here: > https://www.spinics.net/lists/stable-commits/msg190151.html > (the conversation is somewhat truncated, unfortunately, because > stable-comm...@vger.kernel.org ate my replies) > that 13 patches were backported to get the unrelated commit 9200f515c41f > ("net: dsa: tag_mtk: fix 802.1ad VLAN egress") to apply cleanly with git-am. > > I am not strictly against this, even though I would have liked to know > that the maintainers were explicitly informed about it. > > Greg, could you make your stable backporting emails include the output > of ./get_maintainer.pl into the list of recipients? Thanks. I cc: everyone on the signed-off-by list on the patch, why would we need to add more? A maintainer should always be on that list automatically. thanks, greg k-h
Re: [PATCH 4/6] clk: actions: Fix AHPPREDIV-H-AHB clock chain on Owl S500 SoC
On Mon, Mar 08, 2021 at 07:18:29PM +0200, Cristian Ciocaltea wrote: > There are a few issues with the setup of the Actions Semi Owl S500 SoC's > clock chain involving AHPPREDIV, H and AHB clocks: > > * AHBPREDIV clock is defined as a muxer only, although it also acts as > a divider. > * H clock is defined as a standard divider, although the raw value zero > is not supported. What do you mean by not supported? The datasheet lists "0" as the valid divisor value for divide by 1. Rest looks good to me. Thanks, Mani > * AHB is defined as a multi-rate factor clock, but it is actually just > a fixed pass clock. > > Let's provide the following fixes: > > * Change AHBPREDIV clock to an ungated OWL_COMP_DIV definition. > * Add a clock div table 'h_div_table' for the H clock to drop the > unsupported 0 rate and use the correct register shift value in the > OWL_DIVIDER definition. > * Drop the unneeded 'ahb_factor_table[]' and change AHB clock to an > ungated OWL_COMP_FIXED_FACTOR definition. > > Fixes: ed6b4795ece4 ("clk: actions: Add clock driver for S500 SoC") > Signed-off-by: Cristian Ciocaltea > --- > drivers/clk/actions/owl-s500.c | 20 ++-- > 1 file changed, 14 insertions(+), 6 deletions(-) > > diff --git a/drivers/clk/actions/owl-s500.c b/drivers/clk/actions/owl-s500.c > index abe8874353de..b9e434173b4f 100644 > --- a/drivers/clk/actions/owl-s500.c > +++ b/drivers/clk/actions/owl-s500.c > @@ -151,9 +151,9 @@ static struct clk_factor_table hde_factor_table[] = { > { 0, 0, 0 }, > }; > > -static struct clk_factor_table ahb_factor_table[] = { > - { 1, 1, 2 }, { 2, 1, 3 }, > - { 0, 0, 0 }, > +static struct clk_div_table h_div_table[] = { > + { 1, 2 }, { 2, 3 }, { 3, 4 }, > + { 0, 0 }, > }; > > static struct clk_div_table rmii_ref_div_table[] = { > @@ -184,7 +184,6 @@ static struct clk_div_table nand_div_table[] = { > > /* mux clock */ > static OWL_MUX(dev_clk, "dev_clk", dev_clk_mux_p, CMU_DEVPLL, 12, 1, > CLK_SET_RATE_PARENT); > -static OWL_MUX(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, > CMU_BUSCLK1, 8, 3, CLK_SET_RATE_PARENT); > > /* gate clocks */ > static OWL_GATE(gpio_clk, "gpio_clk", "apb_clk", CMU_DEVCLKEN0, 18, 0, 0); > @@ -197,16 +196,25 @@ static OWL_GATE(timer_clk, "timer_clk", "hosc", > CMU_DEVCLKEN1, 27, 0, 0); > static OWL_GATE(hdmi_clk, "hdmi_clk", "hosc", CMU_DEVCLKEN1, 3, 0, 0); > > /* divider clocks */ > -static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 12, 2, > NULL, 0, 0); > +static OWL_DIVIDER(h_clk, "h_clk", "ahbprediv_clk", CMU_BUSCLK1, 2, 2, > h_div_table, 0, 0); > static OWL_DIVIDER(apb_clk, "apb_clk", "ahb_clk", CMU_BUSCLK1, 14, 2, NULL, > 0, 0); > static OWL_DIVIDER(rmii_ref_clk, "rmii_ref_clk", "ethernet_pll_clk", > CMU_ETHERNETPLL, 1, 1, rmii_ref_div_table, 0, 0); > > /* factor clocks */ > -static OWL_FACTOR(ahb_clk, "ahb_clk", "h_clk", CMU_BUSCLK1, 2, 2, > ahb_factor_table, 0, 0); > static OWL_FACTOR(de1_clk, "de_clk1", "de_clk", CMU_DECLK, 0, 4, > de_factor_table, 0, 0); > static OWL_FACTOR(de2_clk, "de_clk2", "de_clk", CMU_DECLK, 4, 4, > de_factor_table, 0, 0); > > /* composite clocks */ > +static OWL_COMP_DIV(ahbprediv_clk, "ahbprediv_clk", ahbprediv_clk_mux_p, > + OWL_MUX_HW(CMU_BUSCLK1, 8, 3), > + { 0 }, > + OWL_DIVIDER_HW(CMU_BUSCLK1, 12, 2, 0, NULL), > + 0); > + > +static OWL_COMP_FIXED_FACTOR(ahb_clk, "ahb_clk", "h_clk", > + { 0 }, > + 1, 1, CLK_SET_RATE_PARENT); > + > static OWL_COMP_FACTOR(vce_clk, "vce_clk", hde_clk_mux_p, > OWL_MUX_HW(CMU_VCECLK, 4, 2), > OWL_GATE_HW(CMU_DEVCLKEN0, 26, 0), > -- > 2.30.1 >
[PATCH V2] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c
s/kernal/kernel/ A punctuation added too. Signed-off-by: Bhaskar Chowdhury --- Changes from V1: Punctuation missed, added as per Randy's finding arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 731dd8d0dbb1..1ba93c40fc54 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -594,7 +594,7 @@ static __init int bts_init(void) * we cannot use the user mapping since it will not be available * if we're not running the owning process. * -* With PTI we can't use the kernal map either, because its not +* With PTI we can't use the kernel map either, because it's not * there when we run userspace. * * For now, disable this driver when using PTI. -- 2.30.2
Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files
On 16-03-21, 00:36, Frank Rowand wrote: > I should have looked at patch 3/5 more carefully instead of counting on > Masahiro to check it out and simply build testing. > > Patch 3/5 does not seem correct. I'll look over all the makefile related > changes that have been accepted (if any) and patch 3/5 later today (Tuesday). What is already merged by Rob is what everyone reviewed and it wasn't related to this .dtso/.dtbo discussion we are having. I will resend a new patchset with the stuff left for merging (which will involve the thing we are discussing here), so we can get a clear picture of it. -- viresh
Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files
On 3/15/21 5:12 PM, Laurent Pinchart wrote: > Hi Yamada-san, > > On Tue, Mar 16, 2021 at 02:43:45AM +0900, Masahiro Yamada wrote: >> On Mon, Mar 15, 2021 at 3:40 PM Viresh Kumar wrote: >>> On 14-03-21, 20:16, Frank Rowand wrote: On 3/12/21 11:11 PM, Frank Rowand wrote: > On 3/12/21 1:13 AM, Viresh Kumar wrote: >> On 12-03-21, 01:09, Frank Rowand wrote: >>> I suggested having the .dtso files include the .dts file because that >>> is a relatively >>> small and easy change to test. What would probably make more sense is >>> the rename >>> the existing overlay .dts files to be .dtso files and then for each >>> overlay .dtso >>> file create a new .dts file that #includes the corresponding .dtso >>> file. This is >>> more work and churn, but easier to document that the .dts files are a >>> hack that is >>> needed so that the corresponding .dtb.S files will be generated. >> >> What about creating links instead then ? >> > > I don't really like the idea of using links here. > > Maybe it is best to make the changes needed to allow the unittest > overlays to be .dtso instead of .dts. > > Off the top of my head: > > scripts/Makefile.lib: > The rule for %.dtb.S invokes cmd_dt_S_dtb, which puts the > overlay data in section .dtb.init.rodata, with a label > pointing to the beginning of the overlay __dtb_XXX_begin and > a label pointing to the end of the overlay __dtb_XXX_end, > for the overlay named XXX. I _think_ that you could simply > add a corresponding rule for %.dtbo.S using a new command > cmd_dt_S_dtbo (the same as cmd_dt_S_dtb, except use labels > __dtbo_XXX_begin and __dtbo_XXX_end). If you do the above, please put it in drivers/of/unittest-data/Makefile instead of scripts/Makefile.lib because it is unittest.c specific and not meant to be anywhere else in the kernel. >>> >>> What about doing this then in unittest's Makefile instead (which I >>> already suggested earlier), that will make everything work just fine >>> without any other changes ? >>> >>> +# Required for of unittest files as they can't be renamed to .dtso >>> +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE >>> + $(call if_changed_dep,dtc) >>> >> >> If those rules are only needed by drivers/of/unittest-data/Makefile, >> they should not be located in scripts/Makefile.lib. >> >> But how can we fix drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a779*.dts >> if these are doing bad things. >> They seem to be overlay files even though the file name suffix is .dts > > That is correct, they are overlays. I have no issue with those files > being renamed to .dtso if that can help (but I haven't checked if that > would have any adverse effect on the R-Car DU driver). As Laurent replied, yes these are overlays. They were grandfathered in as a deprecated use of overlays. > > These files are there to ensure backward compatibility with older DT > bindings. The change was made 3 years ago and I wouldn't object to > dropping this completely, but I understand I may not be the most > cautious person when it comes to ensuring DT backward compatibility :-) My memory is that the goal was to eventually remove these overlays at some point in the future. If everyone agrees that today is the proper time, it would be helpful to go ahead and remove these .dts files and the code that uses them. -Frank > >> $ find drivers -name '*.dts' >> drivers/staging/pi433/Documentation/devicetree/pi433-overlay.dts >> drivers/staging/mt7621-dts/gbpc2.dts >> drivers/staging/mt7621-dts/gbpc1.dts >> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts >> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts >> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts >> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts >> drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts >> drivers/of/unittest-data/overlay_1.dts >> drivers/of/unittest-data/testcases.dts >> drivers/of/unittest-data/overlay_bad_add_dup_node.dts >> drivers/of/unittest-data/overlay_bad_symbol.dts >> drivers/of/unittest-data/overlay_0.dts >> drivers/of/unittest-data/overlay_11.dts >> drivers/of/unittest-data/overlay_gpio_03.dts >> drivers/of/unittest-data/overlay_gpio_04a.dts >> drivers/of/unittest-data/overlay_gpio_04b.dts >> drivers/of/unittest-data/overlay_5.dts >> drivers/of/unittest-data/overlay_bad_add_dup_prop.dts >> drivers/of/unittest-data/overlay_gpio_01.dts >> drivers/of/unittest-data/overlay_10.dts >> drivers/of/unittest-data/overlay_7.dts >> drivers/of/unittest-data/overlay_bad_phandle.dts >> drivers/of/unittest-data/overlay_3.dts >> drivers/of/unittest-data/overlay_6.dts >> drivers/of/unittest-data/overlay_8.dts >> drivers/of/unittest-data/overlay_12.dts >> drivers/of/unittest-data/overlay_gpio_02a.dts >> drivers/of/unittest-data/overlay_gpio_02b.dts >> drivers/of/unittest-data/overlay_4.dts >> drivers/of/unittest
Re: [PATCH v1 3/5] dt-bindings: arm: Add cpu-idle-states to Tegra194 CPU nodes
+Lorenzo Hi Sowjanya, Sorry for the delayed response. I am still in vacation 😉 On Thu, Mar 11, 2021 at 01:11:37PM -0800, Sowjanya Komatineni wrote: > > On 3/10/21 6:52 PM, Sudeep Holla wrote: > > On Mon, Mar 08, 2021 at 10:32:17AM -0800, Sowjanya Komatineni wrote: > > > On 3/7/21 8:37 PM, Sudeep Holla wrote: > > > > On Wed, Mar 03, 2021 at 10:08:10PM -0800, Sowjanya Komatineni wrote: > > > > > This patch adds cpu-idle-states and corresponding state nodes to > > > > > Tegra194 CPU in dt-binding document > > > > > > > > > I see that this platform has PSCI support. Can you care to explain why > > > > you need additional DT bindings and driver for PSCI based CPU suspend. > > > > Until the reasons are convincing, consider NACK from my side for this > > > > driver and DT bindings. You should be really using those bindings and > > > > the driver may be with minor changes there. > > > > > > > MCE firmware is in charge of state transition for Tegra194 carmel CPUs. > > > > > Sure, but I assume only TF-A talks to MCE and not any OSPM/Linux kernel. > > No. Tegra194 CPU idle driver works with MCE firmware running in background > so cpuidle kernel driver also talks to MCE firmware directly on state > information. If that is the case I wouldn't term this as PSCI compliant firmware and wouldn't attempt to use PSCI CPU idle driver. Now if we would what to allow non-PSCI idle driver for Arm64 is entirely different question that deserves a separate discussion IMO. > > > > > For run-time state transitions, need to provide state request along with > > > its > > > residency time to MCE firmware which is running in the background. > > > > > Sounds similar to x86 mwait, perhaps we need to extend PSCI if we need > > to make this firmware PSCI compliant or just say it is not and implement > > completely independent implementation. I am not saying that is acceptable > > ATM but I prefer not to mix some implementation to make it look like > > PSCI compliant. > > > > > State min residency is updated into power_state value along with state id > > > that is passed to psci_cpu_suspend_enter > > > > > Sounds like a hack/workaround. I would prefer to standardise that. IIUC > > the power_state is more static and derived from DT. I don't like to > > overload that TBH. Need to check with authors of that binding. > > Passing state idle time to ATF along with state to enter is Tegra specific > as ATF firmware updates idle time to Tegra MCE firmware which will be used > for deciding on state transition along with other information and background > load. > So far we don't have any platform specific PSCI in OSPM and I prefer to keep it that way. > Not sure if this need to be standardized but will try to find alternate way > to update idle time without misusing power-state value. > Sure, we can always review and see if any alternatives are acceptable, but I am bit nervous to tie this as PSCI if it is not strictly spec compliant. > Will discuss on this internally and get back. > Thanks. > > > > > Also states cross-over idle times need to be provided to MCE firmware. > > > > > New requirements if this has to be PSCI compliant. > > Updating cross-over idle times from DT to MCE firmware directly from cpuidle > kernel driver with corresponding MCE ARI commands is again Tegra specific. > So all there are platform specific but static information you need from DT ? If so, what can't it be made part of TF-A and OSPM can avoid interfering with that info completely. My understanding was that OSPM provides runtime hints like x86 mwait. If that's not the case, I am failing to understand the need for OSPM to pass such static information from DT to the firmware. Why can't that be just part of the firmware to begin with ? > > > > > MCE firmware decides on state transition based on these inputs along with > > > its background work load. > > > What do you mean by this *"background work load"* ? -- Regards, Sudeep
Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files
On 3/15/21 1:40 AM, Viresh Kumar wrote: > On 14-03-21, 20:16, Frank Rowand wrote: >> On 3/12/21 11:11 PM, Frank Rowand wrote: >>> On 3/12/21 1:13 AM, Viresh Kumar wrote: On 12-03-21, 01:09, Frank Rowand wrote: > I suggested having the .dtso files include the .dts file because that is > a relatively > small and easy change to test. What would probably make more sense is > the rename > the existing overlay .dts files to be .dtso files and then for each > overlay .dtso > file create a new .dts file that #includes the corresponding .dtso file. > This is > more work and churn, but easier to document that the .dts files are a > hack that is > needed so that the corresponding .dtb.S files will be generated. What about creating links instead then ? >>> >>> I don't really like the idea of using links here. >>> >>> Maybe it is best to make the changes needed to allow the unittest >>> overlays to be .dtso instead of .dts. >>> >>> Off the top of my head: >>> >>> scripts/Makefile.lib: >>> The rule for %.dtb.S invokes cmd_dt_S_dtb, which puts the >>> overlay data in section .dtb.init.rodata, with a label >>> pointing to the beginning of the overlay __dtb_XXX_begin and >>> a label pointing to the end of the overlay __dtb_XXX_end, >>> for the overlay named XXX. I _think_ that you could simply >>> add a corresponding rule for %.dtbo.S using a new command >>> cmd_dt_S_dtbo (the same as cmd_dt_S_dtb, except use labels >>> __dtbo_XXX_begin and __dtbo_XXX_end). >> >> If you do the above, please put it in drivers/of/unittest-data/Makefile >> instead of scripts/Makefile.lib because it is unittest.c specific and >> not meant to be anywhere else in the kernel. > > What about doing this then in unittest's Makefile instead (which I > already suggested earlier), that will make everything work just fine > without any other changes ? > > +# Required for of unittest files as they can't be renamed to .dtso > +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE > + $(call if_changed_dep,dtc) > I should have looked at patch 3/5 more carefully instead of counting on Masahiro to check it out and simply build testing. Patch 3/5 does not seem correct. I'll look over all the makefile related changes that have been accepted (if any) and patch 3/5 later today (Tuesday). -Frank
Re: [PATCH] dlm: Mundane typo fixes throughout the file lock.c
On 3/15/21 10:27 PM, Bhaskar Chowdhury wrote: > > Trivial typo fixes throughout the file. > > cc: triv...@vger.kernel.org > > Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap > --- > fs/dlm/lock.c | 8 > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c > index 002123efc6b0..caadc426c8b4 100644 > --- a/fs/dlm/lock.c > +++ b/fs/dlm/lock.c > @@ -91,7 +91,7 @@ static void del_timeout(struct dlm_lkb *lkb); > static void toss_rsb(struct kref *kref); > > /* > - * Lock compatibilty matrix - thanks Steve > + * Lock compatibility matrix - thanks Steve > * UN = Unlocked state. Not really a state, used as a flag > * PD = Padding. Used to make the matrix a nice power of two in size > * Other states are the same as the VMS DLM. > @@ -1535,7 +1535,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, > int mstype, > return -1; > } > > - /* Remove for the convert reply, and premptively remove for the > + /* Remove for the convert reply, and preemptively remove for the > cancel reply. A convert has been granted while there's still > an outstanding cancel on it (the cancel is moot and the result > in the cancel reply should be 0). We preempt the cancel reply > @@ -2357,14 +2357,14 @@ static int _can_be_granted(struct dlm_rsb *r, struct > dlm_lkb *lkb, int now, >* 6-5: But the default algorithm for deciding whether to grant or >* queue conversion requests does not by itself guarantee that such >* requests are serviced on a "first come first serve" basis. This, in > - * turn, can lead to a phenomenon known as "indefinate postponement". > + * turn, can lead to a phenomenon known as "indefinite postponement". >* >* 6-7: This issue is dealt with by using the optional QUECVT flag with >* the system service employed to request a lock conversion. This flag >* forces certain conversion requests to be queued, even if they are >* compatible with the granted modes of other locks on the same >* resource. Thus, the use of this flag results in conversion requests > - * being ordered on a "first come first servce" basis. > + * being ordered on a "first come first serve" basis. >* >* DCT: This condition is all about new conversions being able to occur >* "in place" while the lock remains on the granted queue (assuming > -- -- ~Randy
Re: [PATCH] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c
On 22:19 Mon 15 Mar 2021, Randy Dunlap wrote: On 3/15/21 9:19 PM, Bhaskar Chowdhury wrote: s/kernal/kernel/ Signed-off-by: Bhaskar Chowdhury --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 731dd8d0dbb1..6320d2cfd9d3 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -594,7 +594,7 @@ static __init int bts_init(void) * we cannot use the user mapping since it will not be available * if we're not running the owning process. * -* With PTI we can't use the kernal map either, because its not +* With PTI we can't use the kernel map either, because its not it's Heck! * there when we run userspace. * * For now, disable this driver when using PTI. -- -- ~Randy signature.asc Description: PGP signature
Re: [PATCH] bpf: selftests: remove unused 'nospace_err' in tests for batched ops in array maps
Hello: This patch was applied to bpf/bpf-next.git (refs/heads/master): On Mon, 15 Mar 2021 14:29:51 +0100 you wrote: > This seems to be a reminiscent from the hashmap tests. > > Signed-off-by: Pedro Tammela > --- > tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c | 5 - > 1 file changed, 5 deletions(-) Here is the summary with links: - bpf: selftests: remove unused 'nospace_err' in tests for batched ops in array maps https://git.kernel.org/bpf/bpf-next/c/23f50b5ac331 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
Re: [PATCH 5.4 000/168] 5.4.106-rc1 review
On Mon, 15 Mar 2021 at 19:35, wrote: > > From: Greg Kroah-Hartman > > This is the start of the stable review cycle for the 5.4.106 release. > There are 168 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Wed, 17 Mar 2021 13:55:26 +. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.4.106-rc1.gz > or in the git tree and branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > linux-5.4.y > and the diffstat can be found below. > > thanks, > > greg k-h Results from Linaro’s test farm. No regressions on arm64, arm, x86_64, and i386. Tested-by: Linux Kernel Functional Testing Summary kernel: 5.4.106-rc1 git repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git git branch: linux-5.4.y git commit: 26ba2df2641dff3b9583fc4d1fbdc668bd346f00 git describe: v5.4.105-169-g26ba2df2641d Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.4.y/build/v5.4.105-169-g26ba2df2641d No regressions (compared to build v5.4.105) No fixes (compared to build v5.4.105) Ran 50881 total tests in the following environments and test suites. Environments -- - arc - arm - arm64 - dragonboard-410c - hi6220-hikey - i386 - juno-r2 - juno-r2-compat - juno-r2-kasan - mips - nxp-ls2088 - nxp-ls2088-64k_page_size - parisc - powerpc - qemu-arm-clang - qemu-arm64-clang - qemu-arm64-kasan - qemu-x86_64-clang - qemu-x86_64-kasan - qemu-x86_64-kcsan - qemu_arm - qemu_arm64 - qemu_arm64-compat - qemu_i386 - qemu_x86_64 - qemu_x86_64-compat - riscv - s390 - sh - sparc - x15 - x86 - x86-kasan - x86_64 Test Suites --- * build * linux-log-parser * install-android-platform-tools-r2600 * kselftest- * kselftest-android * kselftest-bpf * kselftest-capabilities * kselftest-cgroup * kselftest-clone3 * kselftest-core * kselftest-cpu-hotplug * kselftest-cpufreq * kselftest-efivarfs * kselftest-filesystems * kselftest-firmware * kselftest-fpu * kselftest-futex * kselftest-gpio * kselftest-intel_pstate * kselftest-ipc * kselftest-ir * kselftest-kcmp * kselftest-kvm * kselftest-lib * kselftest-livepatch * kselftest-lkdtm * kselftest-membarrier * kselftest-memfd * kselftest-memory-hotplug * kselftest-mincore * kselftest-mount * kselftest-mqueue * kselftest-openat2 * kselftest-pid_namespace * kselftest-pidfd * kselftest-proc * kselftest-pstore * kselftest-ptrace * kselftest-rseq * kselftest-rtc * kselftest-seccomp * kselftest-sigaltstack * kselftest-size * kselftest-splice * kselftest-static_keys * kselftest-sync * kselftest-sysctl * kselftest-timens * kselftest-timers * kselftest-tmpfs * kselftest-tpm2 * kselftest-user * kselftest-zram * libhugetlbfs * ltp-controllers-tests * ltp-dio-tests * ltp-fcntl-locktests-tests * ltp-filecaps-tests * ltp-fs-tests * ltp-fs_bind-tests * ltp-fs_perms_simple-tests * ltp-fsx-tests * ltp-io-tests * ltp-nptl-tests * ltp-pty-tests * ltp-sched-tests * ltp-securebits-tests * ltp-tracing-tests * perf * v4l2-compliance * fwts * kselftest-net * kselftest-netfilter * kselftest-nsfs * kselftest-tc-testing * kvm-unit-tests * ltp-cap_bounds-tests * ltp-commands-tests * ltp-containers-tests * ltp-cpuhotplug-tests * ltp-crypto-tests * ltp-cve-tests * ltp-hugetlb-tests * ltp-ipc-tests * ltp-math-tests * ltp-mm-tests * network-basic-tests * kselftest-kexec * kselftest-vm * kselftest-x86 * ltp-cap_bounds-test[ * ltp-open-posix-tests * ltp-syscalls-tests * rcutorture * ssuite -- Linaro LKFT https://lkft.linaro.org
[PATCH] dlm: Mundane typo fixes throughout the file lock.c
Trivial typo fixes throughout the file. cc: triv...@vger.kernel.org Signed-off-by: Bhaskar Chowdhury --- fs/dlm/lock.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 002123efc6b0..caadc426c8b4 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -91,7 +91,7 @@ static void del_timeout(struct dlm_lkb *lkb); static void toss_rsb(struct kref *kref); /* - * Lock compatibilty matrix - thanks Steve + * Lock compatibility matrix - thanks Steve * UN = Unlocked state. Not really a state, used as a flag * PD = Padding. Used to make the matrix a nice power of two in size * Other states are the same as the VMS DLM. @@ -1535,7 +1535,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype, return -1; } - /* Remove for the convert reply, and premptively remove for the + /* Remove for the convert reply, and preemptively remove for the cancel reply. A convert has been granted while there's still an outstanding cancel on it (the cancel is moot and the result in the cancel reply should be 0). We preempt the cancel reply @@ -2357,14 +2357,14 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now, * 6-5: But the default algorithm for deciding whether to grant or * queue conversion requests does not by itself guarantee that such * requests are serviced on a "first come first serve" basis. This, in -* turn, can lead to a phenomenon known as "indefinate postponement". +* turn, can lead to a phenomenon known as "indefinite postponement". * * 6-7: This issue is dealt with by using the optional QUECVT flag with * the system service employed to request a lock conversion. This flag * forces certain conversion requests to be queued, even if they are * compatible with the granted modes of other locks on the same * resource. Thus, the use of this flag results in conversion requests -* being ordered on a "first come first servce" basis. +* being ordered on a "first come first serve" basis. * * DCT: This condition is all about new conversions being able to occur * "in place" while the lock remains on the granted queue (assuming -- 2.30.2
Re: [PATCH V11 3/5] kbuild: Allow .dtso format for overlay source files
On 16-03-21, 02:43, Masahiro Yamada wrote: > On Mon, Mar 15, 2021 at 3:40 PM Viresh Kumar wrote: > > On 14-03-21, 20:16, Frank Rowand wrote: > > What about doing this then in unittest's Makefile instead (which I > > already suggested earlier), that will make everything work just fine > > without any other changes ? > > > > +# Required for of unittest files as they can't be renamed to .dtso > > +$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE > > + $(call if_changed_dep,dtc) > > If those rules are only needed by drivers/of/unittest-data/Makefile, > they should not be located in scripts/Makefile.lib. Right, this is exactly what I suggested. > But how can we fix drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a779*.dts > if these are doing bad things. > They seem to be overlay files even though the file name suffix is .dts > > $ find drivers -name '*.dts' > drivers/staging/pi433/Documentation/devicetree/pi433-overlay.dts > drivers/staging/mt7621-dts/gbpc2.dts > drivers/staging/mt7621-dts/gbpc1.dts > drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7791.dts > drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7795.dts > drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7796.dts > drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7793.dts > drivers/gpu/drm/rcar-du/rcar_du_of_lvds_r8a7790.dts For all the above files, even if they are really overlay files, we won't use fdtoverlay tool to apply them to some base dtb and so if we leave them as is, i.e. .dts->.dtb, it won't break anything. The problem only happens if someone wants to generate .dtbo for them instead and then they should be named .dtso as we won't allow .dts -> .dtbo conversion there. -- viresh
Re: [PATCH v2 16/27] perf evlist: Warn as events from different hybrid PMUs in a group
Hi Jiri, On 3/16/2021 7:03 AM, Jiri Olsa wrote: On Thu, Mar 11, 2021 at 03:07:31PM +0800, Jin Yao wrote: SNIP goto try_again; } + + if (errno == EINVAL && perf_pmu__hybrid_exist()) + evlist__warn_hybrid_group(evlist); rc = -errno; evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7a732508b2b4..6f780a039db0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -239,6 +239,9 @@ static void evlist__check_cpu_maps(struct evlist *evlist) struct evsel *evsel, *pos, *leader; char buf[1024]; + if (evlist__hybrid_exist(evlist)) + return; this should be in separate patch and explained Now I have another idea. If a group consists of atom events and core events, we still follow current disabling group solution? I mean removing following code: if (evlist__hybrid_exist(evlist)) return; evlist__check_cpu_maps then continues running and disabling the group. But also report with a warning that says "WARNING: Group has events from different hybrid PMUs". Do you like this way? + evlist__for_each_entry(evlist, evsel) { leader = evsel->leader; @@ -726,6 +729,10 @@ enum counter_recovery { static enum counter_recovery stat_handle_error(struct evsel *counter) { char msg[BUFSIZ]; + + if (perf_pmu__hybrid_exist() && errno == EINVAL) + evlist__warn_hybrid_group(evsel_list); + /* * PPC returns ENXIO for HW counters until 2.6.37 * (behavior changed with commit b0a873e). diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f139151b9433..5ec891418cdd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -2224,3 +2224,47 @@ void evlist__invalidate_all_cpus(struct evlist *evlist) perf_cpu_map__put(evlist->core.all_cpus); evlist->core.all_cpus = perf_cpu_map__empty_new(1); } + +static bool group_hybrid_conflict(struct evsel *leader) +{ + struct evsel *pos, *prev = NULL; + + for_each_group_evsel(pos, leader) { + if (!pos->pmu_name || !perf_pmu__is_hybrid(pos->pmu_name)) + continue; + + if (prev && strcmp(prev->pmu_name, pos->pmu_name)) + return true; + + prev = pos; + } + + return false; +} + +void evlist__warn_hybrid_group(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_group_leader(evsel) && + evsel->core.nr_members > 1 && hm, could we just iterate all the members and make sure the first found hybrid event's pmu matches the pmu of the rest hybrid events in the list? '{cpu_core/event1/,cpu_core/event2/}','{cpu_atom/event3/,cpu_atom/event4/}' Two or more groups need to be supported. We get the first hybrid event's pmu (cpu_core in this example) but it doesn't match the cpu_atom/event3/ and cpu_atom/event4/. But actually this case should be supported, right? + group_hybrid_conflict(evsel)) { + WARN_ONCE(1, "WARNING: Group has events from " +"different hybrid PMUs\n"); + return; + } + } +} + +bool evlist__hybrid_exist(struct evlist *evlist) evlist__has_hybrid seems better Yes, agree. Thanks Jin Yao jirka
Re: [PATCHv2 0/3] Add UVC 1.5 Region Of Interest control to uvcvideo
On (21/02/08 14:17), Sergey Senozhatsky wrote: > Hello, > > RFC Hi Laurent, Gentle ping. -ss
Re: [PATCH] fs: Trivial typo fix in the file coredump.c
On 3/15/21 10:03 PM, Bhaskar Chowdhury wrote: > > s/postion/position/ > > Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap > --- > Al, I hope this time I read the comments well enough,if still > I am at fault , curse me! > > fs/coredump.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/fs/coredump.c b/fs/coredump.c > index 1c0fdc1aa70b..3ecae122ffd9 100644 > --- a/fs/coredump.c > +++ b/fs/coredump.c > @@ -923,7 +923,7 @@ EXPORT_SYMBOL(dump_align); > > /* > * Ensures that file size is big enough to contain the current file > - * postion. This prevents gdb from complaining about a truncated file > + * position. This prevents gdb from complaining about a truncated file > * if the last "write" to the file was dump_skip. > */ > void dump_truncate(struct coredump_params *cprm) > -- -- ~Randy
[PATCH v10] sched/fair: select idle cpu from idle cpumask for task wakeup
From: Aubrey Li Add idle cpumask to track idle cpus in sched domain. Every time a CPU enters idle, the CPU is set in idle cpumask to be a wakeup target. And if the CPU is not in idle, the CPU is cleared in idle cpumask during scheduler tick to ratelimit idle cpumask update. When a task wakes up to select an idle cpu, scanning idle cpumask has lower cost than scanning all the cpus in last level cache domain, especially when the system is heavily loaded. v9->v10: - Update scan cost only when the idle cpumask is scanned, i.e, the idle cpumask is not empty v8->v9: - rebase on top of tip/sched/core, no code change v7->v8: - refine update_idle_cpumask, no functionality change - fix a suspicious RCU usage warning with CONFIG_PROVE_RCU=y v6->v7: - place the whole idle cpumask mechanism under CONFIG_SMP v5->v6: - decouple idle cpumask update from stop_tick signal, set idle CPU in idle cpumask every time the CPU enters idle v4->v5: - add update_idle_cpumask for s2idle case - keep the same ordering of tick_nohz_idle_stop_tick() and update_ idle_cpumask() everywhere v3->v4: - change setting idle cpumask from every idle entry to tickless idle if cpu driver is available - move clearing idle cpumask to scheduler_tick to decouple nohz mode v2->v3: - change setting idle cpumask to every idle entry, otherwise schbench has a regression of 99th percentile latency - change clearing idle cpumask to nohz_balancer_kick(), so updating idle cpumask is ratelimited in the idle exiting path - set SCHED_IDLE cpu in idle cpumask to allow it as a wakeup target v1->v2: - idle cpumask is updated in the nohz routines, by initializing idle cpumask with sched_domain_span(sd), nohz=off case remains the original behavior Cc: Peter Zijlstra Cc: Mel Gorman Cc: Vincent Guittot Cc: Qais Yousef Cc: Valentin Schneider Cc: Jiang Biao Cc: Tim Chen Signed-off-by: Aubrey Li --- include/linux/sched/topology.h | 13 kernel/sched/core.c| 2 ++ kernel/sched/fair.c| 47 -- kernel/sched/idle.c| 5 + kernel/sched/sched.h | 4 kernel/sched/topology.c| 3 ++- 6 files changed, 71 insertions(+), 3 deletions(-) diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 8f0f778..905e382 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -74,8 +74,21 @@ struct sched_domain_shared { atomic_tref; atomic_tnr_busy_cpus; int has_idle_cores; + /* +* Span of all idle CPUs in this domain. +* +* NOTE: this field is variable length. (Allocated dynamically +* by attaching extra space to the end of the structure, +* depending on how many CPUs the kernel has booted up with) +*/ + unsigned long idle_cpus_span[]; }; +static inline struct cpumask *sds_idle_cpus(struct sched_domain_shared *sds) +{ + return to_cpumask(sds->idle_cpus_span); +} + struct sched_domain { /* These fields must be setup */ struct sched_domain __rcu *parent; /* top domain must be null terminated */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca2bb62..310bf9a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4552,6 +4552,7 @@ void scheduler_tick(void) #ifdef CONFIG_SMP rq->idle_balance = idle_cpu(cpu); + update_idle_cpumask(cpu, rq->idle_balance); trigger_load_balance(rq); #endif } @@ -8209,6 +8210,7 @@ void __init sched_init(void) rq->idle_stamp = 0; rq->avg_idle = 2*sysctl_sched_migration_cost; rq->max_idle_balance_cost = sysctl_sched_migration_cost; + rq->last_idle_state = 1; INIT_LIST_HEAD(&rq->cfs_tasks); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 794c2cb..24384b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6134,7 +6134,12 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t if (!this_sd) return -1; - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + /* +* sched_domain_shared is set only at shared cache level, +* this works only because select_idle_cpu is called with +* sd_llc. +*/ + cpumask_and(cpus, sds_idle_cpus(sd->shared), p->cpus_ptr); if (sched_feat(SIS_PROP) && !smt) { u64 avg_cost, avg_idle, span_avg; @@ -6173,7 +6178,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t if (smt) set_idle_cores(this, false); - if (sched_feat(SIS_PROP) && !smt) { + if (sched_feat(SIS_PROP) && !smt && (cpu < nr_cpumask_bits)) { time = cpu_clock(this) - time; update_avg(&this_sd->avg_scan_cost, time); } @@ -6838,6 +6843,44 @@ balance_fair(stru
Re: [PATCH] usb: host: Mundane spello fix in the file sl811_cs.c
On 3/15/21 9:52 PM, Bhaskar Chowdhury wrote: > > s/seting/setting/ > > Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap > --- > drivers/usb/host/sl811_cs.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c > index 72136373ffab..16d157013018 100644 > --- a/drivers/usb/host/sl811_cs.c > +++ b/drivers/usb/host/sl811_cs.c > @@ -94,7 +94,7 @@ static int sl811_hc_init(struct device *parent, > resource_size_t base_addr, > return -EBUSY; > platform_dev.dev.parent = parent; > > - /* finish seting up the platform device */ > + /* finish setting up the platform device */ > resources[0].start = irq; > > resources[1].start = base_addr; > -- -- ~Randy
Re: [PATCH] samples: bpf: Fix a spelling typo in do_hbm_test.sh
Hello: This patch was applied to bpf/bpf-next.git (refs/heads/master): On Mon, 15 Mar 2021 21:44:54 +0900 you wrote: > This patch fixes a spelling typo in do_hbm_test.sh > > Signed-off-by: Masanari Iida > --- > samples/bpf/do_hbm_test.sh | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) Here is the summary with links: - samples: bpf: Fix a spelling typo in do_hbm_test.sh https://git.kernel.org/bpf/bpf-next/c/d94436a5d1a0 You are awesome, thank you! -- Deet-doot-dot, I am a bot. https://korg.docs.kernel.org/patchwork/pwbot.html
Re: [PATCH 5.10 000/290] 5.10.24-rc1 review
On Mon, 15 Mar 2021 at 19:27, wrote: > > From: Greg Kroah-Hartman > > This is the start of the stable review cycle for the 5.10.24 release. > There are 290 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Wed, 17 Mar 2021 13:55:02 +. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.10.24-rc1.gz > or in the git tree and branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > linux-5.10.y > and the diffstat can be found below. > > thanks, > > greg k-h Results from Linaro’s test farm. No regressions on arm64, arm, x86_64, and i386. Tested-by: Linux Kernel Functional Testing Summary kernel: 5.10.24-rc1 git repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git git branch: linux-5.10.y git commit: c6b3724e56923191dc567ceb626ba15daa49313c git describe: v5.10.23-291-gc6b3724e5692 Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.10.y/build/v5.10.23-291-gc6b3724e5692 No regressions (compared to build v5.10.23) No fixes (compared to build v5.10.23) Ran 58761 total tests in the following environments and test suites. Environments -- - arc - arm - arm64 - dragonboard-410c - hi6220-hikey - i386 - juno-r2 - juno-r2-compat - juno-r2-kasan - mips - nxp-ls2088 - nxp-ls2088-64k_page_size - parisc - powerpc - qemu-arm-clang - qemu-arm64-clang - qemu-arm64-kasan - qemu-i386-clang - qemu-x86_64-clang - qemu-x86_64-kasan - qemu-x86_64-kcsan - qemu_arm - qemu_arm64 - qemu_arm64-compat - qemu_i386 - qemu_x86_64 - qemu_x86_64-compat - riscv - s390 - sh - sparc - x15 - x86 - x86-kasan - x86_64 Test Suites --- * build * linux-log-parser * install-android-platform-tools-r2600 * kselftest-lkdtm * ltp-containers-tests * ltp-cve-tests * ltp-dio-tests * ltp-fcntl-locktests-tests * ltp-filecaps-tests * ltp-fs-tests * ltp-fs_bind-tests * ltp-fs_perms_simple-tests * ltp-fsx-tests * ltp-hugetlb-tests * ltp-io-tests * ltp-mm-tests * ltp-nptl-tests * ltp-pty-tests * ltp-securebits-tests * ltp-syscalls-tests * perf * fwts * kselftest- * kselftest-bpf * kselftest-intel_pstate * kselftest-kvm * kselftest-livepatch * kselftest-ptrace * kselftest-rseq * kselftest-rtc * kselftest-seccomp * kselftest-sigaltstack * kselftest-size * kselftest-splice * kselftest-static_keys * kselftest-sync * kselftest-sysctl * kselftest-timens * kselftest-timers * kselftest-tmpfs * kselftest-tpm2 * kselftest-user * kselftest-zram * libhugetlbfs * ltp-cap_bounds-tests * ltp-commands-tests * ltp-cpuhotplug-tests * ltp-crypto-tests * ltp-ipc-tests * ltp-math-tests * ltp-sched-tests * ltp-tracing-tests * network-basic-tests * v4l2-compliance * kselftest-android * kselftest-capabilities * kselftest-cgroup * kselftest-clone3 * kselftest-core * kselftest-cpu-hotplug * kselftest-efivarfs * kselftest-filesystems * kselftest-firmware * kselftest-fpu * kselftest-futex * kselftest-gpio * kselftest-ipc * kselftest-ir * kselftest-kcmp * kselftest-kexec * kselftest-lib * kselftest-membarrier * kselftest-memfd * kselftest-memory-hotplug * kselftest-mincore * kselftest-mount * kselftest-mqueue * kselftest-net * kselftest-netfilter * kselftest-nsfs * kselftest-openat2 * kselftest-pid_namespace * kselftest-pidfd * kselftest-proc * kselftest-pstore * kselftest-tc-testing * kselftest-vm * kselftest-x86 * ltp-controllers-tests * ltp-open-posix-tests * kselftest-cpufreq * kvm-unit-tests * kunit * rcutorture * ssuite * kselftest-vsyscall-mode-native- * kselftest-vsyscall-mode-none- -- Linaro LKFT https://lkft.linaro.org
Re: [PATCH] x86: events: intel: A letter change in a word to make it sound right,in the file bts.c
On 3/15/21 9:19 PM, Bhaskar Chowdhury wrote: > > s/kernal/kernel/ > > Signed-off-by: Bhaskar Chowdhury > --- > arch/x86/events/intel/bts.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c > index 731dd8d0dbb1..6320d2cfd9d3 100644 > --- a/arch/x86/events/intel/bts.c > +++ b/arch/x86/events/intel/bts.c > @@ -594,7 +594,7 @@ static __init int bts_init(void) >* we cannot use the user mapping since it will not be available >* if we're not running the owning process. >* > - * With PTI we can't use the kernal map either, because its not > + * With PTI we can't use the kernel map either, because its not it's >* there when we run userspace. >* >* For now, disable this driver when using PTI. > -- -- ~Randy
Re: [PATCH] samples: bpf: Fix a spelling typo in do_hbm_test.sh
On Mon, Mar 15, 2021 at 5:45 AM Masanari Iida wrote: > > This patch fixes a spelling typo in do_hbm_test.sh > > Signed-off-by: Masanari Iida > --- Thanks, applied to bpf-next. For the future patches, please use [PATCH bpf-next] subject prefix if you are sending patches against bpf-next tree (of [PATCH bpf], if it's against bpf tree). > samples/bpf/do_hbm_test.sh | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh > index 21790ea5c460..38e4599350db 100755 > --- a/samples/bpf/do_hbm_test.sh > +++ b/samples/bpf/do_hbm_test.sh > @@ -10,7 +10,7 @@ > Usage() { >echo "Script for testing HBM (Host Bandwidth Manager) framework." >echo "It creates a cgroup to use for testing and load a BPF program to > limit" > - echo "egress or ingress bandwidht. It then uses iperf3 or netperf to > create" > + echo "egress or ingress bandwidth. It then uses iperf3 or netperf to > create" >echo "loads. The output is the goodput in Mbps (unless -D was used)." >echo "" >echo "USAGE: $name [out] [-b=|--bpf=] [-c=|--cc=]" > -- > 2.25.0 >
Re: [PATCH V2] mips: asm: octeon: A typo fix in the file cvmx-address.h
On 3/15/21 9:33 PM, Bhaskar Chowdhury wrote: > > s/techically/technically/ > > Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap > --- > Changes from V1: > Meh, missed the changelog text, so added :) > > arch/mips/include/asm/octeon/cvmx-address.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/mips/include/asm/octeon/cvmx-address.h > b/arch/mips/include/asm/octeon/cvmx-address.h > index ef8c4a61..5df5c90f6a5d 100644 > --- a/arch/mips/include/asm/octeon/cvmx-address.h > +++ b/arch/mips/include/asm/octeon/cvmx-address.h > @@ -152,7 +152,7 @@ typedef union { > > /* physical mem address */ > struct { > - /* techically, <47:40> are dont-cares */ > + /* technically, <47:40> are dont-cares */ > uint64_t zeroes:24; > /* the hardware ignores <39:36> in Octeon I */ > uint64_t unaddr:4; > -- -- ~Randy
Re: [PATCH 5.11 000/306] 5.11.7-rc1 review
On Mon, 15 Mar 2021 at 19:27, wrote: > > From: Greg Kroah-Hartman > > This is the start of the stable review cycle for the 5.11.7 release. > There are 306 patches in this series, all will be posted as a response > to this one. If anyone has any issues with these being applied, please > let me know. > > Responses should be made by Wed, 17 Mar 2021 13:54:26 +. > Anything received after that time might be too late. > > The whole patch series can be found in one patch at: > > https://www.kernel.org/pub/linux/kernel/v5.x/stable-review/patch-5.11.7-rc1.gz > or in the git tree and branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > linux-5.11.y > and the diffstat can be found below. > > thanks, > > greg k-h Results from Linaro’s test farm. No regressions on arm64, arm, x86_64, and i386. Tested-by: Linux Kernel Functional Testing Summary kernel: 5.11.7-rc1 git repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git git branch: linux-5.11.y git commit: f109baa2424b2523fbb975de996fedc7a53ef531 git describe: v5.11.6-307-gf109baa2424b Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.11.y/build/v5.11.6-307-gf109baa2424b No regressions (compared to build v5.11.6) No fixes (compared to build v5.11.6) Ran 55999 total tests in the following environments and test suites. Environments -- - arc - arm - arm64 - dragonboard-410c - hi6220-hikey - i386 - juno-64k_page_size - juno-r2 - juno-r2-compat - juno-r2-kasan - mips - nxp-ls2088 - nxp-ls2088-64k_page_size - parisc - powerpc - qemu-arm-clang - qemu-arm64-clang - qemu-arm64-kasan - qemu-i386-clang - qemu-x86_64-clang - qemu-x86_64-kasan - qemu-x86_64-kcsan - qemu_arm - qemu_arm64 - qemu_arm64-compat - qemu_i386 - qemu_x86_64 - qemu_x86_64-compat - riscv - s390 - sh - sparc - x15 - x86 - x86-kasan - x86_64 Test Suites --- * build * linux-log-parser * install-android-platform-tools-r2600 * kselftest- * kselftest-efivarfs * kselftest-filesystems * kselftest-firmware * kselftest-fpu * kselftest-futex * kselftest-gpio * kselftest-ipc * kselftest-ir * kselftest-kcmp * kselftest-lib * kselftest-lkdtm * kselftest-membarrier * kselftest-memfd * kselftest-memory-hotplug * kselftest-mincore * kselftest-mount * kselftest-mqueue * kselftest-net * kselftest-netfilter * kselftest-nsfs * kselftest-openat2 * kselftest-pid_namespace * kselftest-pidfd * kselftest-proc * kselftest-pstore * kselftest-rseq * kselftest-rtc * kselftest-seccomp * kselftest-sigaltstack * kselftest-size * kselftest-splice * kselftest-static_keys * kselftest-sync * kselftest-sysctl * kselftest-tc-testing * ltp-cap_bounds-tests * ltp-containers-tests * ltp-cpuhotplug-tests * ltp-crypto-tests * ltp-dio-tests * ltp-fs-tests * ltp-io-tests * ltp-ipc-tests * ltp-syscalls-tests * perf * v4l2-compliance * fwts * kselftest-android * kselftest-bpf * kselftest-capabilities * kselftest-cgroup * kselftest-clone3 * kselftest-core * kselftest-cpu-hotplug * kselftest-cpufreq * kselftest-timens * kselftest-timers * kselftest-tmpfs * kselftest-tpm2 * kselftest-user * kselftest-zram * ltp-commands-tests * ltp-cve-tests * ltp-fcntl-locktests-tests * ltp-filecaps-tests * ltp-fs_bind-tests * ltp-fs_perms_simple-tests * ltp-fsx-tests * ltp-hugetlb-tests * ltp-math-tests * ltp-nptl-tests * ltp-pty-tests * ltp-sched-tests * ltp-securebits-tests * ltp-tracing-tests * network-basic-tests * kselftest-intel_pstate * kselftest-kexec * kselftest-kvm * kselftest-livepatch * kselftest-ptrace * kselftest-vm * kselftest-x86 * libhugetlbfs * ltp-controllers-tests * ltp-mm-tests * ltp-open-posix-tests * kvm-unit-tests * rcutorture * kunit * kselftest-vsyscall-mode-native- * kselftest-vsyscall-mode-none- * ssuite -- Linaro LKFT https://lkft.linaro.org
Re: [PATCH] smp: kernel/panic.c - silence warnings
On 3/15/21 9:08 PM, He Ying wrote: > We found these warnings in kernel/panic.c by using sparse tool: > warning: symbol 'panic_smp_self_stop' was not declared. > warning: symbol 'nmi_panic_self_stop' was not declared. > warning: symbol 'crash_smp_send_stop' was not declared. > > To avoid them, add declarations for these three functions in > include/linux/smp.h. > > Reported-by: Hulk Robot > Signed-off-by: He Ying > --- > include/linux/smp.h | 8 > 1 file changed, 8 insertions(+) > > diff --git a/include/linux/smp.h b/include/linux/smp.h > index 70c6f6284dcf..861a253cc179 100644 > --- a/include/linux/smp.h > +++ b/include/linux/smp.h > @@ -50,6 +50,14 @@ extern unsigned int total_cpus; > int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, >int wait); > > +/* > + * Cpus stopping functions in panic. All have default weak definations. definitions. > + * Architecure dependent code may override them. Architecture-dependent > + */ > +void panic_smp_self_stop(void); > +void nmi_panic_self_stop(struct pt_regs *regs); > +void crash_smp_send_stop(void); > + > /* > * Call a function on all processors > */ > -- ~Randy
Re: [PATCH v12 0/2] UIO support for dfl devices
Hi Greg: I listed below some answers from Moritz and Yilun from previous mails for your question. Do you have more comments? Thanks in advance, Yilun On Mon, Mar 08, 2021 at 09:59:34AM +0800, Xu Yilun wrote: > This patchset supports some dfl device drivers written in userspace. > > There are some Q&A about why UIO driver is needed in v11: > > >From Greg: > Why are you saying that an ethernet driver should be using the UIO > interface? > > And why can't you use the existing UIO drivers that bind to memory > regions specified by firmware? Without an interrupt being used, why is > UIO needed at all? > > >From Moritz: > Essentially I see two options: > - Have a DFL bus driver instantiate a platform driver (uio_pdrv_genirq) > which I *think* you described above? > - What this patch implements -- a UIO driver on the DFL bus > > These FPGA devices can on the fly change their contents and -- even if > just for test -- being able to expose a bunch of registers via UIO can > be extremely useful. > > Whether a device should expose registers or not should be up to the > implemeneter of the FPGA design I think (policy). This patch (or the > previous version) provides a mechanism to do so via DFL. > > This is similar in nature to uio_pdrv_genirq on a DT based platform, to > expose the registers you instantiate the DT node. > > Re-implementing a new driver for each of these instances doesn't seem > desirable and tying DFL as enumeration mechanism to UIO seems like a > good compromise for enabling this kind of functionality. > > Note this is *not* an attempt to bypass the network stack or other > existing subsystems. > > See the original message in: > > https://lore.kernel.org/linux-fpga/ydvq8ao8v3nhl...@epycbox.lan/T/#m66ba2c96848e3dea38d1a4f16dfea3cb291f7975 > > > >From Yilun: > The ETH GROUP IP is not designed as the full functional ethernet > controller. It is specially developed for the Intel N3000 NIC. Since it > is an FPGA based card, it is designed for the users to runtime reload > part of the MAC layer logic developed by themselves, while the ETH GROUP > is another part of the MAC which is not expected to be reloaded by > customers, but it provides some configurations for software to work with > the user logic. > > So I category the feature as the devices that "designed for specific > purposes and does not fit into one of the standard kernel subsystems". > Some related description could be found in Patch #2, to illustrate why > using UIO for some DFL devices. > > There are now UIO drivers for PCI or platform devices, but in this case > we are going to export a DFL(Device Feature List) bus device to > userspace, a DFL driver for UIO is needed to bind to it. > > See the original message in: > > https://lore.kernel.org/linux-fpga/ydvq8ao8v3nhl...@epycbox.lan/T/#m91b303fd61485644353fad1e1e9c11d528844684 > > > Xu Yilun (2): > uio: uio_dfl: add userspace i/o driver for DFL bus > Documentation: fpga: dfl: Add description for DFL UIO support > > Documentation/fpga/dfl.rst | 26 ++ > MAINTAINERS| 1 + > drivers/uio/Kconfig| 17 > drivers/uio/Makefile | 1 + > drivers/uio/uio_dfl.c | 66 > ++ > 5 files changed, 111 insertions(+) > create mode 100644 drivers/uio/uio_dfl.c > > -- > 2.7.4
[RFC PATCH v3 18/18] dyndbg: shuffle ddebug_table fields
In preparation to unionize structs _ddebug & ddebug_table, shuffle fields in latter so they match the layout of the former. This MAY simplify initialization of the header field, in particular by preserving *sites. It also sets up a later conversion to a flex-array ddebugs[]. This step is mostly to isolate/prove no breakage before HEAD++ Signed-off-by: Jim Cromie --- include/linux/dynamic_debug.h | 29 + lib/dynamic_debug.c | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 229cfd81ffb3..22f11218047f 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -66,6 +66,35 @@ struct _ddebug { #endif } __aligned(8); +/* this pair of header structs correspond quite deeply to struct + * _ddebug(|_site)s above; they are also created into __dyndbg* + * sections (by DEFINE_DYNAMIC_DEBUG_TABLE), and should be unionized + * with them to reinforce this. + * + * struct _ddebug_header is the important one, it has enough space to + * take struct ddebug_table's job, ie: link together into a list, and + * keep track of the modname & _ddebug(|_sites) vectors. + * + * Its other job is handled by its placement in the front of a + * module's _ddebug[N] entries. Each _ddebug knows its N, so the + * header's address is computable, and its site pointer is available + * to get _ddebug_sites[N]. Then we can drop _ddebug.sites, regaining + * parity with original _ddebug footprint. + * + * Eventually, N will index a fetch from a compressed block, or for + * enabled callsites, a hash. A global hash is probably adequate, if + * ~5k elements doesnt degrade access time. + */ +struct _ddebug_site_header { + /* we have 24 bytes total here, all currently unused */ +} __aligned(8); + +struct _ddebug_header { + struct _ddebug_site* sites; + struct list_head link; + const char* mod_name; + unsigned num_ddebugs; +} __aligned(8); #if defined(CONFIG_DYNAMIC_DEBUG_CORE) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c1a7345277eb..5d1ce7f21c30 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -45,11 +45,11 @@ extern struct _ddebug_site __start___dyndbg_sites[]; extern struct _ddebug_site __stop___dyndbg_sites[]; struct ddebug_table { + struct _ddebug_site *sites; struct list_head link; const char *mod_name; unsigned int num_ddebugs; struct _ddebug *ddebugs; - struct _ddebug_site *sites; }; struct ddebug_query { -- 2.29.2
[RFC PATCH v3 17/18] dyndbg: RFC - DECLARE/DEFINE_DYNAMIC_DEBUG_TABLE
V4-proto - now currently diet-3i Simplify: .gnu.linkonce._mod_.dyndbg -> .gnu.linkonce.dyndbg ie drop _mod_ This puts a single header record at front of the vectors, solving 2 problems (discussed below) simultaneously: - header in front, allowing flex-array rep of layout. - single header, not per-module. adequate for needs, no wasted space. this now appears to work - get: dp range-check good on builtin, !builtin. todo: - _sym_##_site symbols in init/main.i still busted - _index init in __ddebug_add_module - cleanup commentary below. DEFINE_DYNAMIC_DEBUG_TABLE is based on DECLARE_DYNAMIC_DEBUG_METADATA. Like its model, it creates/allocates a pair of structs: _ddebug & _ddebug_site, and inits them distinctively. Its purpose is to create an in-situ module-header-pair for each module's sub-vectors of _ddebug[], _ddebug_sites[]. Once the header-pair is reliably linked into the vectors, code can get from _ddebug[N] to the header-pair, then to _ddebug_sites[N] at runtime by saving N into _ddebug._index during init. With this, we can drop the site pointer in struct _ddebug. Eventually, this module-header-pair can be adapted to be an in-situ replacement for the separately allocated struct ddebug_tables, and be linked into the ddebug_tables list. RFC, NOTES: DYNAMIC_DEBUG is a 'transparent' facility, in that uses of pr_debug get extra features without additional api. Because of this, DEFINE_DYNAMIC_DEBUG_TABLE is 'transparently' invoked by dynamic_debug.h on behalf of all its (indirect) users, including printk.h. IOW this has wide effects; it results in multiple redundant declarations of header records, even single object files may get multiple copies. Using .gnu.linkonce._mod_.dyndbg(|_site) section names and "__used __weak " seems to resolve the redundancies. I havent tried with clang. In vmlinux-lds.h, the 2 KEEPs are modified to append those 2 new header sections to their respective existing __dyndbg* sections, in an interleaved manner. This places the header records immediately after the modules' block of _ddebug*s, in a knowable offset from &__dyndbg[0]. scripts/Makefile.lib gets a new -DKBUILD_MODSYM defn, with a value like KBUILD_MODNAME, except that its not __stringified. It is used to create a pair of module-ish named variables: _sym_##_dyndbg_base. For some non-obvious reason, the substitution doesnt work, resulting in per-module symbol names like KBUILD_MODSYM_dyndbg_base. This subtly alters the header initialization and is_dyndbg_header_pair(), which is used in __init to find the headers adjacent to each modules' block of _ddebug records. This isnt fatal to the plan; we just need the storage reserved where its accessible by known offset from the _ddebug[N] record of an enabled pr-debug. But it would be nice to have the symbol names consistent with the intent. I looked for a MODULE_SINGLETON(name_suffix) to use, similarly to how UNIQUE_ID is used to construct names, but found nothing. The .gnu.linkonce._mod_. works to eliminate all the extra headers in each module, but a problem remains; it adds unneeded headers for modules with zero pr-debugs. So Im seeing ~1500 extra headers. I tried several flavors of conditional linking, now think I want/need a linker-script language extension: KEEP ( *(__dyndbg) AND *(.gnu.linkonce.dyndbg) ) # my need KEEP ( *(foo_tbl) OR *(.gnu.linkonce.foo_alt) ) # for completeness I've managed to alter ld's grammar, but its only compile tested, and is missing the conditional linking pieces. Since this patchset's value proposition is a memory shrink, ~1500 extra headers is fatal, and this patchset must be a slow-cook. V4 todo: Time to simplify, drop _mod_, and change _ddebug.module_index to ._index, indicating that its no longer keyed to module, but to the whole compilation unit, which for the kernel includes all the builtin modules. loadable modules should get their own. tbt. this could obsolete all the above problems. Signed-off-by: Jim Cromie --- drivers/gpu/drm/i915/i915_drv.c | 2 + include/asm-generic/vmlinux.lds.h | 27 +--- include/linux/dynamic_debug.h | 100 +- lib/dynamic_debug.c | 28 ++--- scripts/Makefile.lib | 2 + 5 files changed, 139 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8e9cb44e66e5..51163e0d40cf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -91,6 +91,8 @@ static const struct drm_driver driver; +//DEFINE_DYNAMIC_DEBUG_TABLE(); + static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) { int domain = pci_domain_nr(dev_priv->drm.pdev->bus); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 4f2af9de2f03..482d94078785 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -335,6 +335,22 @@ KEEP(*(.dtb.init.rodata))
[RFC PATCH v3 16/18] dyndbg: prevent build bugs via -DNO_DYNAMIC_DEBUG_TABLE
The next patch adds DEFINE_DYNAMIC_DEBUG_TABLE(), which breaks some subtrees with special compile constraints (efi etc). Avoid this by adding a define to suppress the *remote declaration* done by DEFINE_DYNAMIC_DEBUG_TABLE(), automatically, on behalf of all possible users of pr_debug. Signed-off-by: Jim Cromie --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/entry/vdso/Makefile | 3 +++ arch/x86/purgatory/Makefile | 1 + drivers/firmware/efi/libstub/Makefile | 3 ++- 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e0bc3988c3fa..ada4eb960d95 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -31,6 +31,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE cflags-$(CONFIG_X86_32) := -march=i386 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 05c4abc2fdfd..619878f2c427 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -29,6 +29,9 @@ vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o vobjs-$(CONFIG_X86_SGX)+= vsgx.o +# avoid a x86_64_RELATIVE error +KBUILD_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE + # files to link into kernel obj-y += vma.o extable.o KASAN_SANITIZE_vma.o := y diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 95ea17a9d20c..95ba7b18410f 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -35,6 +35,7 @@ PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss -g0 PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING PURGATORY_CFLAGS += -fno-stack-protector +PURGATORY_CFLAGS += -DNO_DYNAMIC_DEBUG_TABLE # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c23466e05e60..def8febefbd3 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -13,7 +13,8 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ \ -Wno-pointer-sign \ $(call cc-disable-warning, address-of-packed-member) \ $(call cc-disable-warning, gnu) \ - -fno-asynchronous-unwind-tables + -fno-asynchronous-unwind-tables \ + -DNO_DYNAMIC_DEBUG_TABLE # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin -- 2.29.2
[RFC PATCH v3 13/18] dyndbg+module: expose ddebug_sites to modules
In order to drop the pointer connecting _ddebug's to _ddebug_sites, we need to elevate the latter; we need to track it in (internal) ddebug_tables, and set it in ddebug_add_module. That last part exposes it by interface to module.c, so we add a field to load_info, and adjust load_module to initialize it from the elf section. Its possible that this closes a "hole" created when __dyndbg_sites section was added, in that the section wasn't "managed" directly, and could conceivably get lost later. I never saw any misbehavior loading i915.ko into a vm, but still.. TBD/RFC: these 2 vectors should be in a single struct. if this struct can have ddebugs[], ie a flex-array, then container_of can get us to the sites*, yielding &ddebug_sites[N], and allowing to drop _ddebug.site The trouble with this is that ddebugs* now points to someone elses memory, and we cant just steal it and stomp on the memory just in front of it (for the sites ptr). rename n to numdbgs Signed-off-by: Jim Cromie --- include/linux/dynamic_debug.h | 4 ++-- kernel/module-internal.h | 1 + kernel/module.c | 9 ++--- lib/dynamic_debug.c | 18 +++--- 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index d811273c8484..7d33475d226a 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -70,8 +70,8 @@ struct _ddebug { /* exported for module authors to exercise >control */ int dynamic_debug_exec_queries(const char *query, const char *modname); -int ddebug_add_module(struct _ddebug *tab, unsigned int n, - const char *modname); +int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites, + unsigned int numdbgs, const char *modname); extern int ddebug_remove_module(const char *mod_name); extern __printf(2, 3) void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...); diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 33783abc377b..fb61eb2f8032 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -18,6 +18,7 @@ struct load_info { char *secstrings, *strtab; unsigned long symoffs, stroffs, init_typeoffs, core_typeoffs; struct _ddebug *debug; + struct _ddebug_site *sites; unsigned int num_debug; bool sig_ok; #ifdef CONFIG_KALLSYMS diff --git a/kernel/module.c b/kernel/module.c index 30479355ab85..792903230acd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2780,11 +2780,12 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) } #endif /* CONFIG_KALLSYMS */ -static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsigned int num) +static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, + struct _ddebug_site *sites, unsigned int num) { if (!debug) return; - ddebug_add_module(debug, num, mod->name); + ddebug_add_module(debug, sites, num, mod->name); } static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug) @@ -,6 +3334,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) info->debug = section_objs(info, "__dyndbg", sizeof(*info->debug), &info->num_debug); + info->sites = section_objs(info, "__dyndbg_sites", + sizeof(*info->sites), &info->num_debug); return 0; } @@ -4004,7 +4007,7 @@ static int load_module(struct load_info *info, const char __user *uargs, goto free_arch_cleanup; } - dynamic_debug_setup(mod, info->debug, info->num_debug); + dynamic_debug_setup(mod, info->debug, info->sites, info->num_debug); /* Ftrace init must be called in the MODULE_STATE_UNFORMED state */ ftrace_module_init(mod); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index fdc2d0e15731..5529b17461ae 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -49,6 +49,7 @@ struct ddebug_table { const char *mod_name; unsigned int num_ddebugs; struct _ddebug *ddebugs; + struct _ddebug_site *sites; }; struct ddebug_query { @@ -1014,8 +1015,8 @@ static const struct proc_ops proc_fops = { * Allocate a new ddebug_table for the given module * and add it to the global list. */ -int ddebug_add_module(struct _ddebug *tab, unsigned int n, -const char *name) +int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites, + unsigned int numdbgs, const char *modname) { struct ddebug_table *dt; @@ -1030,15 +1031,16 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, * member of struct module, which lives at least as long as * this struct ddebug_table. */ - dt->mod_name
[RFC PATCH v3 09/18] dyndbg: optimize ddebug_emit_prefix
Add early return if no callsite info is specified in site-flags. This avoids fetching site info that isn't going to be printed. Signed-off-by: Jim Cromie --- include/linux/dynamic_debug.h | 9 + lib/dynamic_debug.c | 3 +++ 2 files changed, 12 insertions(+) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index bc8027292c02..aa4a3f5d8d35 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -40,6 +40,15 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID(1<<4) + +#define _DPRINTK_FLAGS_INCL_ANYSITE\ + (_DPRINTK_FLAGS_INCL_MODNAME\ +| _DPRINTK_FLAGS_INCL_FUNCNAME \ +| _DPRINTK_FLAGS_INCL_LINENO) +#define _DPRINTK_FLAGS_INCL_ANY\ + (_DPRINTK_FLAGS_INCL_ANYSITE\ +| _DPRINTK_FLAGS_INCL_TID) + #if defined DEBUG #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT #else diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 1f0cac4a66af..af9cf97f869b 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -632,6 +632,9 @@ static char *dynamic_emit_prefix(const struct _ddebug *dp, char *buf) } pos_after_tid = pos; + if (!(dp->flags & _DPRINTK_FLAGS_INCL_ANYSITE)) + return buf; + if (desc) { if (dp->flags & _DPRINTK_FLAGS_INCL_MODNAME) pos += snprintf(buf + pos, remaining(pos), "%s:", -- 2.29.2
[RFC PATCH v3 14/18] dyndbg: add ddebug_site(_get|_put) abstraction
Replace direct ->site refs with _get(),_put() internal API. Right now, _get() just returns ->site and _put() does nothing. Later we can replace that implementation with one using ->module_index to fetch then forget site data dynamically. Several approaches are possible: A: !site -> fill from backing store 1st try at this is/was using zram. At init, it copied each callsite into a zs-allocation, and all site-> refs afterward went thru _get/_put to zs-map on demand, and zs-unmap the site info. This worked until I tried to keep callsites mapped while they're enabled, when it gave lockdep warns/panics. IIRC theres a zram patchset doing something with locking; I need to retry this approach, even if other options are better, this might be a validating use case. B: block store Another approach is to compress the new linker section, using some algorithm thats good at indexed decompression. I probed this approach, using objcopy, unsuccessfully: objcopy --dump-section __dyndbg=dd \ --dump-section __dyndbg_sites=ddsites $IMG >From vmlinux.o dumps were mostly empty (pre-link/reloc data?) and vmlinux didnt have the section. C: callsite composed from __dyndbg[N] & __dyndbg_site[N] We know _ddebug records are in a vector, either in the builtin __dyndbg linker section, or the same from a modprobed one. The builtin section has all builtin module sub-sections catenated dogether. At init, we iterate over the section, and "parse it" by creating a ddebug_table for each module with prdebugs. ddebug_table.num_debugs remembers the size of each modules' vector of prdebugs. We need a few things: - _ddebug.index field, which knows offset to start of this sub-vector. this new field will be "free" because the struct has padding. it can be initialized during init, then RO. - a back-pointer at the beginning of the sub-vector, to the ddebug_table "owning" (but not containing) this sub-vector of prdebugs. If we had both, we could get from the ddebug element to its vector root, back up to the owning ddebug_table, then down to the _callsite vector, and index to the right element. While slower than a pointer deref, this is a cold path, and it allows elimination of the per-callsite pointer member, thus greater density of the sections, and still can support sparse site info. That back-pointer feels tricky. It needs to be 1st in the sub-vector D: (C1?) add a header record to each sub-vector If we can insert a header record into each modules' __dyndbg* section sub-vectors, we can simplify the cold path above; a single sites* pointer in the header can give us access to __dyndbg_sites[N] Signed-off-by: Jim Cromie --- lib/dynamic_debug.c | 31 +-- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 5529b17461ae..34329e323ed5 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -144,6 +144,14 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) query->first_lineno, query->last_lineno); } +static struct _ddebug_site *ddebug_site_get(struct _ddebug *dp) +{ + return dp->site; /* passthru abstraction */ +} +static inline void ddebug_site_put(struct _ddebug *dp) +{ +} + static int ddebug_match_site(const struct ddebug_query *query, const struct _ddebug *dp, const struct _ddebug_site *dc) @@ -239,16 +247,18 @@ static int ddebug_change(const struct ddebug_query *query, for (i = 0; i < dt->num_ddebugs; i++) { struct _ddebug *dp = &dt->ddebugs[i]; - struct _ddebug_site *dc = dp->site; + struct _ddebug_site *dc; + + dc = ddebug_site_get(dp); if (!ddebug_match_site(query, dp, dc)) - continue; + goto skipsite; nfound++; newflags = (dp->flags & modifiers->mask) | modifiers->flags; if (newflags == dp->flags) - continue; + goto skipsite; ddebug_alter_site(dp, modifiers); @@ -264,6 +274,9 @@ static int ddebug_change(const struct ddebug_query *query, dt->mod_name, dp->lineno, ddebug_describe_flags(dp->flags, &fbuf), dp->format); + + skipsite: + ddebug_site_put(dp); } } mutex_unlock(&ddebug_lock); @@ -633,11 +646,11 @@ static int remaining(int wrote) return 0; } -static char *__dynamic_emit_prefix(const struct _ddebug *dp, char *buf) +static char *__dynamic_emit_prefix(struct _ddebug *dp, char *buf) { int pos_after_tid; int pos = 0; - co
[RFC PATCH v3 15/18] dyndbg: add _index to struct _ddebug
We currently use dp->site to map: &__dyndbg[N] -> &__dyndbg_sites[N]. We want to drop site, new _ddebug._index provides the N. The mapping is done in ddebug_site_get(): For builtin modules, a _ddebug *ptr is between __start___dyndbg and __stop___dyndbg, and we can use &__start___dyndbg_sites[N] directly. For loadable modules, we still need work, so we print rubbish, and just return site pointer (which is correct). ddebug_add_module() handles _index initialization: Its new task is to number each module consecutively, so it gets new base arg to pass the next starting index. To actually drop site, We need both the module's __dyndbg* section addys, and we need their relative placement to have a base-to-base offset. PLAN - a table header connecting 2 tables. - ddebug_table points to both __dyndbgs & __dyndbg_sites. but *ddebugs & *sites are independent. no path from ddebugs[n] -> ddebug_sites[n] If we have a header record in-situ, which keeps the site pointer we seek to eliminate from _ddebug, and its in element[0] of both vectors, we can go: ddebugs[n] -> ddebugs[0] -> containerof -> site[n] union ddebug_table_header { struct ddebug_table *owner; struct _ddebug item; } and struct ddebug_table_vector { struct ddebug_table *owner; struct _ddebug vector[]; } Signed-off-by: Jim Cromie --- include/linux/dynamic_debug.h | 2 ++ lib/dynamic_debug.c | 43 +-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 7d33475d226a..18689db0e2c0 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -29,6 +29,7 @@ struct _ddebug { /* format is always needed, lineno shares word with flags */ const char *format; const unsigned lineno:18; + unsigned _index:14; /* * The flags field controls the behaviour at the callsite. * The bits here are changed dynamically when the user @@ -56,6 +57,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_DEFAULT 0 #endif unsigned int flags:8; + #ifdef CONFIG_JUMP_LABEL union { struct static_key_true dd_key_true; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 34329e323ed5..3b53035d63d6 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -123,6 +123,8 @@ do { \ #define vpr_info(fmt, ...) vnpr_info(1, fmt, ##__VA_ARGS__) #define v2pr_info(fmt, ...)vnpr_info(2, fmt, ##__VA_ARGS__) #define v3pr_info(fmt, ...)vnpr_info(3, fmt, ##__VA_ARGS__) +#define v4pr_info(fmt, ...)vnpr_info(4, fmt, ##__VA_ARGS__) +#define v5pr_info(fmt, ...)vnpr_info(5, fmt, ##__VA_ARGS__) static void vpr_info_dq(const struct ddebug_query *query, const char *msg) { @@ -146,7 +148,17 @@ static void vpr_info_dq(const struct ddebug_query *query, const char *msg) static struct _ddebug_site *ddebug_site_get(struct _ddebug *dp) { - return dp->site; /* passthru abstraction */ + v4pr_info("get %d: %s.%s.%d\n", dp->_index, dp->site->modname, + dp->site->function, dp->lineno); + + if (dp >= __start___dyndbg && dp < __stop___dyndbg) { + v4pr_info(" is builtin: %d %ld\n", dp->_index, dp - __start___dyndbg); + return &__start___dyndbg_sites[ dp - __start___dyndbg ]; + } else { + v3pr_info(" is loaded: %d %ld\n", dp->_index, dp - __start___dyndbg); + return dp->site; + } + return dp->site; } static inline void ddebug_site_put(struct _ddebug *dp) { @@ -1034,14 +1046,16 @@ static const struct proc_ops proc_fops = { * Allocate a new ddebug_table for the given module * and add it to the global list. */ -int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites, - unsigned int numdbgs, const char *modname) +static int __ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites, + unsigned numdbgs, unsigned base, + const char *modname) { struct ddebug_table *dt; + int i; dt = kzalloc(sizeof(*dt), GFP_KERNEL); if (dt == NULL) { - pr_err("error adding module: %s\n", name); + pr_err("error adding module: %s\n", modname); return -ENOMEM; } /* @@ -1055,6 +1069,13 @@ int ddebug_add_module(struct _ddebug *tab, struct _ddebug_site *sites, dt->ddebugs = tab; dt->sites = sites; + v3pr_info("add-module: %s\n", modname); + for (i = 0; i < numdbgs; i++) { + tab[i]._index = base++; + v3pr_info(" %d %d %s.%s.%d\n", i, tab[i]._index, modname, + tab[i].site->function, tab[i].lineno); + } + mutex_lock(&ddebug_lock); list_add(&dt->link, &ddebug_tables);
[RFC PATCH v3 08/18] dyndbg: accept null site in ddebug_proc_show
Accept a ddebug record with a null site pointer, and write abbreviated output for that record that doesn't include site info (but does include line-number, since that can be used in >control queries). Also add a 2nd header line with a template for the new output. Signed-off-by: Jim Cromie --- lib/dynamic_debug.c | 25 + 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 0c535f3c2ba9..1f0cac4a66af 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -918,18 +918,27 @@ static int ddebug_proc_show(struct seq_file *m, void *p) if (p == SEQ_START_TOKEN) { seq_puts(m, -"# filename:lineno [module]function flags format\n"); +"#: filename:lineno [module]function flags format\n" +"#| [module]:lineno flags format\n" + ); return 0; } dc = dp->site; - - seq_printf(m, "%s:%u [%s]%s =%s \"", - trim_prefix(dc->filename), dp->lineno, - iter->table->mod_name, dc->function, - ddebug_describe_flags(dp->flags, &flags)); - seq_escape(m, dp->format, "\t\r\n\""); - seq_puts(m, "\"\n"); + if (dc) { + seq_printf(m, "%s:%u [%s]%s =%s \"", + trim_prefix(dc->filename), dp->lineno, + iter->table->mod_name, dc->function, + ddebug_describe_flags(dp->flags, &flags)); + seq_escape(m, dp->format, "\t\r\n\""); + seq_puts(m, "\"\n"); + } else { + seq_printf(m, "[%s]:%u =%s \"", + iter->table->mod_name, dp->lineno, + ddebug_describe_flags(dp->flags, &flags)); + seq_escape(m, dp->format, "\t\r\n\""); + seq_puts(m, "\"\n"); + } return 0; } -- 2.29.2
[RFC PATCH v3 12/18] dyndbg: allow deleting site info via control interface
Allow users & subsystems to selectively delete callsite info for pr-debug callsites. Hopefully, this can lead to actual recovery of memory. DRM is a potential user which would drop the sites: - has distinct categories for logging, and can map them over to a format prefix, like: "drm:core:", "drm:kms:", etc. - are happy with group control of all the callsites in a class/cateory. individual control is still possible using queries including line numbers - don't need dynamic "module:function:line:" prefixes in log messages - don't care about loss of context in /proc/dynamic_debug/control before: init/initramfs.c:485 [initramfs]unpack_to_rootfs =_ "Detected %s compressed data\012" init/main.c:1337 [main]run_init_process =pm "%s\012" init/main.c:1335 [main]run_init_process =pm " with environment:\012" init/main.c:1334 [main]run_init_process =pm "%s\012" init/main.c:1332 [main]run_init_process =pm " with arguments:\012" init/main.c:1121 [main]initcall_blacklisted =pm "initcall %s blacklisted\012" init/main.c:1082 [main]initcall_blacklist =pm "blacklisting initcall %s\012" then: bash-5.0# echo file init/main.c +D > /proc/dynamic_debug/control after: init/initramfs.c:485 [initramfs]unpack_to_rootfs =_ "Detected %s compressed data\012" [main]:1337 =pmD "%s\012" [main]:1335 =pmD " with environment:\012" [main]:1334 =pmD "%s\012" [main]:1332 =pmD " with arguments:\012" [main]:1121 =pmD "initcall %s blacklisted\012" [main]:1082 =pmD "blacklisting initcall %s\012" Notes: If Drm adopted dyndbg, i915 + drm* would add ~1600 prdebugs, amdgpu + drm* would add ~3200 callsites, so the additional memory costs are substantial. In trade, drm and drivers would avoid lots of calls to drm_debug_enabled(). This patch should reduce the costs. Using this interface, drm could drop site info for all categories / prefixes controlled by bits in drm.debug, while preserving site info and individual selectivity for any uncategorized prdebugs, and for all other modules. Lastly, because lineno field was not moved into _ddebug_callsite, it can be used to modify a single[*] callsite even if drm has dropped all the callsite data: echo module $mod format ^$prefix line $line +p >control Dropping site info is a one-way, information losing operation, so minor misuse is possible. Worst case is maybe (depending upon previous settings) some loss of logging context/decorations. echo +D > /proc/dynamic_debug/control [*] amdgpu has some macros invoking clusters of pr_debugs; each use of them creates a cluster of pr-debugs with the same line number. Signed-off-by: Jim Cromie --- include/linux/dynamic_debug.h | 1 + lib/dynamic_debug.c | 9 + 2 files changed, 10 insertions(+) diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index aa4a3f5d8d35..d811273c8484 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -40,6 +40,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_INCL_FUNCNAME (1<<2) #define _DPRINTK_FLAGS_INCL_LINENO (1<<3) #define _DPRINTK_FLAGS_INCL_TID(1<<4) +#define _DPRINTK_FLAGS_DELETE_SITE (1<<7) /* drop site info to save ram */ #define _DPRINTK_FLAGS_INCL_ANYSITE\ (_DPRINTK_FLAGS_INCL_MODNAME\ diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 6fbd9099c2fa..fdc2d0e15731 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -92,6 +92,7 @@ static struct { unsigned flag:8; char opt_char; } opt_array[] = { { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, { _DPRINTK_FLAGS_INCL_TID, 't' }, { _DPRINTK_FLAGS_NONE, '_' }, + { _DPRINTK_FLAGS_DELETE_SITE, 'D' }, }; struct flagsbuf { char buf[ARRAY_SIZE(opt_array)+1]; }; @@ -201,6 +202,14 @@ static void ddebug_alter_site(struct _ddebug *dp, } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT) static_branch_enable(&dp->key.dd_key_true); #endif + /* delete site info for this callsite */ + if (modifiers->flags & _DPRINTK_FLAGS_DELETE_SITE) { + if (dp->site) { + vpr_info("dropping site info %s.%s.%d\n", dp->site->filename, + dp->site->function, dp->lineno); + dp->site = NULL; + } + } } /* -- 2.29.2
[RFC PATCH v3 11/18] dyndbg: refactor ddebug_alter_site out of ddebug_change
Move the JUMP_LABEL/static-key code to a separate function. no functional changes. Signed-off-by: Jim Cromie --- lib/dynamic_debug.c | 22 +++--- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 2d011ac3308d..6fbd9099c2fa 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -191,6 +191,18 @@ static int ddebug_match_site(const struct ddebug_query *query, return true; } +static void ddebug_alter_site(struct _ddebug *dp, + struct flag_settings *modifiers) +{ +#ifdef CONFIG_JUMP_LABEL + if (dp->flags & _DPRINTK_FLAGS_PRINT) { + if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT)) + static_branch_disable(&dp->key.dd_key_true); + } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT) + static_branch_enable(&dp->key.dd_key_true); +#endif +} + /* * Search the tables for _ddebug's which match the given `query' and * apply the `flags' and `mask' to them. Returns number of matching @@ -227,13 +239,9 @@ static int ddebug_change(const struct ddebug_query *query, newflags = (dp->flags & modifiers->mask) | modifiers->flags; if (newflags == dp->flags) continue; -#ifdef CONFIG_JUMP_LABEL - if (dp->flags & _DPRINTK_FLAGS_PRINT) { - if (!(modifiers->flags & _DPRINTK_FLAGS_PRINT)) - static_branch_disable(&dp->key.dd_key_true); - } else if (modifiers->flags & _DPRINTK_FLAGS_PRINT) - static_branch_enable(&dp->key.dd_key_true); -#endif + + ddebug_alter_site(dp, modifiers); + dp->flags = newflags; if (dc) -- 2.29.2