[patch V3 29/44] x86/idt: Move 32bit idt_descr to C code
32bit has the idt_descr sitting in the low level assembly entry code. There is no reason for that. Move it into the C file and use the 64bit version of it. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head_32.S |6 -- arch/x86/kernel/idt.c | 10 +- 2 files changed, 5 insertions(+), 11 deletions(-) --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -622,7 +622,6 @@ ENTRY(initial_stack) .data .globl boot_gdt_descr -.globl idt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -631,11 +630,6 @@ ENTRY(initial_stack) .word __BOOT_DS+7 .long boot_gdt - __PAGE_OFFSET - .word 0 # 32-bit align idt_desc.address -idt_descr: - .word IDT_ENTRIES*8-1 # idt contains 256 entries - .long idt_table - # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address ENTRY(early_gdt_descr) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,15 +10,15 @@ /* Must be page-aligned because the real IDT is used in a fixmap. */ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; -#ifdef CONFIG_X86_64 -/* No need to be aligned, but done to keep all IDTs defined the same way. */ -gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; - struct desc_ptr idt_descr __ro_after_init = { - .size = IDT_ENTRIES * 16 - 1, + .size = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1, .address= (unsigned long) idt_table, }; +#ifdef CONFIG_X86_64 +/* No need to be aligned, but done to keep all IDTs defined the same way. */ +gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; + const struct desc_ptr debug_idt_descr = { .size = IDT_ENTRIES * 16 - 1, .address= (unsigned long) debug_idt_table,
[patch V3 19/44] x86/ipi: Make platform IPI depend on APIC
The platform IPI vector is only installed when the local APIC is enabled. All users of it depend on the local APIC anyway. Make the related code conditional on CONFIG_X86_LOCAL_APIC. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/entry_arch.h |3 +-- arch/x86/kernel/irq.c | 11 ++- 2 files changed, 7 insertions(+), 7 deletions(-) --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -17,8 +17,6 @@ BUILD_INTERRUPT(irq_move_cleanup_interru BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR) #endif -BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) - #ifdef CONFIG_HAVE_KVM BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR) BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR) @@ -37,6 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_i BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR) #ifdef CONFIG_IRQ_WORK BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -29,9 +29,6 @@ EXPORT_PER_CPU_SYMBOL(irq_regs); atomic_t irq_err_count; -/* Function pointer for generic interrupt vector handling */ -void (*x86_platform_ipi_callback)(void) = NULL; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. 
@@ -87,13 +84,13 @@ int arch_show_interrupts(struct seq_file for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); seq_puts(p, " APIC ICR read retries\n"); -#endif if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_puts(p, " Platform interrupts\n"); } +#endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) @@ -183,9 +180,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs; sum += irq_stats(cpu)->icr_read_retry_count; -#endif if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; +#endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; @@ -259,6 +256,9 @@ u64 arch_irq_stat(void) return 1; } +#ifdef CONFIG_X86_LOCAL_APIC +/* Function pointer for generic interrupt vector handling */ +void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. */ @@ -275,6 +275,7 @@ u64 arch_irq_stat(void) exiting_irq(); set_irq_regs(old_regs); } +#endif #ifdef CONFIG_HAVE_KVM static void dummy_handler(void) {}
[patch V3 30/44] x86/idt: Remove unused set_trap_gate()
This inline is not used at all. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 12 1 file changed, 12 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -446,18 +446,6 @@ static inline void set_system_intr_gate( _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); } -static inline void set_system_trap_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS); -} - -static inline void set_trap_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS); -} - static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) { BUG_ON((unsigned)n > 0xFF);
[patch V3 25/44] x86: Replace access to desc_struct:a/b fields
The union inside of desc_struct allows access to the raw u32 parts of the descriptors. This raw access part is about to go away. Replace the few code parts which access those fields. Signed-off-by: Thomas Gleixner Reviewed-by: Boris Ostrovsky Cc: Juergen Gross --- arch/x86/include/asm/xen/hypercall.h |6 -- arch/x86/kernel/tls.c|2 +- arch/x86/xen/enlighten_pv.c |2 +- 3 files changed, 6 insertions(+), 4 deletions(-) --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -552,6 +552,8 @@ static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { + u32 *p = (u32 *) &desc; + mcl->op = __HYPERVISOR_update_descriptor; if (sizeof(maddr) == sizeof(long)) { mcl->args[0] = maddr; @@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall } else { mcl->args[0] = maddr; mcl->args[1] = maddr >> 32; - mcl->args[2] = desc.a; - mcl->args[3] = desc.b; + mcl->args[2] = *p++; + mcl->args[3] = *p; } trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4); --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -93,7 +93,7 @@ static void set_tls_desc(struct task_str while (n-- > 0) { if (LDT_empty(info) || LDT_zero(info)) { - desc->a = desc->b = 0; + memset(desc, 0, sizeof(*desc)); } else { fill_ldt(desc, info); --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -494,7 +494,7 @@ static void __init xen_load_gdt_boot(con static inline bool desc_equal(const struct desc_struct *d1, const struct desc_struct *d2) { - return d1->a == d2->a && d1->b == d2->b; + return !memcmp(d1, d2, sizeof(*d1)); } static void load_TLS_descriptor(struct thread_struct *t,
[patch V3 33/44] x86/idt: Move early IDT setup out of 32bit asm
The early IDT setup can be done in C code like it's done on 64 bit. Reuse the 64 bit version. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/segment.h |1 + arch/x86/kernel/head32.c |4 arch/x86/kernel/head_32.S | 36 ++-- arch/x86/kernel/idt.c |4 4 files changed, 11 insertions(+), 34 deletions(-) --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -238,6 +238,7 @@ #ifndef __ASSEMBLY__ extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE]; +extern void early_ignore_irq(void); /* * Load a segment. Fall back on loading the zero segment if something goes --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,9 @@ static void __init i386_default_early_se asmlinkage __visible void __init i386_start_kernel(void) { cr4_init_shadow(); + + idt_setup_early_handler(); + sanitize_boot_params(&boot_params); x86_early_init_platform_quirks(); --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -345,7 +345,6 @@ ENTRY(startup_32_smp) movl %eax,%cr0 lgdt early_gdt_descr - lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax# reload all the segment registers movl %eax,%ss # after changing gdt. @@ -378,37 +377,6 @@ ENDPROC(startup_32_smp) */ __INIT setup_once: - /* -* Set up a idt with 256 interrupt gates that push zero if there -* is no error code and then jump to early_idt_handler_common. -* It doesn't actually load the idt - that needs to be done on -* each CPU. Interrupts are enabled elsewhere, when we can be -* relatively sure everything is ok. 
-*/ - - movl $idt_table,%edi - movl $early_idt_handler_array,%eax - movl $NUM_EXCEPTION_VECTORS,%ecx -1: - movl %eax,(%edi) - movl %eax,4(%edi) - /* interrupt gate, dpl=0, present */ - movl $(0x8E00 + __KERNEL_CS),2(%edi) - addl $EARLY_IDT_HANDLER_SIZE,%eax - addl $8,%edi - loop 1b - - movl $256 - NUM_EXCEPTION_VECTORS,%ecx - movl $ignore_int,%edx - movl $(__KERNEL_CS << 16),%eax - movw %dx,%ax/* selector = 0x0010 = cs */ - movw $0x8E00,%dx/* interrupt gate - dpl=0, present */ -2: - movl %eax,(%edi) - movl %edx,4(%edi) - addl $8,%edi - loop 2b - #ifdef CONFIG_CC_STACKPROTECTOR /* * Configure the stack canary. The linker can't handle this by @@ -498,7 +466,7 @@ ENDPROC(early_idt_handler_common) /* This is the default interrupt "handler" :-) */ ALIGN -ignore_int: +ENTRY(early_ignore_irq) cld #ifdef CONFIG_PRINTK pushl %eax @@ -533,7 +501,7 @@ ENDPROC(early_idt_handler_common) hlt_loop: hlt jmp hlt_loop -ENDPROC(ignore_int) +ENDPROC(early_ignore_irq) __INITDATA .align 4 GLOBAL(early_recursion_flag) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -34,6 +34,10 @@ void __init idt_setup_early_handler(void for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); +#ifdef CONFIG_X86_32 + for ( ; i < NR_VECTORS; i++) + set_intr_gate(i, early_ignore_irq); +#endif load_idt(&idt_descr); }
Re: [PATCH] leds/trigger/activity: add a system activity LED trigger
Hi Jacek, On Sun, Aug 27, 2017 at 06:44:05PM +0200, Jacek Anaszewski wrote: > Hi Willy, > > Thanks for the updated patch. > > One formal note: please send the patches with git send-email instead > of attaching them to the message. Yep, I hesitated and wanted to reply. Will do it the other way next time, sorry for the hassle. > > diff --git a/drivers/leds/trigger/ledtrig-activity.c > > b/drivers/leds/trigger/ledtrig-activity.c > > new file mode 100644 > > index 000..6f00235 > > --- /dev/null > > +++ b/drivers/leds/trigger/ledtrig-activity.c > > @@ -0,0 +1,297 @@ > > +/* > > + * Activity LED trigger > > + * > > + * Copyright (C) 2017 Willy Tarreau > > + * Partially based on Atsushi Nemoto's ledtrig-heartbeat.c. > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + * > > + */ > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > +#include > > Please sort the includes alphabetically. I'm amazed I did this, I suspect I inherited it from the original file because I'm also used to annoy people for the same thing! Shame on me! > > + activity_data->time_left -= 100; > > + if (activity_data->time_left <= 0) { > > + activity_data->time_left = 0; > > + activity_data->state = !activity_data->state; > > + led_set_brightness_nosleep(led_cdev, > > + (activity_data->state ^ activity_data->invert) ? > > + led_cdev->max_brightness : LED_OFF); > > Have you considered making the top brightness adjustable? I'd make it > possible especially that we have a similar solution in the > ledtrig-heartbeat.c already - see the following patch in 4.12: > > commit fb3d769173d26268d7bf068094a599bb28b2ac63 > Author: Jacek Anaszewski > Date: Wed Nov 9 11:43:46 2016 +0100 (...) I never thought about it and it makes a lot of sense actually. 
I'll check this commit, thanks for the pointer. > > + switch (pm_event) { > > + case PM_SUSPEND_PREPARE: > > + case PM_HIBERNATION_PREPARE: > > + case PM_RESTORE_PREPARE: > > + led_trigger_unregister(&activity_led_trigger); > > + break; > > + case PM_POST_SUSPEND: > > + case PM_POST_HIBERNATION: > > + case PM_POST_RESTORE: > > + rc = led_trigger_register(&activity_led_trigger); > > + if (rc) > > + pr_err("could not re-register activity trigger\n"); > > + break; > > + default: > > + break; > > + } > > + return NOTIFY_DONE; > > +} > > It turned out to cause problems in ledtrig-heartbeat.c and was reverted. > Please don't register pm notifier and remove related facilities from the > patch according to the following revert patch: > > commit 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc > Author: Zhang Bo > Date: Tue Jun 13 10:39:20 2017 +0800 OK fine for me. I thought it was mandatory to properly handle pm even though I was not particularly interested in this for this specific purpose. I'll send you an updated patch ASAP. Thanks very much for your review, Willy
[patch V3 31/44] x86/idt: Consolidate IDT invalidation
kexec and reboot have both code to invalidate IDT. Create a common function and use it. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h|3 +++ arch/x86/kernel/idt.c | 11 +++ arch/x86/kernel/machine_kexec_32.c | 14 +- arch/x86/kernel/reboot.c |4 +--- 4 files changed, 16 insertions(+), 16 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -503,4 +503,7 @@ static inline void load_current_idt(void else load_idt((const struct desc_ptr *)&idt_descr); } + +extern void idt_invalidate(void *addr); + #endif /* _ASM_X86_DESC_H */ --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -24,3 +24,14 @@ const struct desc_ptr debug_idt_descr = .address= (unsigned long) debug_idt_table, }; #endif + +/** + * idt_invalidate - Invalidate interrupt descriptor table + * @addr: The virtual address of the 'invalid' IDT + */ +void idt_invalidate(void *addr) +{ + struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 }; + + load_idt(&idt); +} --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -26,18 +26,6 @@ #include #include -static void set_idt(void *newidt, __u16 limit) -{ - struct desc_ptr curidt; - - /* ia32 supports unaliged loads & stores */ - curidt.size= limit; - curidt.address = (unsigned long)newidt; - - load_idt(&curidt); -} - - static void set_gdt(void *newgdt, __u16 limit) { struct desc_ptr curgdt; @@ -245,7 +233,7 @@ void machine_kexec(struct kimage *image) * If you want to load them you must set up your own idt & gdt. */ set_gdt(phys_to_virt(0), 0); - set_idt(phys_to_virt(0), 0); + idt_invalidate(phys_to_virt(0)); /* now call it */ image->start = relocate_kernel_ptr((unsigned long)image->head, --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -38,8 +38,6 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static const struct desc_ptr no_idt = {}; - /* * This is set if we need to go through the 'emergency' path. 
* When machine_emergency_restart() is called, we may be on @@ -638,7 +636,7 @@ static void native_machine_emergency_res break; case BOOT_TRIPLE: - load_idt(&no_idt); + idt_invalidate(NULL); __asm__ __volatile__("int3"); /* We're probably dead after this, but... */
[patch V3 32/44] x86/idt: Move early IDT handler setup to IDT code
The early IDT handler setup is done in C entry code for 64 bit and in ASM entry code for 32 bit. Move the 64bit variant to the IDT code so it can be shared with 32bit in the next step. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h |9 + arch/x86/kernel/head64.c|6 +- arch/x86/kernel/idt.c | 12 3 files changed, 22 insertions(+), 5 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -504,6 +504,15 @@ static inline void load_current_idt(void load_idt((const struct desc_ptr *)&idt_descr); } +extern void idt_setup_early_handler(void); +extern void idt_setup_early_traps(void); + +#ifdef CONFIG_X86_64 +extern void idt_setup_early_pf(void); +#else +static inline void idt_setup_early_pf(void) { } +#endif + extern void idt_invalidate(void *addr); #endif /* _ASM_X86_DESC_H */ --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -255,8 +255,6 @@ static void __init copy_bootdata(char *r asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) { - int i; - /* * Build-time sanity checks on the kernel image and module * area mappings. (these are purely build-time and produce no code) @@ -282,9 +280,7 @@ asmlinkage __visible void __init x86_64_ kasan_early_init(); - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) - set_intr_gate(i, early_idt_handler_array[i]); - load_idt((const struct desc_ptr *)&idt_descr); + idt_setup_early_handler(); copy_bootdata(__va(real_mode_data)); --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -26,6 +26,18 @@ const struct desc_ptr debug_idt_descr = #endif /** + * idt_setup_early_handler - Initializes the idt table with early handlers + */ +void __init idt_setup_early_handler(void) +{ + int i; + + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) + set_intr_gate(i, early_idt_handler_array[i]); + load_idt(&idt_descr); +} + +/** * idt_invalidate - Invalidate interrupt descriptor table * @addr: The virtual address of the 'invalid' IDT */
[patch V3 35/44] x86/idt: Switch early trap init to IDT tables
Add the initialization table for the early trap setup and replace the early trap init code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/idt.c | 53 arch/x86/kernel/setup.c |4 +-- arch/x86/kernel/traps.c | 27 3 files changed, 55 insertions(+), 29 deletions(-) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -48,6 +48,28 @@ struct idt_data { #define TSKG(_vector, _gdt)\ G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) +/* + * Early traps running on the DEFAULT_STACK because the other interrupt + * stacks work only after cpu_init(). + */ +static const __initdata struct idt_data early_idts[] = { + INTG(X86_TRAP_DB, debug), + SYSG(X86_TRAP_BP, int3), +#ifdef CONFIG_X86_32 + INTG(X86_TRAP_PF, page_fault), +#endif +}; + +#ifdef CONFIG_X86_64 +/* + * Early traps running on the DEFAULT_STACK because the other interrupt + * stacks work only after cpu_init(). + */ +static const __initdata struct idt_data early_pf_idts[] = { + INTG(X86_TRAP_PF, page_fault), +}; +#endif + /* Must be page-aligned because the real IDT is used in a fixmap. */ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; @@ -93,6 +115,37 @@ idt_setup_from_table(gate_desc *idt, con } /** + * idt_setup_early_traps - Initialize the idt table with early traps + * + * On X8664 these traps do not use interrupt stacks as they can't work + * before cpu_init() is invoked and sets up TSS. The IST variants are + * installed after that. + */ +void __init idt_setup_early_traps(void) +{ + idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts)); + load_idt(&idt_descr); +} + +#ifdef CONFIG_X86_64 +/** + * idt_setup_early_pf - Initialize the idt table with early pagefault handler + * + * On X8664 this does not use interrupt stacks as they can't work before + * cpu_init() is invoked and sets up TSS. The IST variant is installed + * after that. + * + * FIXME: Why is 32bit and 64bit installing the PF handler at different + * places in the early setup code? 
+ */ +void __init idt_setup_early_pf(void) +{ + idt_setup_from_table(idt_table, early_pf_idts, +ARRAY_SIZE(early_pf_idts)); +} +#endif + +/** * idt_setup_early_handler - Initializes the idt table with early handlers */ void __init idt_setup_early_handler(void) --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -891,7 +891,7 @@ void __init setup_arch(char **cmdline_p) */ olpc_ofw_detect(); - early_trap_init(); + idt_setup_early_traps(); early_cpu_init(); early_ioremap_init(); @@ -1162,7 +1162,7 @@ void __init setup_arch(char **cmdline_p) init_mem_mapping(); - early_trap_pf_init(); + idt_setup_early_pf(); /* * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features) --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -923,33 +923,6 @@ dotraplinkage void do_iret_error(struct } #endif -/* Set of traps needed for early debugging. */ -void __init early_trap_init(void) -{ - /* -* Don't use IST to set DEBUG_STACK as it doesn't work until TSS -* is ready in cpu_init() <-- trap_init(). Before trap_init(), -* CPU runs at ring 0 so it is impossible to hit an invalid -* stack. Using the original stack works well enough at this -* early stage. DEBUG_STACK will be equipped after cpu_init() in -* trap_init(). -*/ - set_intr_gate(X86_TRAP_DB, debug); - /* int3 can be called from all */ - set_system_intr_gate(X86_TRAP_BP, &int3); -#ifdef CONFIG_X86_32 - set_intr_gate(X86_TRAP_PF, page_fault); -#endif - load_idt(&idt_descr); -} - -void __init early_trap_pf_init(void) -{ -#ifdef CONFIG_X86_64 - set_intr_gate(X86_TRAP_PF, page_fault); -#endif -} - void __init trap_init(void) { int i;
[patch V3 34/44] x86/idt: Prepare for table based init
The IDT setup code is handled in several places. All of them use variants of set_intr_gate() inlines. This can be done with a table based initialization, which allows to reduce the inline zoo and puts all IDT related code and information into a single place. Add the infrastructure. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/idt.c | 67 ++ 1 file changed, 67 insertions(+) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -5,8 +5,49 @@ */ #include +#include +#include #include +struct idt_data { + unsigned intvector; + unsigned intsegment; + struct idt_bits bits; + const void *addr; +}; + +#define DPL0 0x0 +#define DPL3 0x3 + +#define DEFAULT_STACK 0 + +#define G(_vector, _addr, _ist, _type, _dpl, _segment) \ + { \ + .vector = _vector, \ + .bits.ist = _ist, \ + .bits.type = _type,\ + .bits.dpl = _dpl, \ + .bits.p = 1,\ + .addr = _addr,\ + .segment= _segment, \ + } + +/* Interrupt gate */ +#define INTG(_vector, _addr) \ + G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS) + +/* System interrupt gate */ +#define SYSG(_vector, _addr) \ + G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) + +/* Interrupt gate with interrupt stack */ +#define ISTG(_vector, _addr, _ist) \ + G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) + +/* Task gate */ +#define TSKG(_vector, _gdt)\ + G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3) + /* Must be page-aligned because the real IDT is used in a fixmap. 
*/ gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss; @@ -25,6 +66,32 @@ const struct desc_ptr debug_idt_descr = }; #endif +static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d) +{ + unsigned long addr = (unsigned long) d->addr; + + gate->offset_low= (u16) addr; + gate->segment = (u16) d->segment; + gate->bits = d->bits; + gate->offset_middle = (u16) (addr >> 16); +#ifdef CONFIG_X86_64 + gate->offset_high = (u32) (addr >> 32); + gate->reserved = 0; +#endif +} + +static __init void +idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) +{ + gate_desc desc; + + for (; size > 0; t++, size--) { + idt_init_desc(&desc, t); + set_bit(t->vector, used_vectors); + write_idt_entry(idt, t->vector, &desc); + } +} + /** * idt_setup_early_handler - Initializes the idt table with early handlers */
[patch V3 26/44] x86/gdt: Use bitfields for initialization
The GDT entry related code uses partially bitfields and macros which initialize the two 16 bit parts of the entry by magic shift and mask operations. Clean it up and use the bitfields to initialize and access entries. Signed-off-by: Thomas Gleixner --- arch/x86/entry/vdso/vma.c|2 - arch/x86/include/asm/desc.h | 26 ++- arch/x86/include/asm/desc_defs.h | 44 +-- arch/x86/math-emu/fpu_system.h |2 - 4 files changed, 38 insertions(+), 36 deletions(-) --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -351,7 +351,7 @@ static void vgetcpu_cpu_init(void *arg) * and 8 bits for the node) */ d.limit0 = cpu | ((node & 0xf) << 12); - d.limit = node >> 4; + d.limit1 = node >> 4; d.type = 5; /* RO data, expand down, accessed */ d.dpl = 3; /* Visible to user code */ d.s = 1;/* Not a system segment */ --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -23,7 +23,7 @@ static inline void fill_ldt(struct desc_ desc->s = 1; desc->dpl = 0x3; desc->p = info->seg_not_present ^ 1; - desc->limit = (info->limit & 0xf) >> 16; + desc->limit1= (info->limit & 0xf) >> 16; desc->avl = info->useable; desc->d = info->seg_32bit; desc->g = info->limit_in_pages; @@ -170,14 +170,20 @@ static inline void pack_descriptor(struc unsigned long limit, unsigned char type, unsigned char flags) { - desc->a = ((base & 0x) << 16) | (limit & 0x); - desc->b = (base & 0xff00) | ((base & 0xff) >> 16) | - (limit & 0x000f) | ((type & 0xff) << 8) | - ((flags & 0xf) << 20); - desc->p = 1; + desc->limit0= (u16) limit; + desc->base0 = (u16) base; + desc->base1 = (base >> 16) & 0xFF; + desc->type = type & 0x0F; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit1= (limit >> 16) & 0xF; + desc->avl = (flags >> 0) & 0x01; + desc->l = (flags >> 1) & 0x01; + desc->d = (flags >> 2) & 0x01; + desc->g = (flags >> 3) & 0x01; } - static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { @@ -195,7 +201,7 @@ static inline void set_tssldt_descriptor 
desc->base2 = (addr >> 24) & 0xFF; desc->base3 = (u32) (addr >> 32); #else - pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); + pack_descriptor((struct desc_struct *)d, addr, size, type, 0); #endif } @@ -395,13 +401,13 @@ static inline void set_desc_base(struct static inline unsigned long get_desc_limit(const struct desc_struct *desc) { - return desc->limit0 | (desc->limit << 16); + return desc->limit0 | (desc->limit1 << 16); } static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit) { desc->limit0 = limit & 0x; - desc->limit = (limit >> 16) & 0xf; + desc->limit1 = (limit >> 16) & 0xf; } #ifdef CONFIG_X86_64 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -11,34 +11,30 @@ #include -/* - * FIXME: Accessing the desc_struct through its fields is more elegant, - * and should be the one valid thing to do. However, a lot of open code - * still touches the a and b accessors, and doing this allow us to do it - * incrementally. We keep the signature as a struct, rather than a union, - * so we can get rid of it transparently in the future -- glommer - */ /* 8 byte segment descriptor */ struct desc_struct { - union { - struct { - unsigned int a; - unsigned int b; - }; - struct { - u16 limit0; - u16 base0; - unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; - unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; - }; - }; + u16 limit0; + u16 base0; + u16 base1: 8, type: 4, s: 1, dpl: 2, p: 1; + u16 limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; } __attribute__((packed)); -#define GDT_ENTRY_INIT(flags, base, limit) { { { \ - .a = ((limit) & 0x) | (((base) & 0x) << 16), \ - .b = (((base) & 0xff) >> 16) | (((flags) & 0xf0ff) << 8) | \ - ((limit) & 0xf) | ((base) & 0xff00), \ -
[patch V3 37/44] x86/idt: Move ist stack based traps to table init
Initialize the IST based traps via a table Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h |2 ++ arch/x86/kernel/idt.c | 22 ++ arch/x86/kernel/traps.c |9 + 3 files changed, 25 insertions(+), 8 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -509,9 +509,11 @@ extern void idt_setup_early_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); +extern void idt_setup_ist_traps(void); extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } +static inline void idt_setup_ist_traps(void) { } static inline void idt_setup_debugidt_traps(void) { } #endif --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -92,6 +92,20 @@ struct desc_ptr idt_descr __ro_after_ini gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; /* + * The exceptions which use Interrupt stacks. They are setup after + * cpu_init() when the TSS has been initialized. + */ +static const __initdata struct idt_data ist_idts[] = { + ISTG(X86_TRAP_DB, debug, DEBUG_STACK), + ISTG(X86_TRAP_NMI, nmi,NMI_STACK), + ISTG(X86_TRAP_BP, int3, DEBUG_STACK), + ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), +#ifdef CONFIG_X86_MCE + ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), +#endif +}; + +/* * Override for the debug_idt. Same as the default, but with interrupt * stack set to DEFAULT_STACK (0). Required for NMI trap handling. */ @@ -158,6 +172,14 @@ void __init idt_setup_early_pf(void) } /** + * idt_setup_ist_traps - Initialize the idt table with traps using IST + */ +void __init idt_setup_ist_traps(void) +{ + idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts)); +} + +/** * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps */ void __init idt_setup_debugidt_traps(void) --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -979,14 +979,7 @@ void __init trap_init(void) */ cpu_init(); - /* -* X86_TRAP_DB and X86_TRAP_BP have been set -* in early_trap_init(). 
However, ITS works only after -* cpu_init() loads TSS. See comments in early_trap_init(). -*/ - set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); - /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); + idt_setup_ist_traps(); x86_init.irqs.trap_init();
Re: [PATCH net-next v2 05/14] net: mvpp2: do not force the link mode
Hi Russell, On Fri, Aug 25, 2017 at 11:43:13PM +0100, Russell King - ARM Linux wrote: > On Fri, Aug 25, 2017 at 04:48:12PM +0200, Antoine Tenart wrote: > > The link mode (speed, duplex) was forced based on what the phylib > > returns. This should not be the case, and only forced by ethtool > > functions manually. This patch removes the link mode enforcement from > > the phylib link_event callback. > > So how does RGMII work (which has no in-band signalling between the PHY > and MAC)? > > phylib expects the network driver to configure it according to the PHY > state at link_event time - I think you need to explain more why you > think that this is not necessary. Good catch, this won't work properly with RGMII. This could be done out-of-band according to the spec, but that would use PHY polling and we do not want that (the same concern was raised by Andrew on another patch). I'll keep this mode enforcement for RGMII then. Thanks! Antoine -- Antoine Ténart, Free Electrons Embedded Linux and Kernel engineering http://free-electrons.com signature.asc Description: PGP signature
[patch V3 39/44] x86/idt: Move APIC gate initialization to tables
Replace the APIC/SMP vector gate initialization with the table based mechanism. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h |1 arch/x86/kernel/idt.c | 48 ++ arch/x86/kernel/irqinit.c | 69 3 files changed, 50 insertions(+), 68 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -507,6 +507,7 @@ static inline void load_current_idt(void extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); extern void idt_setup_traps(void); +extern void idt_setup_apic_and_irq_gates(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -103,6 +103,46 @@ static const __initdata struct idt_data #endif }; +/* + * The APIC and SMP idt entries + */ +static const __initdata struct idt_data apic_idts[] = { +#ifdef CONFIG_SMP + INTG(RESCHEDULE_VECTOR, reschedule_interrupt), + INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), + INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt), + INTG(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt), + INTG(REBOOT_VECTOR, reboot_interrupt), +#endif + +#ifdef CONFIG_X86_THERMAL_VECTOR + INTG(THERMAL_APIC_VECTOR, thermal_interrupt), +#endif + +#ifdef CONFIG_X86_MCE_THRESHOLD + INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt), +#endif + +#ifdef CONFIG_X86_MCE_AMD + INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt), +#endif + +#ifdef CONFIG_X86_LOCAL_APIC + INTG(LOCAL_TIMER_VECTOR,apic_timer_interrupt), + INTG(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi), +# ifdef CONFIG_HAVE_KVM + INTG(POSTED_INTR_VECTOR,kvm_posted_intr_ipi), + INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi), + INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi), +# endif +# ifdef CONFIG_IRQ_WORK + INTG(IRQ_WORK_VECTOR, irq_work_interrupt), +# endif + INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), + INTG(ERROR_APIC_VECTOR, error_interrupt), +#endif +}; + #ifdef CONFIG_X86_64 /* * Early traps running on 
the DEFAULT_STACK because the other interrupt @@ -242,6 +282,14 @@ void __init idt_setup_debugidt_traps(voi #endif /** + * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates + */ +void __init idt_setup_apic_and_irq_gates(void) +{ + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); +} + +/** * idt_setup_early_handler - Initializes the idt table with early handlers */ void __init idt_setup_early_handler(void) --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -87,73 +87,6 @@ void __init init_IRQ(void) x86_init.irqs.intr_init(); } -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP - /* -* The reschedule interrupt is a CPU-to-CPU reschedule-helper -* IPI, driven by wakeup. -*/ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); - - /* IPI used for rebooting/stopping */ - alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); -#endif /* CONFIG_SMP */ -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - -#ifdef CONFIG_X86_THERMAL_VECTOR - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#ifdef CONFIG_X86_MCE_THRESHOLD - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -#endif - -#ifdef CONFIG_X86_MCE_AMD - alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt); -#endif - -#ifdef CONFIG_X86_LOCAL_APIC - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* IPI for X86 platform specific use */ - alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); -#ifdef CONFIG_HAVE_KVM - /* IPI 
for KVM to deliver posted interrupt */ - alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi); - /* IPI for KVM to deliver interrupt to wake up tasks */ - alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi); - /* IPI for KVM to deliver nested posted interrupt */ - alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi); -#endif - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spuriou
Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote: > Hello all, > > This is _RFC_. > > I want to request for comments about if it's reasonable conceptually. If > yes, I want to resend after working it more carefully. > > Could you let me know your opinions about this? > > ->8- > From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001 > From: Byungchul Park > Date: Fri, 25 Aug 2017 17:35:07 +0900 > Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks > > We introduced the following commit to detect deadlocks caused by > wait_for_completion() in flush_{workqueue, work}() and other locks. But > now LOCKDEP_COMPLETIONS is introduced, such works are automatically done > by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore. > Removed it. > No.. the existing annotation is strictly better because it will _always_ warn. It doesn't need to first observe things just right.
[patch V3 36/44] x86/idt: Move debug stack init to table based
Add the debug_idt init table and make use of it. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h |2 ++ arch/x86/kernel/idt.c | 23 +++ arch/x86/kernel/traps.c |6 +- 3 files changed, 26 insertions(+), 5 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -509,8 +509,10 @@ extern void idt_setup_early_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); +extern void idt_setup_debugidt_traps(void); #else static inline void idt_setup_early_pf(void) { } +static inline void idt_setup_debugidt_traps(void) { } #endif extern void idt_invalidate(void *addr); --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -68,6 +68,15 @@ static const __initdata struct idt_data static const __initdata struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, page_fault), }; + +/* + * Override for the debug_idt. Same as the default, but with interrupt + * stack set to DEFAULT_STACK (0). Required for NMI trap handling. + */ +static const __initdata struct idt_data dbg_idts[] = { + INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_BP, int3), +}; #endif /* Must be page-aligned because the real IDT is used in a fixmap. */ @@ -82,6 +91,10 @@ struct desc_ptr idt_descr __ro_after_ini /* No need to be aligned, but done to keep all IDTs defined the same way. */ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; +/* + * Override for the debug_idt. Same as the default, but with interrupt + * stack set to DEFAULT_STACK (0). Required for NMI trap handling. 
+ */ const struct desc_ptr debug_idt_descr = { .size = IDT_ENTRIES * 16 - 1, .address= (unsigned long) debug_idt_table, @@ -143,6 +156,16 @@ void __init idt_setup_early_pf(void) idt_setup_from_table(idt_table, early_pf_idts, ARRAY_SIZE(early_pf_idts)); } + +/** + * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps + */ +void __init idt_setup_debugidt_traps(void) +{ + memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); + + idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts)); +} #endif /** --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -990,9 +990,5 @@ void __init trap_init(void) x86_init.irqs.trap_init(); -#ifdef CONFIG_X86_64 - memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); - set_nmi_gate(X86_TRAP_DB, &debug); - set_nmi_gate(X86_TRAP_BP, &int3); -#endif + idt_setup_debugidt_traps(); }
[patch V3 40/44] x86/idt: Move interrupt gate initialization to IDT code
Move the gate initialization from interrupt init to the IDT code so all IDT related operations are at a single place. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/idt.c | 18 ++ arch/x86/kernel/irqinit.c | 18 -- 2 files changed, 18 insertions(+), 18 deletions(-) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -286,7 +286,25 @@ void __init idt_setup_debugidt_traps(voi */ void __init idt_setup_apic_and_irq_gates(void) { + int i = FIRST_EXTERNAL_VECTOR; + void *entry; + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); + + for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); + } + + for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { +#ifdef CONFIG_X86_LOCAL_APIC + set_bit(i, used_vectors); + set_intr_gate(i, spurious_interrupt); +#else + entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + set_intr_gate(i, entry); +#endif + } } /** --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -89,29 +89,11 @@ void __init init_IRQ(void) void __init native_init_IRQ(void) { - int i; - /* Execute any quirks before the call gates are initialised: */ x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); - /* -* Cover the whole vector space, no vector can escape -* us. (some of these will be overridden and become -* 'special' SMP interrupts) -*/ - i = FIRST_EXTERNAL_VECTOR; - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { - /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - set_intr_gate(i, irq_entries_start + - 8 * (i - FIRST_EXTERNAL_VECTOR)); - } -#ifdef CONFIG_X86_LOCAL_APIC - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) - set_intr_gate(i, spurious_interrupt); -#endif - if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2);
[patch V3 38/44] x86/idt: Move regular trap init to tables
Initialize the regular traps with a table. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h |1 arch/x86/kernel/idt.c | 51 arch/x86/kernel/traps.c | 41 --- 3 files changed, 53 insertions(+), 40 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -506,6 +506,7 @@ static inline void load_current_idt(void extern void idt_setup_early_handler(void); extern void idt_setup_early_traps(void); +extern void idt_setup_traps(void); #ifdef CONFIG_X86_64 extern void idt_setup_early_pf(void); --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -60,6 +60,49 @@ static const __initdata struct idt_data #endif }; +/* + * The default IDT entries which are set up in trap_init() before + * cpu_init() is invoked. Interrupt stacks cannot be used at that point and + * the traps which use them are reinitialized with IST after cpu_init() has + * set up TSS. + */ +static const __initdata struct idt_data def_idts[] = { + INTG(X86_TRAP_DE, divide_error), + INTG(X86_TRAP_NMI, nmi), + INTG(X86_TRAP_BR, bounds), + INTG(X86_TRAP_UD, invalid_op), + INTG(X86_TRAP_NM, device_not_available), + INTG(X86_TRAP_OLD_MF, coprocessor_segment_overrun), + INTG(X86_TRAP_TS, invalid_TSS), + INTG(X86_TRAP_NP, segment_not_present), + INTG(X86_TRAP_SS, stack_segment), + INTG(X86_TRAP_GP, general_protection), + INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug), + INTG(X86_TRAP_MF, coprocessor_error), + INTG(X86_TRAP_AC, alignment_check), + INTG(X86_TRAP_XF, simd_coprocessor_error), + +#ifdef CONFIG_X86_32 + TSKG(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS), +#else + INTG(X86_TRAP_DF, double_fault), +#endif + INTG(X86_TRAP_DB, debug), + INTG(X86_TRAP_NMI, nmi), + INTG(X86_TRAP_BP, int3), + +#ifdef CONFIG_X86_MCE + INTG(X86_TRAP_MC, &machine_check), +#endif + + SYSG(X86_TRAP_OF, overflow), +#if defined(CONFIG_IA32_EMULATION) + SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), +#elif defined(CONFIG_X86_32) + SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), +#endif +}; + #ifdef 
CONFIG_X86_64 /* * Early traps running on the DEFAULT_STACK because the other interrupt @@ -154,6 +197,14 @@ void __init idt_setup_early_traps(void) load_idt(&idt_descr); } +/** + * idt_setup_traps - Initialize the idt table with default traps + */ +void __init idt_setup_traps(void) +{ + idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts)); +} + #ifdef CONFIG_X86_64 /** * idt_setup_early_pf - Initialize the idt table with early pagefault handler --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -925,46 +925,7 @@ dotraplinkage void do_iret_error(struct void __init trap_init(void) { - int i; - - set_intr_gate(X86_TRAP_DE, divide_error); - set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); - /* int4 can be called from all */ - set_system_intr_gate(X86_TRAP_OF, &overflow); - set_intr_gate(X86_TRAP_BR, bounds); - set_intr_gate(X86_TRAP_UD, invalid_op); - set_intr_gate(X86_TRAP_NM, device_not_available); -#ifdef CONFIG_X86_32 - set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); -#else - set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK); -#endif - set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); - set_intr_gate(X86_TRAP_TS, invalid_TSS); - set_intr_gate(X86_TRAP_NP, segment_not_present); - set_intr_gate(X86_TRAP_SS, stack_segment); - set_intr_gate(X86_TRAP_GP, general_protection); - set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); - set_intr_gate(X86_TRAP_MF, coprocessor_error); - set_intr_gate(X86_TRAP_AC, alignment_check); -#ifdef CONFIG_X86_MCE - set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK); -#endif - set_intr_gate(X86_TRAP_XF, simd_coprocessor_error); - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - -#ifdef CONFIG_IA32_EMULATION - set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#endif - -#ifdef CONFIG_X86_32 - 
set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#endif + idt_setup_traps(); /* * Set the IDT descriptor to a fixed read-only location, so that the
[patch V3 27/44] x86/ldttss: Cleanup 32bit descriptors
Like the IDT descriptors the LDT/TSS descriptors are pointlessly different on 32 and 64 bit. Unify them and get rid of the duplicated code. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 26 +++--- arch/x86/include/asm/desc_defs.h | 27 --- 2 files changed, 15 insertions(+), 38 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -166,42 +166,22 @@ native_write_gdt_entry(struct desc_struc memcpy(&gdt[entry], desc, size); } -static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, - unsigned long limit, unsigned char type, - unsigned char flags) -{ - desc->limit0= (u16) limit; - desc->base0 = (u16) base; - desc->base1 = (base >> 16) & 0xFF; - desc->type = type & 0x0F; - desc->s = 0; - desc->dpl = 0; - desc->p = 1; - desc->limit1= (limit >> 16) & 0xF; - desc->avl = (flags >> 0) & 0x01; - desc->l = (flags >> 1) & 0x01; - desc->d = (flags >> 2) & 0x01; - desc->g = (flags >> 3) & 0x01; -} - static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { -#ifdef CONFIG_X86_64 - struct ldttss_desc64 *desc = d; + struct ldttss_desc *desc = d; memset(desc, 0, sizeof(*desc)); - desc->limit0= size & 0x; + desc->limit0= (u16) size; desc->base0 = (u16) addr; desc->base1 = (addr >> 16) & 0xFF; desc->type = type; desc->p = 1; desc->limit1= (size >> 16) & 0xF; desc->base2 = (addr >> 24) & 0xFF; +#ifdef CONFIG_X86_64 desc->base3 = (u32) (addr >> 32); -#else - pack_descriptor((struct desc_struct *)d, addr, size, type, 0); #endif } --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -49,24 +49,21 @@ enum { DESCTYPE_S = 0x10, /* !system */ }; -/* LDT or TSS descriptor in the GDT. 16 bytes. */ -struct ldttss_desc64 { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - +/* LDT or TSS descriptor in the GDT. 
*/ +struct ldttss_desc { + u16 limit0; + u16 base0; + u16 base1 : 8, type : 5, dpl : 2, p : 1; + u16 limit1 : 4, zero0 : 3, g : 1, base2 : 8; #ifdef CONFIG_X86_64 -typedef struct ldttss_desc64 ldt_desc; -typedef struct ldttss_desc64 tss_desc; -#else -typedef struct desc_struct ldt_desc; -typedef struct desc_struct tss_desc; + u32 base3; + u32 zero1; #endif +} __attribute__((packed)); + +typedef struct ldttss_desc ldt_desc; +typedef struct ldttss_desc tss_desc; struct idt_bits { u16 ist : 3,
[patch V3 42/44] x86/idt: Deinline setup functions
Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 37 ++--- arch/x86/kernel/idt.c | 43 ++- 2 files changed, 36 insertions(+), 44 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,44 +390,11 @@ static inline void set_desc_limit(struct desc->limit1 = (limit >> 16) & 0xf; } -static inline void _set_gate(int gate, unsigned type, const void *addr, -unsigned dpl, unsigned ist, unsigned seg) -{ - gate_desc s; - - pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); - /* -* does not need to be atomic because it is only done once at -* setup time -*/ - write_idt_entry(idt_table, gate, &s); -} - -static inline void set_intr_gate(unsigned int n, const void *addr) -{ - BUG_ON(n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); -} +void set_intr_gate(unsigned int n, const void *addr); +void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long used_vectors[]; -static inline void alloc_system_vector(int vector) -{ - BUG_ON(vector < FIRST_SYSTEM_VECTOR); - if (!test_bit(vector, used_vectors)) { - set_bit(vector, used_vectors); - } else { - BUG(); - } -} - -#define alloc_intr_gate(n, addr) \ - do {\ - alloc_system_vector(n); \ - set_intr_gate(n, addr); \ - } while (0) - - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr); static inline bool is_debug_idt_enabled(void) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -212,15 +212,16 @@ static inline void idt_init_desc(gate_de #endif } -static __init void -idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size) +static void +idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys) { gate_desc desc; for (; size > 0; t++, size--) { idt_init_desc(&desc, t); - set_bit(t->vector, used_vectors); write_idt_entry(idt, t->vector, &desc); + if (sys) + set_bit(t->vector, used_vectors); } } @@ -233,7 +234,8 @@ idt_setup_from_table(gate_desc *idt, con */ void __init idt_setup_early_traps(void) { - 
idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts)); + idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts), +true); load_idt(&idt_descr); } @@ -242,7 +244,7 @@ void __init idt_setup_early_traps(void) */ void __init idt_setup_traps(void) { - idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts)); + idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true); } #ifdef CONFIG_X86_64 @@ -259,7 +261,7 @@ void __init idt_setup_traps(void) void __init idt_setup_early_pf(void) { idt_setup_from_table(idt_table, early_pf_idts, -ARRAY_SIZE(early_pf_idts)); +ARRAY_SIZE(early_pf_idts), true); } /** @@ -267,7 +269,7 @@ void __init idt_setup_early_pf(void) */ void __init idt_setup_ist_traps(void) { - idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts)); + idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true); } /** @@ -277,7 +279,7 @@ void __init idt_setup_debugidt_traps(voi { memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16); - idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts)); + idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false); } #endif @@ -289,7 +291,7 @@ void __init idt_setup_apic_and_irq_gates int i = FIRST_EXTERNAL_VECTOR; void *entry; - idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts)); + idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -333,3 +335,26 @@ void idt_invalidate(void *addr) load_idt(&idt); } + +void set_intr_gate(unsigned int n, const void *addr) +{ + struct idt_data data; + + BUG_ON(n > 0xFF); + + memset(&data, 0, sizeof(data)); + data.vector = n; + data.addr = addr; + data.segment= __KERNEL_CS; + data.bits.type = GATE_INTERRUPT; + data.bits.p = 1; + + idt_setup_from_table(idt_table, &data, 1, false); +} + +void alloc_intr_gate(unsigned int n, const void *addr) +{ + 
BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR); + set_bit(n, used_vectors); + set_intr_gate(n, addr); +}
[patch V3 41/44] x86/idt: Remove unused functions/inlines
The IDT related inlines are no longer used. Remove them. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/desc.h | 36 1 file changed, 36 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,16 +390,6 @@ static inline void set_desc_limit(struct desc->limit1 = (limit >> 16) & 0xf; } -#ifdef CONFIG_X86_64 -static inline void set_nmi_gate(int gate, void *addr) -{ - gate_desc s; - - pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS); - write_idt_entry(debug_idt_table, gate, &s); -} -#endif - static inline void _set_gate(int gate, unsigned type, const void *addr, unsigned dpl, unsigned ist, unsigned seg) { @@ -437,32 +427,6 @@ static inline void alloc_system_vector(i set_intr_gate(n, addr); \ } while (0) -/* - * This routine sets up an interrupt gate at directory privilege level 3. - */ -static inline void set_system_intr_gate(unsigned int n, void *addr) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS); -} - -static inline void set_task_gate(unsigned int n, unsigned int gdt_entry) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3)); -} - -static inline void set_intr_gate_ist(int n, void *addr, unsigned ist) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS); -} - -static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) -{ - BUG_ON((unsigned)n > 0xFF); - _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); -} #ifdef CONFIG_X86_64 DECLARE_PER_CPU(u32, debug_idt_ctr);
[patch V3 44/44] x86/idt: Hide set_intr_gate()
set_intr_gate() is an internal function of the IDT code. The only user left is the KVM code which replaces the pagefault handler eventually. Provide an explicit update_intr_gate() function and make set_intr_gate() static. While at it replace the magic number 14 in the KVM code with the proper trap define. Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini --- arch/x86/include/asm/desc.h |2 +- arch/x86/kernel/idt.c | 33 - arch/x86/kernel/kvm.c |2 +- 3 files changed, 22 insertions(+), 15 deletions(-) --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -390,7 +390,7 @@ static inline void set_desc_limit(struct desc->limit1 = (limit >> 16) & 0xf; } -void set_intr_gate(unsigned int n, const void *addr); +void update_intr_gate(unsigned int n, const void *addr); void alloc_intr_gate(unsigned int n, const void *addr); extern unsigned long used_vectors[]; --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -225,6 +225,22 @@ idt_setup_from_table(gate_desc *idt, con } } +static void set_intr_gate(unsigned int n, const void *addr) +{ + struct idt_data data; + + BUG_ON(n > 0xFF); + + memset(&data, 0, sizeof(data)); + data.vector = n; + data.addr = addr; + data.segment= __KERNEL_CS; + data.bits.type = GATE_INTERRUPT; + data.bits.p = 1; + + idt_setup_from_table(idt_table, &data, 1, false); +} + /** * idt_setup_early_traps - Initialize the idt table with early traps * @@ -336,20 +352,11 @@ void idt_invalidate(void *addr) load_idt(&idt); } -void set_intr_gate(unsigned int n, const void *addr) +void __init update_intr_gate(unsigned int n, const void *addr) { - struct idt_data data; - - BUG_ON(n > 0xFF); - - memset(&data, 0, sizeof(data)); - data.vector = n; - data.addr = addr; - data.segment= __KERNEL_CS; - data.bits.type = GATE_INTERRUPT; - data.bits.p = 1; - - idt_setup_from_table(idt_table, &data, 1, false); + if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + return; + set_intr_gate(n, addr); } void alloc_intr_gate(unsigned int n, const void *addr) --- 
a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -455,7 +455,7 @@ static int kvm_cpu_down_prepare(unsigned static void __init kvm_apf_trap_init(void) { - set_intr_gate(14, async_page_fault); + update_intr_gate(X86_TRAP_PF, async_page_fault); } void __init kvm_guest_init(void)
[patch V3 24/44] x86/fpu: Use bitfield accessors for desc_struct
desc_struct is a union of u32 fields and bitfields. The access to the u32 fields is done with magic macros. Convert it to use the bitfields and replace the macro magic with parseable inline functions. Signed-off-by: Thomas Gleixner --- arch/x86/math-emu/fpu_entry.c | 11 - arch/x86/math-emu/fpu_system.h | 48 ++-- arch/x86/math-emu/get_address.c | 17 +++--- 3 files changed, 51 insertions(+), 25 deletions(-) --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -147,7 +147,7 @@ void math_emulate(struct math_emu_info * } code_descriptor = FPU_get_ldt_descriptor(FPU_CS); - if (SEG_D_SIZE(code_descriptor)) { + if (code_descriptor.d) { /* The above test may be wrong, the book is not clear */ /* Segmented 32 bit protected mode */ addr_modes.default_mode = SEG32; @@ -155,11 +155,10 @@ void math_emulate(struct math_emu_info * /* 16 bit protected mode */ addr_modes.default_mode = PM16; } - FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor); - code_limit = code_base - + (SEG_LIMIT(code_descriptor) + - 1) * SEG_GRANULARITY(code_descriptor) - - 1; + FPU_EIP += code_base = seg_get_base(&code_descriptor); + code_limit = seg_get_limit(&code_descriptor) + 1; + code_limit *= seg_get_granularity(&code_descriptor); + code_limit += code_base - 1; if (code_limit < code_base) code_limit = 0x; } --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -34,17 +34,43 @@ static inline struct desc_struct FPU_get return ret; } -#define SEG_D_SIZE(x) ((x).b & (3 << 21)) -#define SEG_G_BIT(x) ((x).b & (1 << 23)) -#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 
4096 : 1) -#define SEG_286_MODE(x)((x).b & ( 0xff00 | 0xf | (1 << 23))) -#define SEG_BASE_ADDR(s) (((s).b & 0xff00) \ -| (((s).b & 0xff) << 16) | ((s).a >> 16)) -#define SEG_LIMIT(s) (((s).b & 0xff) | ((s).a & 0x)) -#define SEG_EXECUTE_ONLY(s)(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11)) -#define SEG_WRITE_PERM(s) (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9)) -#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ -== (1 << 10)) +#define SEG_TYPE_WRITABLE (1U << 1) +#define SEG_TYPE_EXPANDS_DOWN (1U << 2) +#define SEG_TYPE_EXECUTE (1U << 3) +#define SEG_TYPE_EXPAND_MASK (SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE) +#define SEG_TYPE_EXECUTE_MASK (SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE) + +static inline unsigned long seg_get_base(struct desc_struct *d) +{ + unsigned long base = (unsigned long)d->base2 << 24; + + return base | ((unsigned long)d->base1 << 16) | d->base0; +} + +static inline unsigned long seg_get_limit(struct desc_struct *d) +{ + return ((unsigned long)d->limit << 16) | d->limit0; +} + +static inline unsigned long seg_get_granularity(struct desc_struct *d) +{ + return d->g ? 
4096 : 1; +} + +static inline bool seg_expands_down(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN; +} + +static inline bool seg_execute_only(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE; +} + +static inline bool seg_writable(struct desc_struct *d) +{ + return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE; +} #define I387 (¤t->thread.fpu.state) #define FPU_info (I387->soft.info) --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c @@ -159,17 +159,18 @@ static long pm_address(u_char FPU_modrm, } descriptor = FPU_get_ldt_descriptor(addr->selector); - base_address = SEG_BASE_ADDR(descriptor); + base_address = seg_get_base(&descriptor); address = base_address + offset; - limit = base_address - + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1; + limit = seg_get_limit(&descriptor) + 1; + limit *= seg_get_granularity(&descriptor); + limit += base_address - 1; if (limit < base_address) limit = 0x; - if (SEG_EXPAND_DOWN(descriptor)) { - if (SEG_G_BIT(descriptor)) + if (seg_expands_down(&descriptor)) { + if (descriptor.g) { seg_top = 0x; - else { + } else { seg_top = base_address + (1 << 20); if (seg_top < base_address) seg_top = 0xff
[patch V3 11/44] x86/apic: Remove the duplicated tracing versions of interrupts
The error and the spurious interrupt are really rare events and not at all so performance sensitive that two NOP5s can not be tolerated when tracing is disabled. Remove the nonsense. Signed-off-by: Thomas Gleixner Reviewed-by: Steven Rostedt (VMware) --- arch/x86/include/asm/hw_irq.h |4 +-- arch/x86/kernel/apic/apic.c | 43 +- 2 files changed, 12 insertions(+), 35 deletions(-) --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -48,15 +48,15 @@ extern asmlinkage void call_function_sin #ifdef CONFIG_TRACING /* Interrupt handlers registered during init_IRQ */ -extern void trace_error_interrupt(void); extern void trace_irq_work_interrupt(void); -extern void trace_spurious_interrupt(void); extern void trace_thermal_interrupt(void); extern void trace_reschedule_interrupt(void); extern void trace_threshold_interrupt(void); extern void trace_deferred_error_interrupt(void); extern void trace_call_function_interrupt(void); extern void trace_call_function_single_interrupt(void); +#define trace_error_interrupt error_interrupt +#define trace_spurious_interrupt spurious_interrupt #define trace_x86_platform_ipi x86_platform_ipi #define trace_apic_timer_interrupt apic_timer_interrupt #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1899,10 +1899,14 @@ void __init register_lapic_address(unsig /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -static void __smp_spurious_interrupt(u8 vector) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { + u8 vector = ~regs->orig_ax; u32 v; + entering_irq(); + trace_spurious_apic_entry(vector); + /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... 
@@ -1917,22 +1921,7 @@ static void __smp_spurious_interrupt(u8 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " "should never happen.\n", vector, smp_processor_id()); -} -__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) -{ - entering_irq(); - __smp_spurious_interrupt(~regs->orig_ax); - exiting_irq(); -} - -__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) -{ - u8 vector = ~regs->orig_ax; - - entering_irq(); - trace_spurious_apic_entry(vector); - __smp_spurious_interrupt(vector); trace_spurious_apic_exit(vector); exiting_irq(); } @@ -1940,10 +1929,8 @@ static void __smp_spurious_interrupt(u8 /* * This interrupt should never happen with our APIC/SMP architecture */ -static void __smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { - u32 v; - u32 i = 0; static const char * const error_interrupt_reason[] = { "Send CS error",/* APIC Error Bit 0 */ "Receive CS error", /* APIC Error Bit 1 */ @@ -1954,6 +1941,10 @@ static void __smp_error_interrupt(struct "Received illegal vector", /* APIC Error Bit 6 */ "Illegal register address", /* APIC Error Bit 7 */ }; + u32 v, i = 0; + + entering_irq(); + trace_error_apic_entry(ERROR_APIC_VECTOR); /* First tickle the hardware, only then report what went on. -- REW */ if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ @@ -1975,20 +1966,6 @@ static void __smp_error_interrupt(struct apic_printk(APIC_DEBUG, KERN_CONT "\n"); -} - -__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) -{ - entering_irq(); - __smp_error_interrupt(regs); - exiting_irq(); -} - -__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) -{ - entering_irq(); - trace_error_apic_entry(ERROR_APIC_VECTOR); - __smp_error_interrupt(regs); trace_error_apic_exit(ERROR_APIC_VECTOR); exiting_irq(); }
[patch V3 43/44] x86/idt: Simplify alloc_intr_gate
The only users of alloc_intr_gate() are hypervisors, which both check the used_vectors bitmap whether they have allocated the gate already. Move that check into alloc_intr_gate() and simplify the users. Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: "K. Y. Srinivasan" Cc: Stephen Hemminger Cc: Boris Ostrovsky Cc: Juergen Gross --- arch/x86/kernel/cpu/mshyperv.c |9 ++--- arch/x86/kernel/idt.c|6 +++--- drivers/xen/events/events_base.c |6 ++ 3 files changed, 7 insertions(+), 14 deletions(-) --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -59,13 +59,8 @@ void hyperv_vector_handler(struct pt_reg void hv_setup_vmbus_irq(void (*handler)(void)) { vmbus_handler = handler; - /* -* Setup the IDT for hypervisor callback. Prevent reallocation -* at module reload. -*/ - if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, - hyperv_callback_vector); + /* Setup the IDT for hypervisor callback */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); } void hv_remove_vmbus_irq(void) --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -354,7 +354,7 @@ void set_intr_gate(unsigned int n, const void alloc_intr_gate(unsigned int n, const void *addr) { - BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR); - set_bit(n, used_vectors); - set_intr_gate(n, addr); + BUG_ON(n < FIRST_SYSTEM_VECTOR); + if (!test_and_set_bit(n, used_vectors)) + set_intr_gate(n, addr); } --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1653,10 +1653,8 @@ void xen_callback_vector(void) return; } pr_info("Xen HVM callback vector for event delivery is enabled\n"); - /* in the restore case the vector has already been allocated */ - if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, - xen_hvm_callback_vector); + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, + xen_hvm_callback_vector); } } #else
[patch V3 23/44] x86/percpu: Use static initializer for GDT entry
The IDT cleanup is about to remove pack_descriptor(). The GDT setup for the percpu storage can be achieved with the static initializer as well. Replace it. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup_percpu.c |9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -155,13 +155,10 @@ static void __init pcpup_populate_pte(un static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 - struct desc_struct gdt; + struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu), + 0xF); - pack_descriptor(&gdt, per_cpu_offset(cpu), 0xF, - 0x2 | DESCTYPE_S, 0x8); - gdt.s = 1; - write_gdt_entry(get_cpu_gdt_rw(cpu), - GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); + write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S); #endif }
Re: [PATCH] zram: add zstd to the supported algorithms list
Hi Nick, On Fri, Aug 25, 2017 at 07:31:14PM +, Nick Terrell wrote: > On 8/24/17, 10:19 PM, "Minchan Kim" wrote: > > On Fri, Aug 25, 2017 at 01:35:35AM +, Nick Terrell wrote: > [..] > > > I think using dictionaries in zram could be very interesting. We could for > > > example, take a random sample of the RAM and use that as the dictionary > > > for compression. E.g. take 32 512B samples from RAM and build a 16 KB > > > dictionary (sizes may vary). > > > > For static option, could we create the dictionary with data in zram > > and dump the dictionary into file. And then, rebuiling zram or kernel > > includes the dictionary into images. > > > > For it, we would need some knob like > > > > cat /sys/block/zram/zstd_dict > dict.data > > > > CONFIG_ZSTD_DICT_DIR= > > CONFIG_ZSTD_DICT_FILE= > > My guess is that a static dictionary won't cut it, since different > workloads will have drastically different RAM contents, so we won't be able > to construct a single dictionary that works for them all. I'd love to be > proven wrong though. zRAM is popular for system swap in embedded world. In mobile phone, there would be different workloads as you said but other scenario like refrigerator, TV and so will have very specific scenario so it would be a great to have. > > > For dynamic option, could we make the dictionary with data > > in zram dynamically? So, upcoming pages will use the newly > > created dictionary but old compressed pages will use own dictionary. > > Yeah thats totally possible on the compression side, we would just need to > save which pages were compressed with which dictionary somewhere. Great. We have zram->table for object based and zspage for pages unit so I expect it wouldn't be hard to implement. > > > I'm not sure it's possible, anyway, if predefined dict can help > > comp ratio a lot in 4K data, I really love the feature and will support > > to have it. 
;) > > > > > > > > I'm not sure how you would pass a dictionary into the crypto compression > > > API, but I'm sure we can make something work if dictionary compression > > > proves to be beneficial enough. > > > > Yes, it would be better to integrate the feature crypto but Please, don't > > tie to > > crypto API. If it's hard to support with current cypto API in short time, > > I really want to support it with zcomp_zstd.c. > > > > Please look at old zcomp model. > > http://elixir.free-electrons.com/linux/v4.7/source/drivers/block/zram/zcomp_lz4.c > > Thanks for the link, we could definitely make zcomp work with dictionaries. > > > > What data have you, or anyone, used for benchmarking compression ratio > > > and > > > speed for RAM? Since it is such a specialized application, the standard > > > compression benchmarks aren't very applicable. > > > > I have used my image dumped from desktop swap device. > > Of course, it doesn't cover all of cases in the world but it would be better > > to use IO benchmark buffer, IMHO. :) > > Since adding dictionary support won't be quite as easy as adding zstd > support, I think the first step is building a set of benchmarks that > represent some common real world scenarios. We can easily test different > dictionary construction algorithms in userspace, and determine if the work > will pay off for some workloads. I'll collect some RAM samples from my > device and run some preliminary tests. Sweet. I am looking forward to seeing your result. Thanks!
Re: [PATCH net-next v2 09/14] net: mvpp2: dynamic reconfiguration of the PHY mode
Hi Russell, On Fri, Aug 25, 2017 at 11:46:16PM +0100, Russell King - ARM Linux wrote: > On Fri, Aug 25, 2017 at 04:48:16PM +0200, Antoine Tenart wrote: > > This patch adds logic to reconfigure the comphy/gop when the link status > > change at runtime. This is very useful on boards such as the mcbin which > > have SFP and Ethernet ports connected to the same MAC port: depending on > > what the user connects the driver will automatically reconfigure the > > link mode. > > This commit commentry needs updating - as I've already pointed out in > the previous round, the need to reconfigure things has *nothing* to do > with there being SFP and "Ethernet" ports present. Hence, your commit > message is entirely misleading. That's right. I'll update the commit message. Thanks! Antoine -- Antoine Ténart, Free Electrons Embedded Linux and Kernel engineering http://free-electrons.com signature.asc Description: PGP signature
[PATCH v4] ACPI / PMIC: Add opregion driver for Intel Dollar Cove TI PMIC
This patch adds the opregion driver for Dollar Cove TI PMIC on Intel Cherry Trail devices. The patch is based on the original work by Intel, found at: https://github.com/01org/ProductionKernelQuilts with many cleanups and rewrites. The driver is currently provided only as built-in to follow other PMIC opregion drivers convention. The re-enumeration of devices at probe is required for fixing the issues on HP x2 210 G2. See bug#195689. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=193891 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195689 Reviewed-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Takashi Iwai --- I'm resending only this one as v4 patch. v3->v4: * Rename CHTDC_* with CHT_DC_* in Kconfig/Makefile * add cht_ prefix to the driver name string to align with others v2->v3: * Rename dc_ti with chtdc_ti in all places * Driver/kconfig renames accordingly * Constification * Added acks by Andy and Mika v1->v2: * get_raw_temp cleanup in opregion driver, mention about register endianess drivers/acpi/Kconfig| 6 ++ drivers/acpi/Makefile | 1 + drivers/acpi/pmic/intel_pmic_chtdc_ti.c | 137 3 files changed, 144 insertions(+) create mode 100644 drivers/acpi/pmic/intel_pmic_chtdc_ti.c diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 1ce52f84dc23..176fae699891 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -521,6 +521,12 @@ config CHT_WC_PMIC_OPREGION help This config adds ACPI operation region support for CHT Whiskey Cove PMIC. +config CHT_DC_TI_PMIC_OPREGION + bool "ACPI operation region support for Dollar Cove TI PMIC" + depends on INTEL_SOC_PMIC_CHTDC_TI + help + This config adds ACPI operation region support for Dollar Cove TI PMIC. 
+ endif config ACPI_CONFIGFS diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index b1aacfc62b1f..cd228822d4a3 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_CRC_PMIC_OPREGION) += pmic/intel_pmic_crc.o obj-$(CONFIG_XPOWER_PMIC_OPREGION) += pmic/intel_pmic_xpower.o obj-$(CONFIG_BXT_WC_PMIC_OPREGION) += pmic/intel_pmic_bxtwc.o obj-$(CONFIG_CHT_WC_PMIC_OPREGION) += pmic/intel_pmic_chtwc.o +obj-$(CONFIG_CHT_DC_TI_PMIC_OPREGION) += pmic/intel_pmic_chtdc_ti.o obj-$(CONFIG_ACPI_CONFIGFS)+= acpi_configfs.o diff --git a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c new file mode 100644 index ..109c1e9c9c7a --- /dev/null +++ b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c @@ -0,0 +1,137 @@ +/* + * Dollar Cove TI PMIC operation region driver + * Copyright (C) 2014 Intel Corporation. All rights reserved. + * + * Rewritten and cleaned up + * Copyright (C) 2017 Takashi Iwai + */ + +#include +#include +#include +#include +#include "intel_pmic.h" + +/* registers stored in 16bit BE (high:low, total 10bit) */ +#define CHTDC_TI_VBAT 0x54 +#define CHTDC_TI_DIETEMP 0x56 +#define CHTDC_TI_BPTHERM 0x58 +#define CHTDC_TI_GPADC 0x5a + +static struct pmic_table chtdc_ti_power_table[] = { + { .address = 0x00, .reg = 0x41 }, + { .address = 0x04, .reg = 0x42 }, + { .address = 0x08, .reg = 0x43 }, + { .address = 0x0c, .reg = 0x45 }, + { .address = 0x10, .reg = 0x46 }, + { .address = 0x14, .reg = 0x47 }, + { .address = 0x18, .reg = 0x48 }, + { .address = 0x1c, .reg = 0x49 }, + { .address = 0x20, .reg = 0x4a }, + { .address = 0x24, .reg = 0x4b }, + { .address = 0x28, .reg = 0x4c }, + { .address = 0x2c, .reg = 0x4d }, + { .address = 0x30, .reg = 0x4e }, +}; + +static struct pmic_table chtdc_ti_thermal_table[] = { + { + .address = 0x00, + .reg = CHTDC_TI_GPADC + }, + { + .address = 0x0c, + .reg = CHTDC_TI_GPADC + }, + /* TMP2 -> SYSTEMP */ + { + .address = 0x18, + .reg = CHTDC_TI_GPADC + }, + /* TMP3 -> BPTHERM 
*/ + { + .address = 0x24, + .reg = CHTDC_TI_BPTHERM + }, + { + .address = 0x30, + .reg = CHTDC_TI_GPADC + }, + /* TMP5 -> DIETEMP */ + { + .address = 0x3c, + .reg = CHTDC_TI_DIETEMP + }, +}; + +static int chtdc_ti_pmic_get_power(struct regmap *regmap, int reg, int bit, + u64 *value) +{ + int data; + + if (regmap_read(regmap, reg, &data)) + return -EIO; + + *value = data & 1; + return 0; +} + +static int chtdc_ti_pmic_update_power(struct regmap *regmap, int reg, int bit, + bool on) +{ + return regmap_update_bits(regmap, reg, 1, on); +} + +static int chtdc_ti_pmic_get_raw_temp(struct regmap *regmap, int reg) +{ +
Re: linux-next: manual merge of the scsi tree with the staging tree
On Mon, Aug 28, 2017 at 04:41:27PM +1000, Stephen Rothwell wrote: > Hi James, > > Today's linux-next merge of the scsi tree got a conflict in: > > drivers/staging/unisys/visorhba/visorhba_main.c > > between commits: > > 781facd05eb9 ("staging: unisys: visorhba: visorhba_main.c: fixed comment > formatting issues") > > from the staging tree and commit: > > 7bc4e528d9f6 ("scsi: visorhba: sanitze private device data allocation") > > from the scsi tree. > > I fixed it up (see below) and can carry the fix as necessary. This > is now fixed as far as linux-next is concerned, but any non trivial > conflicts should be mentioned to your upstream maintainer when your tree > is submitted for merging. You may also want to consider cooperating > with the maintainer of the conflicting tree to minimise any particularly > complex conflicts. Ick, messy merge, thanks for doing this. greg k-h
Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag
On Mon, 2017-08-28 at 08:10 +0200, Mike Galbraith wrote: > Iff deeper cstate etc for > longer does make a big difference, I can imagine wakeup time migrate > leftward if capacity exists as an "on battery" tactic. (though that > thought also invokes some unpleasant bounce fest images) (consolidate left would have to be LB global to avoid fight with self)
Re: [PATCH] DSA support for Micrel KSZ8895
Hi! > >No, tag_ksz part probably is not acceptable. Do you see solution > >better than just copying it into tag_ksz1 file? > > You could have all Micrel tag implementations live under net/dsa/tag_ksz.c > and have e.g: DSA_TAG_PROTO_KSZ for the current (newer) switches and > DSA_TAG_PROTO_KSZ_LEGACY (or any other name) for the older switches and you > would provide two sets of function pointers depending on which protocol is > requested by the switch. > > Considering the minor difference needed in tagging here, it might be > acceptable to actually keep the current functions and just have the xmit() > call check what get_tag_protocol returns and use word 1 or 0 based on that. > Even though that's a fast path it shouldn't hurt performance too much. If it > does, we can always copy the tagging protocol into dsa_slave_priv so you have > a fast access to it. > Actually I believe I can do optimizer tricks to keep this zero-cost with clean code, if needed. > > > >Any more comments, etc? > > The MII emulation bits are interesting, was it not sufficient if you > implemented phy_read and phy_write operations that perform the necessary > internal PHY accesses or maybe you don't get access to standard MII > registers? b53 does such a thing and we merely just need to do a simple shift > to access the MII register number, thus avoiding the translation. > We don't get standard MII registers over SPI bus. > >Help would be welcome. > > I concur with Andrew, try to get a patch series, even an RFC one together so > we can review things individually. > > How functional is your driver so far? I'd say the basic stuff to get working: > counters (debugging), link management (auto-negotiation, forced, etc.) and > basic bridging: all ports separate by default and working port to port > switching when brought together in a bridge. VLAN, FDB, MDB, other ethtool > goodies can be added later on. > Which counters are essential? 
Link management and basic bridging should work, not sure if I'll have time to do more than that. Best regards, Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html signature.asc Description: Digital signature
Re: [PATCH v2 4/5] cramfs: add mmap support
On Wed, Aug 16, 2017 at 01:35:35PM -0400, Nicolas Pitre wrote: > +static const struct vm_operations_struct cramfs_vmasplit_ops; > +static int cramfs_vmasplit_fault(struct vm_fault *vmf) > +{ > + struct mm_struct *mm = vmf->vma->vm_mm; > + struct vm_area_struct *vma, *new_vma; > + unsigned long split_val, split_addr; > + unsigned int split_pgoff, split_page; > + int ret; > + > + /* Retrieve the vma split address and validate it */ > + vma = vmf->vma; > + split_val = (unsigned long)vma->vm_private_data; > + split_pgoff = split_val & 0x; > + split_page = split_val >> 16; > + split_addr = vma->vm_start + split_page * PAGE_SIZE; > + pr_debug("fault: addr=%#lx vma=%#lx-%#lx split=%#lx\n", > + vmf->address, vma->vm_start, vma->vm_end, split_addr); > + if (!split_val || split_addr >= vma->vm_end || vmf->address < > split_addr) > + return VM_FAULT_SIGSEGV; > + > + /* We have some vma surgery to do and need the write lock. */ > + up_read(&mm->mmap_sem); > + if (down_write_killable(&mm->mmap_sem)) > + return VM_FAULT_RETRY; > + > + /* Make sure the vma didn't change between the locks */ > + vma = find_vma(mm, vmf->address); > + if (vma->vm_ops != &cramfs_vmasplit_ops) { > + /* > + * Someone else raced with us and could have handled the fault. > + * Let it go back to user space and fault again if necessary. > + */ > + downgrade_write(&mm->mmap_sem); > + return VM_FAULT_NOPAGE; > + } > + > + /* Split the vma between the directly mapped area and the rest */ > + ret = split_vma(mm, vma, split_addr, 0); Egads... Everything else aside, who said that your split_... will have anything to do with the vma you get from find_vma()?
Re: [PATCH v5] iio: accel: mma8452: improvements to handle multiple events
Am 28.08.2017 02:23 schrieb Harinath Nampally: This driver supports multiple devices like mma8653, mma8652, mma8452, mma8453 and fxls8471. Almost all these devices have more than one event. Current driver design hardcodes the event specific information, so only one event can be supported by this driver at any given time. Also current design doesn't have the flexibility to add more events. This patch improves by detaching the event related information from chip_info struct,and based on channel type and event direction the corresponding event configuration registers are picked dynamically. Hence both transient and freefall events can be handled in read/write callbacks. Changes are thoroughly tested on fxls8471 device on imx6UL Eval board using iio_event_monitor user space program. After this fix both Freefall and Transient events are handled by the driver without any conflicts. Changes since v4 -> v5 -Add supported_events and enabled_events in chip_info structure so that devices(mma865x) which has no support for transient event will fallback to freefall event. 
Hence this patch changes won't break for devices that can't support transient events Changes since v3 -> v4 -Add 'const struct ev_regs_accel_falling' -Add 'const struct ev_regs_accel_rising' -Refactor mma8452_get_event_regs function to remove the fill in the struct and return above structs -Condense the commit's subject message Changes since v2 -> v3 -Fix typo in commit message -Replace word 'Bugfix' with 'Improvements' -Describe more accurate commit message -Replace breaks with returns -Initialise transient event threshold mask -Remove unrelated change of IIO_ACCEL channel type check in read/write event callbacks Changes since v1 -> v2 -Fix indentations -Remove unused fields in mma8452_event_regs struct -Remove redundant return statement -Remove unrelated changes like checkpatch.pl warning fixes Signed-off-by: Harinath Nampally --- drivers/iio/accel/mma8452.c | 349 +++- 1 file changed, 183 insertions(+), 166 deletions(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index eb6e3dc..0a97e61b 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -59,7 +59,9 @@ #define MMA8452_FF_MT_THS 0x17 #define MMA8452_FF_MT_THS_MASK0x7f #define MMA8452_FF_MT_COUNT0x18 +#define MMA8452_FF_MT_CHAN_SHIFT 3 #define MMA8452_TRANSIENT_CFG 0x1d +#define MMA8452_TRANSIENT_CFG_CHAN(chan) BIT(chan + 1) #define MMA8452_TRANSIENT_CFG_HPF_BYP BIT(0) #define MMA8452_TRANSIENT_CFG_ELE BIT(4) #define MMA8452_TRANSIENT_SRC 0x1e @@ -69,6 +71,7 @@ #define MMA8452_TRANSIENT_THS 0x1f #define MMA8452_TRANSIENT_THS_MASKGENMASK(6, 0) #define MMA8452_TRANSIENT_COUNT0x20 +#define MMA8452_TRANSIENT_CHAN_SHIFT 1 #define MMA8452_CTRL_REG1 0x2a #define MMA8452_CTRL_ACTIVE BIT(0) #define MMA8452_CTRL_DR_MASK GENMASK(5, 3) @@ -107,6 +110,42 @@ struct mma8452_data { const struct mma_chip_info *chip_info; }; + /** + * struct mma8452_event_regs - chip specific data related to events + * @ev_cfg: event config register address + * @ev_src: event source register 
address + * @ev_ths: event threshold register address + * @ev_ths_mask: mask for the threshold value + * @ev_count: event count (period) register address + * + * Since not all chips supported by the driver support comparing high pass + * filtered data for events (interrupts), different interrupt sources are + * used for different chips and the relevant registers are included here. + */ +struct mma8452_event_regs { + u8 ev_cfg; + u8 ev_src; + u8 ev_ths; + u8 ev_ths_mask; + u8 ev_count; +}; + +static const struct mma8452_event_regs ev_regs_accel_falling = { + .ev_cfg = MMA8452_FF_MT_CFG, + .ev_src = MMA8452_FF_MT_SRC, + .ev_ths = MMA8452_FF_MT_THS, + .ev_ths_mask = MMA8452_FF_MT_THS_MASK, + .ev_count = MMA8452_FF_MT_COUNT +}; + +static const struct mma8452_event_regs ev_regs_accel_rising = { + .ev_cfg = MMA8452_TRANSIENT_CFG, + .ev_src = MMA8452_TRANSIENT_SRC, + .ev_ths = MMA8452_TRANSIENT_THS, + .ev_ths_mask = MMA8452_TRANSIENT_THS_MASK, + .ev_count = MMA8452_TRANSIENT_COUNT, +}; + /** * struct mma_chip_info - chip specific data * @chip_id: WHO_AM_I register's value @@ -116,40 +
Re: [PATCH] Revert "pinctrl: sunxi: Don't enforce bias disable (for now)"
On Sun, Aug 27, 2017 at 03:55:23PM +0300, Priit Laes wrote: > This reverts commit 2154d94b40ea2a5de05245521371d0461bb0d669. > > The original patch was intended to avoid some issues with the sunxi > gpio rework and was supposed to be reverted after all the required > DT bits had been merged around v4.10. > > Signed-off-by: Priit Laes Acked-by: Maxime Ripard Thanks! Maxime -- Maxime Ripard, Free Electrons Embedded Linux and Kernel engineering http://free-electrons.com signature.asc Description: PGP signature
Re: [PATCH] mm/page_alloc: don't reserve ZONE_HIGHMEM for ZONE_MOVABLE request
+CC linux-api On 08/28/2017 02:28 AM, Joonsoo Kim wrote: > On Fri, Aug 25, 2017 at 09:56:10AM +0200, Vlastimil Babka wrote: >> On 08/25/2017 02:20 AM, Joonsoo Kim wrote: >>> On Thu, Aug 24, 2017 at 11:41:58AM +0200, Vlastimil Babka wrote: >>> >>> Hmm, this is already pointed by Minchan and I have answered that. >>> >>> lkml.kernel.org/r/<20170421013243.GA13966@js1304-desktop> >>> >>> If you have a better idea, please let me know. >> >> My idea is that size of sysctl_lowmem_reserve_ratio is ZONE_NORMAL+1 and >> it has no entries for zones > NORMAL. The >> setup_per_zone_lowmem_reserve() is adjusted to only set >> lower_zone->lowmem_reserve[j] for idx <= ZONE_NORMAL. >> >> I can't imagine somebody would want override the ratio for HIGHMEM or >> MOVABLE >> (where it has no effect anyway) so the simplest thing is not to expose >> it at all. > > Seems reasonable. However, if there is a user who checks > sysctl_lowmem_reserve_ratio entry for HIGHMEM and change it, suggested > interface will cause a problem since it doesn't expose ratio for > HIGHMEM. Am I missing something? As you explained, it makes little sense to change it for HIGHMEM which only affects MOVABLE allocations. Also I doubt there are many systems with both HIGHMEM (implies 32bit) *and* MOVABLE (implies NUMA, memory hotplug...) zones. So I would just remove it, and if somebody will really miss it, we can always add it back. In any case, please CC linux-api on the next version. > Thanks. > > >> >>> Thanks. >>> >> >> -- >> To unsubscribe, send a message with 'unsubscribe linux-mm' in >> the body to majord...@kvack.org. For more info on Linux MM, >> see: http://www.linux-mm.org/ . >> Don't email: mailto:"d...@kvack.org";> em...@kvack.org
linux-next: manual merge of the scsi tree with the staging tree
Hi James, Today's linux-next merge of the scsi tree got a conflict in: drivers/staging/unisys/visorhba/visorhba_main.c between commits: 781facd05eb9 ("staging: unisys: visorhba: visorhba_main.c: fixed comment formatting issues") from the staging tree and commit: 7bc4e528d9f6 ("scsi: visorhba: sanitze private device data allocation") from the scsi tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc drivers/staging/unisys/visorhba/visorhba_main.c index 8567e447891e,ddce92552ff5.. --- a/drivers/staging/unisys/visorhba/visorhba_main.c +++ b/drivers/staging/unisys/visorhba/visorhba_main.c @@@ -44,12 -44,11 +44,11 @@@ static struct visor_channeltype_descrip }; MODULE_DEVICE_TABLE(visorbus, visorhba_channel_types); -MODULE_ALIAS("visorbus:" VISOR_VHBA_CHANNEL_UUID_STR); +MODULE_ALIAS("visorbus:" VISOR_VHBA_CHANNEL_GUID_STR); struct visordisk_info { + struct scsi_device *sdev; u32 valid; - /* Disk Path */ - u32 channel, id, lun; atomic_t ios_threshold; atomic_t error_count; struct visordisk_info *next; @@@ -105,25 -101,19 +104,19 @@@ struct visorhba_devices_open struct visorhba_devdata *devdata; }; - #define for_each_vdisk_match(iter, list, match) \ - for (iter = &list->head; iter->next; iter = iter->next) \ - if ((iter->channel == match->channel) && \ - (iter->id == match->id) && \ - (iter->lun == match->lun)) - /* - *visor_thread_start - starts a thread for the device - *@threadfn: Function the thread starts - *@thrcontext: Context to pass to the thread, i.e. 
devdata - *@name: string describing name of thread + * visor_thread_start - Starts a thread for the device + * @threadfn: Function the thread starts + * @thrcontext: Context to pass to the thread, i.e. devdata + * @name: String describing name of thread * - *Starts a thread for the device. + * Starts a thread for the device. * - *Return the task_struct * denoting the thread on success, - * or NULL on failure + * Return: The task_struct * denoting the thread on success, + * or NULL on failure */ -static struct task_struct *visor_thread_start -(int (*threadfn)(void *), void *thrcontext, char *name) +static struct task_struct *visor_thread_start(int (*threadfn)(void *), +void *thrcontext, char *name) { struct task_struct *task; @@@ -302,21 -280,19 +295,20 @@@ static void cleanup_scsitaskmgmt_handle } /* - *forward_taskmgmt_command - send taskmegmt command to the Service - * Partition - *@tasktype: Type of taskmgmt command - *@scsidev: Scsidev that issued command + * forward_taskmgmt_command - Send taskmegmt command to the Service + * Partition + * @tasktype: Type of taskmgmt command + * @scsidev: Scsidev that issued command * - *Create a cmdrsp packet and send it to the Serivce Partition - *that will service this request. - *Returns whether the command was queued successfully or not. + * Create a cmdrsp packet and send it to the Serivce Partition + * that will service this request. 
+ * + * Return: Int representing whether command was queued successfully or not */ static int forward_taskmgmt_command(enum task_mgmt_types tasktype, - struct scsi_cmnd *scsicmd) + struct scsi_device *scsidev) { struct uiscmdrsp *cmdrsp; - struct scsi_device *scsidev = scsicmd->device; struct visorhba_devdata *devdata = (struct visorhba_devdata *)scsidev->host->hostdata; int notifyresult = 0x; @@@ -607,24 -570,19 +604,21 @@@ static int visorhba_slave_alloc(struct struct visorhba_devdata *devdata; struct Scsi_Host *scsihost = (struct Scsi_Host *)scsidev->host; ++ /* already allocated return success */ + if (scsidev->hostdata) - return 0; /* already allocated return success */ ++ return 0; + + /* even though we errored, treat as success */ devdata = (struct visorhba_devdata *)scsihost->hostdata; if (!devdata) - return 0; /* even though we errored, treat as success */ + return 0; - /* already allocated return success */ - for_each_vdisk_match(vdisk, devdata, scsidev) - return 0; - - tmpvdisk = kzalloc(sizeof(*tmpvdisk), GFP_ATOMIC); - if (!tmpvdisk) + vdisk =
Re: [PATCH] DSA support for Micrel KSZ8895
Hi! > > No, tag_ksz part probably is not acceptable. Do you see solution > > better than just copying it into tag_ksz1 file? > > How about something like this, which needs further work to actually > compile, but should give you the idea. If that's acceptable, yes, I can do something similar. I don't think CONFIG_NET_DSA_TAG_KSZ_8K / CONFIG_NET_DSA_TAG_KSZ_9K is suitable naming (these will probably differ according to number of ports), what about keeping CONFIG_NET_DSA_TAG_KSZ and adding CONFIG_NET_DSA_TAG_KSZ_1B (for one byte)? Thanks, Pavel >Andrew > > index 99e38af85fc5..843e77b7c270 100644 > --- a/net/dsa/dsa.c > +++ b/net/dsa/dsa.c > @@ -49,8 +49,11 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] > = { > #ifdef CONFIG_NET_DSA_TAG_EDSA > [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops, > #endif > -#ifdef CONFIG_NET_DSA_TAG_KSZ > - [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops, > +#ifdef CONFIG_NET_DSA_TAG_KSZ_8K > + [DSA_TAG_PROTO_KSZ8K] = &ksz8k_netdev_ops, > +#endif > +#ifdef CONFIG_NET_DSA_TAG_KSZ_9K > + [DSA_TAG_PROTO_KSZ9K] = &ksz9k_netdev_ops, > #endif > #ifdef CONFIG_NET_DSA_TAG_LAN9303 > [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops, > diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c > index de66ca8e6201..398b833889f1 100644 > --- a/net/dsa/tag_ksz.c > +++ b/net/dsa/tag_ksz.c > @@ -35,6 +35,9 @@ > static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) > { > struct dsa_slave_priv *p = netdev_priv(dev); > + struct dsa_port *dp = p->dp; > + struct dsa_switch *ds = dp->ds; > + struct dsa_switch_tree *dst = ds->dst; > struct sk_buff *nskb; > int padlen; > u8 *tag; > @@ -69,8 +72,14 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, > struct net_device *dev) > } > > tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); > - tag[0] = 0; > - tag[1] = 1 << p->dp->index; /* destination port */ > + if (dst->tag_ops == ksz8k_netdev_ops) { > + tag[0] = 1 << p->dp->index; /* destination port */0; > + tag[1] = 0; > + } > + > + if (dst->tag_ops == 
ksz9k_netdev_ops) { > + tag[0] = 0; > + tag[1] = 1 << p->dp->index; /* destination port */ > > return nskb; > } > @@ -98,7 +107,12 @@ static struct sk_buff *ksz_rcv(struct sk_buff *skb, > struct net_device *dev, > return skb; > } > > -const struct dsa_device_ops ksz_netdev_ops = { > +const struct dsa_device_ops ksz8k_netdev_ops = { > + .xmit = ksz_xmit, > + .rcv= ksz_rcv, > +}; > + > +const struct dsa_device_ops ksz9k_netdev_ops = { > .xmit = ksz_xmit, > .rcv= ksz_rcv, > }; -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html signature.asc Description: Digital signature
Re: [PATCH] s390/zcrypt: make CPRBX const
On 08/25/2017 03:10 PM, Bhumika Goyal wrote: > Make this const as it is only used in a copy operation. > > Signed-off-by: Bhumika Goyal > --- > drivers/s390/crypto/zcrypt_msgtype6.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c > b/drivers/s390/crypto/zcrypt_msgtype6.c > index 4fddb43..afd20ce 100644 > --- a/drivers/s390/crypto/zcrypt_msgtype6.c > +++ b/drivers/s390/crypto/zcrypt_msgtype6.c > @@ -140,7 +140,7 @@ struct function_and_rules_block { > * + 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK') > * - VUD block > */ > -static struct CPRBX static_cprbx = { > +static const struct CPRBX static_cprbx = { > .cprb_len = 0x00DC, > .cprb_ver_id= 0x02, > .func_id= {0x54, 0x32}, Applied. Will be available with the next merge. Thanks and have a nice day. Harald Freudenberger
[PATCH v2 2/2] ARM: dts: sun7i: Add dts file for A20-OLinuXino-MICRO-eMMC
A20-OLinuXino-MICRO has option with onboard eMMC chip. For now it's only shipped with 4GB chip, but in the future this may change. Signed-off-by: Stefan Mavrodiev --- arch/arm/boot/dts/Makefile | 1 + .../boot/dts/sun7i-a20-olinuxino-micro-emmc.dts| 70 ++ 2 files changed, 71 insertions(+) create mode 100644 arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 4b17f35..e1d1e93 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -880,6 +880,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \ sun7i-a20-olinuxino-lime2.dtb \ sun7i-a20-olinuxino-lime2-emmc.dtb \ sun7i-a20-olinuxino-micro.dtb \ + sun7i-a20-olinuxino-micro-emmc.dtb \ sun7i-a20-orangepi.dtb \ sun7i-a20-orangepi-mini.dtb \ sun7i-a20-pcduino3.dtb \ diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts new file mode 100644 index 000..d99e7b1 --- /dev/null +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts @@ -0,0 +1,70 @@ + /* + * Copyright 2017 Olimex Ltd. + * Stefan Mavrodiev + * + * This file is dual-licensed: you can use it either under the terms + * of the GPL or the X11 license, at your option. Note that this dual + * licensing only applies to this file, and not this project as a + * whole. + * + * a) This file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * Or, alternatively, + * + * b) Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "sun7i-a20-olinuxino-micro.dts" + +/ { + model = "Olimex A20-OLinuXino-MICRO-eMMC"; + compatible = "olimex,a20-olinuxino-micro-emmc", "allwinner,sun7i-a20"; + + mmc2_pwrseq: pwrseq { + compatible = "mmc-pwrseq-emmc"; + reset-gpios = <&pio 2 16 GPIO_ACTIVE_LOW>; + }; +}; + +&mmc2 { + pinctrl-names = "default"; + pinctrl-0 = <&mmc2_pins_a>; + vmmc-supply = <®_vcc3v3>; + bus-width = <4>; + non-removable; + mmc-pwrseq = <&mmc2_pwrseq>; + status = "okay"; + + emmc: emmc@0 { + reg = <0>; + compatible = "mmc-card"; + broken-hpi; + }; +}; -- 2.7.4
[PATCH v2 1/2] ARM: dts: sun7i: Fix A20-OLinuXino-MICRO dts for LAN8710
>From revision J the board uses new phy chip LAN8710. Compared with RTL8201, RA17 pin is TXERR. It has pullup which causes phy not to work. To fix this PA17 is muxed with GMAC function. This makes the pin output-low. This patch is compatible with earlier board revisions, since this pin wasn't connected to phy. Signed-off-by: Stefan Mavrodiev --- arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts index 0b7403e..cb1b081 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts @@ -102,7 +102,7 @@ &gmac { pinctrl-names = "default"; - pinctrl-0 = <&gmac_pins_mii_a>; + pinctrl-0 = <&gmac_pins_mii_a>,<&gmac_txerr>; phy = <&phy1>; phy-mode = "mii"; status = "okay"; @@ -229,6 +229,11 @@ }; &pio { + gmac_txerr: gmac_txerr@0 { + pins = "PA17"; + function = "gmac"; + }; + mmc3_cd_pin_olinuxinom: mmc3_cd_pin@0 { pins = "PH11"; function = "gpio_in"; -- 2.7.4
[PATCH v2 0/2] Update board support for A20-OLinuXino-MICRO
>From rev.J of A20-OLinuXino-MICRO, the board has new PHY chip (LAN8710) which replace RTL8201. Also there is option for 4GB eMMC chip. Changes in v2: * Remove pinctrl request for eMMC reset pin * Dump the idea of renaming boards with emmc * Using txerr as gmac function Stefan Mavrodiev (2): ARM: dts: sun7i: Fix A20-OLinuXino-MICRO dts for LAN8710 ARM: dts: sun7i: Add dts file for A20-OLinuXino-MICRO-eMMC arch/arm/boot/dts/Makefile | 1 + .../boot/dts/sun7i-a20-olinuxino-micro-emmc.dts| 70 ++ arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts| 7 ++- 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts -- 2.7.4
Re: [PATCH 12/12] dma-mapping: turn dma_cache_sync into a dma_map_ops method
Hi Christoph, On Sun, Aug 27, 2017 at 6:10 PM, Christoph Hellwig wrote: > After we removed all the dead wood it turns out only two architectures > actually implement dma_cache_sync as a no-op: mips and parisc. Add s/no-op/real op/ > a cache_sync method to struct dma_map_ops and implement it for the > mips default DMA ops, and the parisc pa11 ops. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
[PATCH net-next v3 1/3] net/ncsi: Fix several packet definitions
Signed-off-by: Samuel Mendoza-Jonas --- v2: Rebased on latest net-next net/ncsi/ncsi-cmd.c | 10 +- net/ncsi/ncsi-pkt.h | 2 +- net/ncsi/ncsi-rsp.c | 3 ++- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 5e03ed190e18..7567ca63aae2 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb, struct ncsi_cmd_svf_pkt *cmd; cmd = skb_put_zero(skb, sizeof(*cmd)); - cmd->vlan = htons(nca->words[0]); - cmd->index = nca->bytes[2]; - cmd->enable = nca->bytes[3]; + cmd->vlan = htons(nca->words[1]); + cmd->index = nca->bytes[6]; + cmd->enable = nca->bytes[7]; ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb, struct ncsi_cmd_ev_pkt *cmd; cmd = skb_put_zero(skb, sizeof(*cmd)); - cmd->mode = nca->bytes[0]; + cmd->mode = nca->bytes[3]; ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -228,7 +228,7 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae }, { NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl }, { NCSI_PKT_CMD_GLS,0, ncsi_cmd_handler_default }, - { NCSI_PKT_CMD_SVF,4, ncsi_cmd_handler_svf }, + { NCSI_PKT_CMD_SVF,8, ncsi_cmd_handler_svf }, { NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev }, { NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_SMA,8, ncsi_cmd_handler_sma }, diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 3ea49ed0a935..91b4b66438df 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt { unsigned char index; /* VLAN table index */ unsigned char enable;/* Enable or disable */ __be32 checksum; /* Checksum */ - unsigned char pad[14]; + unsigned char pad[18]; }; /* Enable VLAN */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 087db775b3dc..c1a191d790e2 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -354,7 +354,8 @@ static int 
ncsi_rsp_handler_svf(struct ncsi_request *nr) /* Add or remove the VLAN filter */ if (!(cmd->enable & 0x1)) { - ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index); + /* HW indexes from 1 */ + ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1); } else { vlan = ntohs(cmd->vlan); ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); -- 2.14.0
Re: [PATCH] staging: rtl8723bs: remove memset before memcpy
On Mon, Aug 28, 2017 at 01:43:31AM +0530, Himanshu Jha wrote: > calling memcpy immediately after memset with the same region of memory > makes memset redundant. > > Build successfully. > Thanks for the patch, it looks good. You don't need to say that it builds successfully, because we already assume that's true. > Signed-off-by: Himanshu Jha > --- Sometimes I put a comment here under the cut off line if I want people to know that I haven't tested a patch. Anyway, don't resend the patch. It's fine as-is (unless Greg complains) but it's just for future reference. regards, dan carpenter
[PATCH net-next v3 3/3] ftgmac100: Support NCSI VLAN filtering when available
Register the ndo_vlan_rx_{add,kill}_vid callbacks and set the NETIF_F_HW_VLAN_CTAG_FILTER if NCSI is available. This allows the VLAN core to notify the NCSI driver when changes occur so that the remote NCSI channel can be properly configured to filter on the set VLAN tags. Signed-off-by: Samuel Mendoza-Jonas --- v2: Moved ftgmac100 change into same patch and reordered drivers/net/ethernet/faraday/ftgmac100.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 34dae51effd4..05fe7123d5ae 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1623,6 +1623,8 @@ static const struct net_device_ops ftgmac100_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller= ftgmac100_poll_controller, #endif + .ndo_vlan_rx_add_vid= ncsi_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = ncsi_vlan_rx_kill_vid, }; static int ftgmac100_setup_mdio(struct net_device *netdev) @@ -1837,6 +1839,9 @@ static int ftgmac100_probe(struct platform_device *pdev) NETIF_F_GRO | NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX; + if (priv->use_ncsi) + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + /* AST2400 doesn't have working HW checksum generation */ if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac"))) netdev->hw_features &= ~NETIF_F_HW_CSUM; -- 2.14.0
[PATCH net-next v3 2/3] net/ncsi: Configure VLAN tag filter
Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI stack process new VLAN tags and configure the channel VLAN filter appropriately. Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent for each one, meaning the ncsi_dev_state_config_svf state must be repeated. An internal list of VLAN tags is maintained, and compared against the current channel's ncsi_channel_filter in order to keep track within the state. VLAN filters are removed in a similar manner, with the introduction of the ncsi_dev_state_config_clear_vids state. The maximum number of VLAN tag filters is determined by the "Get Capabilities" response from the channel. Signed-off-by: Samuel Mendoza-Jonas --- v3: - Add comment describing change to ncsi_find_filter() - Catch NULL in clear_one_vid() from ncsi_get_filter() - Simplify state changes when kicking updated channel include/net/ncsi.h | 2 + net/ncsi/internal.h| 11 ++ net/ncsi/ncsi-manage.c | 308 - net/ncsi/ncsi-rsp.c| 9 +- 4 files changed, 326 insertions(+), 4 deletions(-) diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 68680baac0fd..1f96af46df49 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -28,6 +28,8 @@ struct ncsi_dev { }; #ifdef CONFIG_NET_NCSI +int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid); +int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 1308a56f2591..af3d636534ef 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -180,6 +180,7 @@ struct ncsi_channel { #define NCSI_CHANNEL_INACTIVE 1 #define NCSI_CHANNEL_ACTIVE2 #define NCSI_CHANNEL_INVISIBLE 3 + boolreconfigure_needed; spinlock_t lock; /* Protect filters etc */ struct ncsi_package *package; struct ncsi_channel_version version; @@ -235,6 +236,9 @@ enum { 
ncsi_dev_state_probe_dp, ncsi_dev_state_config_sp= 0x0301, ncsi_dev_state_config_cis, + ncsi_dev_state_config_clear_vids, + ncsi_dev_state_config_svf, + ncsi_dev_state_config_ev, ncsi_dev_state_config_sma, ncsi_dev_state_config_ebf, #if IS_ENABLED(CONFIG_IPV6) @@ -253,6 +257,12 @@ enum { ncsi_dev_state_suspend_done }; +struct vlan_vid { + struct list_head list; + __be16 proto; + u16 vid; +}; + struct ncsi_dev_priv { struct ncsi_dev ndev;/* Associated NCSI device */ unsigned intflags; /* NCSI device flags */ @@ -276,6 +286,7 @@ struct ncsi_dev_priv { struct work_struct work;/* For channel management */ struct packet_type ptype; /* NCSI packet Rx handler */ struct list_headnode;/* Form NCSI device list */ + struct list_headvlan_vids; /* List of active VLAN IDs */ }; struct ncsi_cmd_arg { diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a3bd5fa8ad09..11904b3b702d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table) return sizes[table]; } +u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) +{ + struct ncsi_channel_filter *ncf; + int size; + + ncf = nc->filters[table]; + if (!ncf) + return NULL; + + size = ncsi_filter_size(table); + if (size < 0) + return NULL; + + return ncf->data + size * index; +} + +/* Find the first active filter in a filter table that matches the given + * data parameter. If data is NULL, this returns the first active filter. 
+ */ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) { struct ncsi_channel_filter *ncf; @@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) index = -1; while ((index = find_next_bit(bitmap, ncf->total, index + 1)) < ncf->total) { - if (!memcmp(ncf->data + size * index, data, size)) { + if (!data || !memcmp(ncf->data + size * index, data, size)) { spin_unlock_irqrestore(&nc->lock, flags); return index; } @@ -639,6 +658,95 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_functional; } +/* Check the VLAN filter bitmap for a set filter, and construct a + * "Set VLAN Filter - Disable" packet if found. + */ +static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, +
[PATCH net-next v3 0/3] NCSI VLAN Filtering Support
This series (mainly patch 2) adds VLAN filtering to the NCSI implementation. A fair amount of code already exists in the NCSI stack for VLAN filtering but none of it is actually hooked up. This goes the final mile and fixes a few bugs in the existing code found along the way (patch 1). Patch 3 adds the appropriate flag and callbacks to the ftgmac100 driver to enable filtering as it's a large consumer of NCSI (and what I've been testing on). v3: - Add comment describing change to ncsi_find_filter() - Catch NULL in clear_one_vid() from ncsi_get_filter() - Simplify state changes when kicking updated channel Samuel Mendoza-Jonas (3): net/ncsi: Fix several packet definitions net/ncsi: Configure VLAN tag filter ftgmac100: Support NCSI VLAN filtering when available drivers/net/ethernet/faraday/ftgmac100.c | 5 + include/net/ncsi.h | 2 + net/ncsi/internal.h | 11 ++ net/ncsi/ncsi-cmd.c | 10 +- net/ncsi/ncsi-manage.c | 308 ++- net/ncsi/ncsi-pkt.h | 2 +- net/ncsi/ncsi-rsp.c | 12 +- 7 files changed, 339 insertions(+), 11 deletions(-) -- 2.14.0
[PATCH] [media] uvcvideo: zero seq number when disabling stream
For bulk-based devices, when disabling the video stream, in addition to issue CLEAR_FEATURE(HALT), it is better to set alternate setting 0 as well or the sequence number in host side will probably not reset to zero. Then in next time video stream start, the device will expect host starts packet from 0 sequence number but host actually continue the sequence number from last transaction and this causes transaction errors. This commit fixes this by adding set alternate setting 0 back as what isoch-based devices do. Below error message will also be eliminated for some devices: uvcvideo: Non-zero status (-71) in video completion handler. Signed-off-by: Hans Yang --- drivers/media/usb/uvc/uvc_video.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index fb86d6af398d..ad80c2a6da6a 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -1862,10 +1862,9 @@ int uvc_video_enable(struct uvc_streaming *stream, int enable) if (!enable) { uvc_uninit_video(stream, 1); - if (stream->intf->num_altsetting > 1) { - usb_set_interface(stream->dev->udev, + usb_set_interface(stream->dev->udev, stream->intfnum, 0); - } else { + if (stream->intf->num_altsetting == 1) { /* UVC doesn't specify how to inform a bulk-based device * when the video stream is stopped. Windows sends a * CLEAR_FEATURE(HALT) request to the video streaming -- 2.1.4
Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag
On Sun, 2017-08-27 at 22:27 -0700, Joel Fernandes wrote: > Hi Mike, > > On Sun, Aug 27, 2017 at 11:07 AM, Mike Galbraith wrote: > > On Sat, 2017-08-26 at 23:39 -0700, Joel Fernandes wrote: > >> > >> Also about real world benchmarks, in Android we have usecases that > >> show that the graphics performance and we have risk of frame drops if > >> we don't use the sync flag so this is a real world need. > > > > That likely has everything to do with cpufreq not realizing that your > > CPUs really are quite busy when scheduling cross core at fairly high > > frequency, and not clocking up properly. > > > > I'm glad you brought this point up. Since Android O, the userspace > processes are much more split across procedure calls due to a feature > called treble (which does this for security, modularity etc). Due to > this, a lot of things that were happening within a process boundary > happen now across process boundaries over the binder bus. Early on > folks noticed that this caused performance issues without sync flag > being used as a more strong hint. This can happen when there are 2 > threads are in different frequency domains on different CPUs and are > communicating over binder, due to this the combined load of both > threads is divided between the individual CPUs and causes them to run > at lower frequency. Where as if they are running together on the same > CPUs, then they would run at a higher frequency and perform better as > their combined load would run at a higher frequency. So a stronger > sync actually helps this case if we're careful about using it when > possible. Sure, but isn't that really a cpufreq issue? We schedule cross core quite aggressively for obvious reasons. Now on mostly idle handheld devices, you may get better battery life by stacking tasks a bit more, in which case a sync-me-harder flag may be what you really want/need, but with modern CPUs, I'm kinda skeptical of that, would have to see cold hard numbers to become a believer. 
Iff deeper cstate etc for longer does make a big difference, I can imagine wakeup time migrate leftward if capacity exists as an "on battery" tactic. (though that thought also invokes some unpleasant bounce fest images) -Mike
Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce
On Mon, Aug 28, 2017 at 06:28:08AM +0100, Al Viro wrote: > On Mon, Aug 28, 2017 at 02:38:37PM +1000, Paul Mackerras wrote: > > On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote: > > > On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote: > > > > > > > It seems to me that it would be better to do the anon_inode_getfd() > > > > call before the kvm_get_kvm() call, and go to the fail label if it > > > > fails. > > > > > > And what happens if another thread does close() on the (guessed) fd? > > > > Chaos ensues, but mostly because we don't have proper mutual exclusion > > on the modifications to the list. I'll add a mutex_lock/unlock to > > kvm_spapr_tce_release() and move the anon_inode_getfd() call inside > > the mutex. > > > > It looks like the other possible uses of the fd (mmap, and passing it > > as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM > > device fd) are safe. > > Frankly, it's a lot saner to have "no failure points past anon_inode_getfd()" > policy... Right. In my latest patch, there are no failure points past anon_inode_getfd(). Paul.
Re: [PATCH] connector: Delete an error message for a failed memory allocation in cn_queue_alloc_callback_entry()
On Sun, Aug 27, 2017 at 11:16:06PM +, Waskiewicz Jr, Peter wrote: > On 8/27/17 3:26 PM, SF Markus Elfring wrote: > > From: Markus Elfring > > Date: Sun, 27 Aug 2017 21:18:37 +0200 > > > > Omit an extra message for a memory allocation failure in this function. > > > > This issue was detected by using the Coccinelle software. > > Did coccinelle trip on the message or the fact you weren't returning NULL? > You've misread the patch somehow. The existing code has a NULL return and it's preserved in Markus's patch. This sort of patch is to fix a checkpatch.pl warning. The error message from this kzalloc() isn't going to get printed because it's a small allocation and small allocations always succeed in current kernels. But probably the main reason checkpatch complains is that kmalloc() already prints a stack trace and a bunch of other information so the printk doesn't add anyting. Removing it saves a little memory. I'm mostly a fan of running checkpatch on new patches or staging and not on old code... regards, dan carpenter
[PATCH] powerpc/512x: clk: constify clk_div_table
clk_div_table are not supposed to change at runtime. mpc512x_clk_divtable function working with const clk_div_table. So mark the non-const structs as const. Signed-off-by: Arvind Yadav --- arch/powerpc/platforms/512x/clock-commonclk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index add5a53..b3097fe 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -363,7 +363,7 @@ static int get_cpmf_mult_x2(void) */ /* applies to the IPS_DIV, and PCI_DIV values */ -static struct clk_div_table divtab_2346[] = { +static const struct clk_div_table divtab_2346[] = { { .val = 2, .div = 2, }, { .val = 3, .div = 3, }, { .val = 4, .div = 4, }, @@ -372,7 +372,7 @@ static int get_cpmf_mult_x2(void) }; /* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */ -static struct clk_div_table divtab_1234[] = { +static const struct clk_div_table divtab_1234[] = { { .val = 1, .div = 1, }, { .val = 2, .div = 2, }, { .val = 3, .div = 3, }, -- 1.9.1
Re: [LKP] [lkp-robot] [sched/cfs] 625ed2bf04: unixbench.score -7.4% regression
kernel test robot writes: > Greeting, > > FYI, we noticed a -7.4% regression of unixbench.score due to commit: > > > commit: 625ed2bf049d5a352c1bcca962d6e133454eaaff ("sched/cfs: Make > util/load_avg more stable") > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master > > in testcase: unixbench > on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with > 64G memory > with following parameters: > > runtime: 300s > nr_task: 100% > test: spawn > cpufreq_governor: performance > > test-description: UnixBench is the original BYTE UNIX benchmark suite aims to > test performance of Unix-like system. > This has been merged by v4.13-rc1, so we checked it again. If my understanding were correct, the patch changes the algorithm to calculate the load of CPU, so it influences the load balance behavior for this test case. 4.73 ± 8% -31.3% 3.25 ± 10% sched_debug.cpu.nr_running.max 0.95 ± 5% -29.0% 0.67 ± 4% sched_debug.cpu.nr_running.stddev As above, the effect is that the tasks are distributed into more CPUs, that is, system is more balanced. But this triggered more contention on tasklist_lock, so hurt the unixbench score, as below. 26.60 -10.6 16.05 perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.cpuidle_enter.call_cpuidle.do_idle 10.10+2.4 12.53 perf-profile.calltrace.cycles-pp._raw_write_lock_irq.do_exit.do_group_exit.sys_exit_group.entry_SYSCALL_64_fastpath 8.03+2.6 10.63 perf-profile.calltrace.cycles-pp._raw_write_lock_irq.release_task.wait_consider_task.do_wait.sys_wait4 17.98+5.2 23.14 perf-profile.calltrace.cycles-pp._raw_read_lock.do_wait.sys_wait4.entry_SYSCALL_64_fastpath 7.47+5.9 13.33 perf-profile.calltrace.cycles-pp._raw_write_lock_irq.copy_process._do_fork.sys_clone.do_syscall_64 The patch makes the tasks distributed more balanced, so I think scheduler do better job here. The problem is that the tasklist_lock isn't scalable. 
But considering this is only a micro-benchmark which specially exercises fork/exit/wait syscall, this may be not a big problem in reality. So, all in all, I think we can ignore this regression. Best Regards, Huang, Ying
[PATCH] net: stmmac: constify clk_div_table
clk_div_table are not supposed to change at runtime. meson8b_dwmac structure is working with const clk_div_table. So mark the non-const structs as const. Signed-off-by: Arvind Yadav --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 968..4404650b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -89,7 +89,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac) char clk_name[32]; const char *clk_div_parents[1]; const char *mux_parent_names[MUX_CLK_NUM_PARENTS]; - static struct clk_div_table clk_25m_div_table[] = { + static const struct clk_div_table clk_25m_div_table[] = { { .val = 0, .div = 5 }, { .val = 1, .div = 10 }, { /* sentinel */ }, -- 1.9.1
[PATCH] leds: pca955x: Don't invert requested value in pca955x_gpio_set_value()
The PCA9552 lines can be used either for driving LEDs or as GPIOs. The manual states that for LEDs, the operation is open-drain: The LSn LED select registers determine the source of the LED data. 00 = output is set LOW (LED on) 01 = output is set high-impedance (LED off; default) 10 = output blinks at PWM0 rate 11 = output blinks at PWM1 rate For GPIOs it suggests a pull-up so that the open-case drives the line high: For use as output, connect external pull-up resistor to the pin and size it according to the DC recommended operating characteristics. LED output pin is HIGH when the output is programmed as high-impedance, and LOW when the output is programmed LOW through the ‘LED selector’ register. The output can be pulse-width controlled when PWM0 or PWM1 are used. Now, I have a hardware design that uses the LED controller to control LEDs. However, for $reasons, we're using the leds-gpio driver to drive the them. The reasons are here are a tangent but lead to the discovery of the inversion, which manifested as the LEDs being set to full brightness at boot when we expected them to be off. As we're driving the LEDs through leds-gpio, this means wending our way through the gpiochip abstractions. So with that in mind we need to describe an active-low GPIO configuration to drive the LEDs as though they were GPIOs. The set() gpiochip callback in leds-pca955x does the following: ... if (val) pca955x_led_set(&led->led_cdev, LED_FULL); else pca955x_led_set(&led->led_cdev, LED_OFF); ... Where LED_FULL = 255. pca955x_led_set() in turn does: ... switch (value) { case LED_FULL: ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON); break; ... Where PCA955X_LS_LED_ON is defined as: #define PCA955X_LS_LED_ON 0x0 /* Output LOW */ So here we have some type confusion: We've crossed domains from GPIO behaviour to LED behaviour without accounting for possible inversions in the process. 
Stepping back to leds-gpio for a moment, during probe() we call create_gpio_led(), which eventually executes: if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP) { state = gpiod_get_value_cansleep(led_dat->gpiod); if (state < 0) return state; } else { state = (template->default_state == LEDS_GPIO_DEFSTATE_ON); } ... ret = gpiod_direction_output(led_dat->gpiod, state); In the devicetree the GPIO is annotated as active-low, and gpiod_get_value_cansleep() handles this for us: int gpiod_get_value_cansleep(const struct gpio_desc *desc) { int value; might_sleep_if(extra_checks); VALIDATE_DESC(desc); value = _gpiod_get_raw_value(desc); if (value < 0) return value; if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; return value; } _gpiod_get_raw_value() in turn calls through the get() callback for the gpiochip implementation, so returning to our get() implementation in leds-pca955x we find we extract the raw value from hardware: static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset) { struct pca955x *pca955x = gpiochip_get_data(gc); struct pca955x_led *led = &pca955x->leds[offset]; u8 reg = pca955x_read_input(pca955x->client, led->led_num / 8); return !!(reg & (1 << (led->led_num % 8))); } This behaviour is not symmetric with that of set(), where the val is inverted by the driver. Closing the loop on the GPIO_ACTIVE_LOW inversions, gpiod_direction_output(), like gpiod_get_value_cansleep(), handles it for us: int gpiod_direction_output(struct gpio_desc *desc, int value) { VALIDATE_DESC(desc); if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; else value = !!value; return _gpiod_direction_output_raw(desc, value); } All-in-all, with a value of 'keep' for default-state property in a leds-gpio child node, the current state of the hardware will in-fact be inverted; precisely the opposite of what was intended. Rework leds-pca955x so that we avoid the incorrect inversion and clarify the semantics with respect to GPIO. 
Signed-off-by: Andrew Jeffery --- drivers/leds/leds-pca955x.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 09303fd1fdc6..8b8f81cf10cb 100644 --- a/drivers/leds/leds-pca955
Re: [PATCH v8 00/13] Unify the interrupt delivery mode and do its setup in advance
Hi, Follow Juergen's advice, +CC xen-devel and linux-acpi In case a single patch of a series isn't stand alone it would be nice to receive at least the cover letter of the series in order to know what its all about. Thanks, dou. At 08/28/2017 11:20 AM, Dou Liyang wrote: Changes V7 --> V8: - Change the order of [12/13] patch and [11/13]patch suggested by Rafael J. Wysocki. - Fix some comments. - Do more tests in Thinkpad x121e -- Thanks for Borislav Petkov's help. [Background] MP specification defines three different interrupt delivery modes as follows: 1. PIC Mode 2. Virtual Wire Mode 3. Symmetric I/O Mode They will be setup in the different periods of booting time: 1. *PIC Mode*, the default interrupt delivery modes, will be set first. 2. *Virtual Wire Mode* will be setup during ISA IRQ initialization( step 1 in the figure.1). 3. *Symmetric I/O Mode*'s setup is related to the system 3.1 In SMP-capable system, setup during prepares CPUs(step 2) 3.2 In UP system, setup during initializes itself(step 3). start_kernel +---+ | +--> ... | |setup_arch +--> +---+ | |init_IRQ +-> +--+-+ | |init_ISA_irqs | +--> +-++ | | ++ +---> +--> | 1.init_bsp_APIC| | ... ++ +---> | rest_init +--->---+-+ | | kernel_init | +> +-+ | | kernel_init_freeable | +-> +-+ | | smp_prepare_cpus | +---> ++-+ | | | +---+ | | +-> |2. apic_bsp_setup | | | +---+ | | v | smp_init +---> +---++ |+---+ +--> |3. apic_bsp_setup | +---+ figure.1 The flow chart of the kernel startup process [Problem] 1. Cause kernel in an unmatched mode at the beginning of booting time. 2. Cause the dump-capture kernel hangs with 'notsc' option inherited from 1st kernel option. 3. Cause the code hard to read and maintain. As Ingo's and Eric's discusses[1,2], it need to be refactor. [Solution] 1. 
Construct a selector to unify these switches ++ |disable_apic++ ++ true | |false | | | +v--+ | |!boot_cpu_has(X86_FEATURE_APIC)+---+ +---+ true | |false | | | +---v-+v |!smp_found_config|PIC MODE +---+-+ |false |true | | v +---v-+ SYMMETRIC IO MODE | !acpi_lapic | +--+--+ | v VIRTUAL WIRE MODE 2. Unifying these setup steps of SMP-capable and UP system start_kernel ---+ | | | |x86_late_time_init +>---++ || || ++ |+> | 4. init_interrupt_mode | | ++ v 3. Execute the function as soon as possible. [Test] 1. In a theoretical code analysis, the patchset can wrap the original logic. 1) The original logic of the interrupt delivery mode setup: -Step O_1) Keep in PIC mode or virtual wire mode: Check (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) true: PIC mode false: virtual wire mode -Step O_2) Try to switch to symmetric IO mode: O_2_1) In up system: -Check disable_apic ture: O_S_1 (original situation 1) -Check whether there is a separate or integrated chip don't has: O_S_2 -Check !smp_found_config ture: O_S_3 -Others: O_S_4 O_2_2) In smp-capable system: -Check !smp_found_config && !acpi_lapic true: goto O_2_1 -Check if it is LAPIC don't has: O_S_5 -Check !max_cpus true: O_S_6 -read_apic_id() != boot_cpu_physical_apicid true: O_S_7 -Others: O_S_8 2) After that patchset, the new logic: -Step N_1) Skip step O_1 and try to switch to the final interrupt mode -Check disable_apic ture: N_S_1 (New situation 1) -Check whether there is a separate or integrated chip ture: N_S_2 -Check if (!smp_found_config) ture: N_S_3 -Check !setup_max_cpus ture: N_S_4 -Check read_apic_id() != boot_cpu_physical_apicid ture: N_S_5 -Others: N_S_6 O_S_1 is covered in N_S_1 O_S_2 is covered in N_S_2 O_S_
[PATCH] remoteproc: Introduce rproc handle accessor for children
In certain circumstances rpmsg devices needs to acquire a handle to the ancestor remoteproc instance, e.g. to invoke rproc_report_crash() when a fatal error is detected. Introduce an interface that walks the device tree in search for a remoteproc instance and return this. Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 18 ++ include/linux/remoteproc.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 564061dcc019..5b1b19519275 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1296,6 +1296,23 @@ struct rproc *rproc_get_by_phandle(phandle phandle) EXPORT_SYMBOL(rproc_get_by_phandle); /** + * rproc_get_by_child() - acquire rproc handle of @dev's ancestor + * @dev: child device to find ancestor of + * + * Returns the ancestor rproc instance, or NULL if not found. + */ +struct rproc *rproc_get_by_child(struct device *dev) +{ + for (dev = dev->parent; dev; dev = dev->parent) { + if (dev->type && !strcmp(dev->type->name, "remoteproc")) + return dev->driver_data; + } + + return NULL; +} +EXPORT_SYMBOL(rproc_get_by_child); + +/** * rproc_add() - register a remote processor * @rproc: the remote processor handle to register * @@ -1440,6 +1457,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name, rproc->dev.parent = dev; rproc->dev.type = &rproc_type; rproc->dev.class = &rproc_class; + rproc->dev.driver_data = rproc; /* Assign a unique device index and name */ rproc->index = ida_simple_get(&rproc_dev_index, 0, 0, GFP_KERNEL); diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h index 81da49564ff4..44e630eb3d94 100644 --- a/include/linux/remoteproc.h +++ b/include/linux/remoteproc.h @@ -510,6 +510,8 @@ struct rproc_vdev { }; struct rproc *rproc_get_by_phandle(phandle phandle); +struct rproc *rproc_get_by_child(struct device *dev); + struct rproc *rproc_alloc(struct device *dev, const char 
*name, const struct rproc_ops *ops, const char *firmware, int len); -- 2.12.0
[PATCH] remoteproc: Stop subdevices in reverse order
Subdevices might depend on earlier registered subdevices for communication purposes, as such they should be stopped in reverse order so that said communication channel is removed after the dependent subdevice is stopped. Signed-off-by: Bjorn Andersson --- drivers/remoteproc/remoteproc_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index e82f60182027..5aaa4c21d14d 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -794,7 +794,7 @@ static void rproc_remove_subdevices(struct rproc *rproc) { struct rproc_subdev *subdev; - list_for_each_entry(subdev, &rproc->subdevs, node) + list_for_each_entry_reverse(subdev, &rproc->subdevs, node) subdev->remove(subdev); } -- 2.12.0
[PATCH 2/2] clk: zte: constify clk_div_table
clk_div_table are not supposed to change at runtime. All functions working with clk_div_table provided by work with const clk_div_table. So mark the non-const structs as const. Signed-off-by: Arvind Yadav --- drivers/clk/zte/clk-zx296718.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c index 27f853d..354dd50 100644 --- a/drivers/clk/zte/clk-zx296718.c +++ b/drivers/clk/zte/clk-zx296718.c @@ -451,7 +451,7 @@ FFACTOR(0, "emmc_mux_div2", "emmc_mux", 1, 2, CLK_SET_RATE_PARENT), }; -static struct clk_div_table noc_div_table[] = { +static const struct clk_div_table noc_div_table[] = { { .val = 1, .div = 2, }, { .val = 3, .div = 4, }, }; @@ -644,7 +644,7 @@ static int __init top_clocks_init(struct device_node *np) return 0; } -static struct clk_div_table common_even_div_table[] = { +static const struct clk_div_table common_even_div_table[] = { { .val = 0, .div = 1, }, { .val = 1, .div = 2, }, { .val = 3, .div = 4, }, @@ -656,7 +656,7 @@ static int __init top_clocks_init(struct device_node *np) { .val = 15, .div = 16, }, }; -static struct clk_div_table common_div_table[] = { +static const struct clk_div_table common_div_table[] = { { .val = 0, .div = 1, }, { .val = 1, .div = 2, }, { .val = 2, .div = 3, }, -- 1.9.1
[PATCH 1/2] clk: imx: constify clk_div_table
clk_div_table are not supposed to change at runtime. All functions working with clk_div_table provided by work with const clk_div_table. So mark the non-const structs as const. Signed-off-by: Arvind Yadav --- drivers/clk/imx/clk-imx6sl.c | 6 +++--- drivers/clk/imx/clk-imx6sx.c | 6 +++--- drivers/clk/imx/clk-imx6ul.c | 6 +++--- drivers/clk/imx/clk-imx7d.c | 4 ++-- drivers/clk/imx/clk-vf610.c | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/clk/imx/clk-imx6sl.c b/drivers/clk/imx/clk-imx6sl.c index 5fd4dda..9642cdf 100644 --- a/drivers/clk/imx/clk-imx6sl.c +++ b/drivers/clk/imx/clk-imx6sl.c @@ -71,7 +71,7 @@ static const char *pll6_bypass_sels[] = { "pll6", "pll6_bypass_src", }; static const char *pll7_bypass_sels[] = { "pll7", "pll7_bypass_src", }; -static struct clk_div_table clk_enet_ref_table[] = { +static const struct clk_div_table clk_enet_ref_table[] = { { .val = 0, .div = 20, }, { .val = 1, .div = 10, }, { .val = 2, .div = 5, }, @@ -79,14 +79,14 @@ { } }; -static struct clk_div_table post_div_table[] = { +static const struct clk_div_table post_div_table[] = { { .val = 2, .div = 1, }, { .val = 1, .div = 2, }, { .val = 0, .div = 4, }, { } }; -static struct clk_div_table video_div_table[] = { +static const struct clk_div_table video_div_table[] = { { .val = 0, .div = 1, }, { .val = 1, .div = 2, }, { .val = 2, .div = 1, }, diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c index b5c96de..e6d389e 100644 --- a/drivers/clk/imx/clk-imx6sx.c +++ b/drivers/clk/imx/clk-imx6sx.c @@ -105,7 +105,7 @@ IMX6SX_CLK_EPIT2, }; -static struct clk_div_table clk_enet_ref_table[] = { +static const struct clk_div_table clk_enet_ref_table[] = { { .val = 0, .div = 20, }, { .val = 1, .div = 10, }, { .val = 2, .div = 5, }, @@ -113,14 +113,14 @@ { } }; -static struct clk_div_table post_div_table[] = { +static const struct clk_div_table post_div_table[] = { { .val = 2, .div = 1, }, { .val = 1, .div = 2, }, { .val = 0, .div = 4, }, { } }; 
-static struct clk_div_table video_div_table[] = { +static const struct clk_div_table video_div_table[] = { { .val = 0, .div = 1, }, { .val = 1, .div = 2, }, { .val = 2, .div = 1, }, diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index b4e0dff..5e8c18a 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -78,7 +78,7 @@ IMX6UL_CLK_MMDC_P0_FAST, IMX6UL_CLK_MMDC_P0_IPG, }; -static struct clk_div_table clk_enet_ref_table[] = { +static const struct clk_div_table clk_enet_ref_table[] = { { .val = 0, .div = 20, }, { .val = 1, .div = 10, }, { .val = 2, .div = 5, }, @@ -86,14 +86,14 @@ { } }; -static struct clk_div_table post_div_table[] = { +static const struct clk_div_table post_div_table[] = { { .val = 2, .div = 1, }, { .val = 1, .div = 2, }, { .val = 0, .div = 4, }, { } }; -static struct clk_div_table video_div_table[] = { +static const struct clk_div_table video_div_table[] = { { .val = 0, .div = 1, }, { .val = 1, .div = 2, }, { .val = 2, .div = 1, }, diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index 3da1218..2305699 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -27,7 +27,7 @@ static u32 share_count_sai3; static u32 share_count_nand; -static struct clk_div_table test_div_table[] = { +static const struct clk_div_table test_div_table[] = { { .val = 3, .div = 1, }, { .val = 2, .div = 1, }, { .val = 1, .div = 2, }, @@ -35,7 +35,7 @@ { } }; -static struct clk_div_table post_div_table[] = { +static const struct clk_div_table post_div_table[] = { { .val = 3, .div = 4, }, { .val = 2, .div = 1, }, { .val = 1, .div = 2, }, diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c index 59b1863..6dae543 100644 --- a/drivers/clk/imx/clk-vf610.c +++ b/drivers/clk/imx/clk-vf610.c @@ -102,7 +102,7 @@ static const char *ftm_fix_sels[] = { "sxosc", "ipg_bus", }; -static struct clk_div_table pll4_audio_div_table[] = { +static const struct clk_div_table 
pll4_audio_div_table[] = { { .val = 0, .div = 1 }, { .val = 1, .div = 2 }, { .val = 2, .div = 6 }, -- 1.9.1
[PATCH 0/2] constify clk clk_div_table
clk_div_table are not supposed to change at runtime. All functions working with clk_div_table provided by <linux/clk-provider.h> work with const clk_div_table. So mark the non-const structs as const. Arvind Yadav (2): [PATCH 1/2] clk: imx: constify clk_div_table [PATCH 2/2] clk: zte: constify clk_div_table drivers/clk/imx/clk-imx6sl.c | 6 +++--- drivers/clk/imx/clk-imx6sx.c | 6 +++--- drivers/clk/imx/clk-imx6ul.c | 6 +++--- drivers/clk/imx/clk-imx7d.c| 4 ++-- drivers/clk/imx/clk-vf610.c| 2 +- drivers/clk/zte/clk-zx296718.c | 6 +++--- 6 files changed, 15 insertions(+), 15 deletions(-) -- 1.9.1
Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce
On Mon, Aug 28, 2017 at 02:38:37PM +1000, Paul Mackerras wrote: > On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote: > > On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote: > > > > > It seems to me that it would be better to do the anon_inode_getfd() > > > call before the kvm_get_kvm() call, and go to the fail label if it > > > fails. > > > > And what happens if another thread does close() on the (guessed) fd? > > Chaos ensues, but mostly because we don't have proper mutual exclusion > on the modifications to the list. I'll add a mutex_lock/unlock to > kvm_spapr_tce_release() and move the anon_inode_getfd() call inside > the mutex. > > It looks like the other possible uses of the fd (mmap, and passing it > as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM > device fd) are safe. Frankly, it's a lot saner to have "no failure points past anon_inode_getfd()" policy...
Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag
Hi Mike, On Sun, Aug 27, 2017 at 11:07 AM, Mike Galbraith wrote: > On Sat, 2017-08-26 at 23:39 -0700, Joel Fernandes wrote: >> >> Also about real world benchmarks, in Android we have usecases that >> show that the graphics performance and we have risk of frame drops if >> we don't use the sync flag so this is a real world need. > > That likely has everything to do with cpufreq not realizing that your > CPUs really are quite busy when scheduling cross core at fairly high > frequency, and not clocking up properly. > I'm glad you brought this point up. Since Android O, the userspace processes are much more split across procedure calls due to a feature called treble (which does this for security, modularity etc). Due to this, a lot of things that were happening within a process boundary happen now across process boundaries over the binder bus. Early on folks noticed that this caused performance issues without the sync flag being used as a stronger hint. This can happen when there are 2 threads in different frequency domains on different CPUs communicating over binder; due to this the combined load of both threads is divided between the individual CPUs and causes them to run at lower frequency. Whereas if they are running together on the same CPUs, then they would run at a higher frequency and perform better as their combined load would run at a higher frequency. So a stronger sync actually helps this case if we're careful about using it when possible. thanks, -Joel > -Mike
Re: [PATCH net-next v7 05/10] landlock: Add LSM hooks related to filesystem
On Sun, Aug 27, 2017 at 03:31:35PM +0200, Mickaël Salaün wrote: > > > How can you add 3rd argument? All FS events would have to get it, > > but in some LSM hooks such argument will be meaningless, whereas > > in other places it will carry useful info that rule can operate on. > > Would that mean that we'll have FS_3 event type and only few LSM > > hooks will be converted to it. That works, but then we'll lose > > compatiblity with old rules written for FS event and that given hook. > > Otherwise we'd need to have fancy logic to accept old FS event > > into FS_3 LSM hook. > > If we want to add a third argument to the FS event, then it will become > accessible because its type will be different than NOT_INIT. This keep > the compatibility with old rules because this new field was then denied. > > If we want to add a new argument but only for a subset of the hooks used > by the FS event, then we need to create a new event, like FS_FCNTL. For > example, we may want to add a FS_RENAME event to be able to tie the > source file and the destination file of a rename call. that's exactly my point. To add another argument FS event to a subset of hooks will require either new FS_FOO and to be backwards compatible these hooks will call _both_ FS and FS_FOO or some magic logic on kernel side that will allow old FS rules to be attached to FS_FOO hooks? Two calls doesn't scale and if we do 'magic logic' can we do it now and avoid introducing events altogether? Like all landlock programs can be landlock type and they would need to declare what arg1, arg2, argN they expect. Then at attach time the kernel only needs to verify that hook arg types match what program requested. > Anyway, I added the subtype/ABI version as a safeguard in case of > unexpected future evolution. I don't think that abi/version field adds anything in this context. I still think it should simply be removed.
Re: [PATCH v2 RESEND 1/2] dt-bindings: serial: 8250: Add MediaTek BTIF controller bindings
On 08/27/2017 10:39 PM, Sean Wang wrote: On Sun, 2017-08-27 at 22:00 +0300, Matthias Brugger wrote: On 08/19/2017 09:06 PM, sean.w...@mediatek.com wrote: From: Sean Wang Document the devicetree bindings in 8250.txt for MediaTek BTIF controller which could be found on MT7622 and MT7623 SoC. Signed-off-by: Sean Wang --- Documentation/devicetree/bindings/serial/8250.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt index 419ff6c..7528d90 100644 --- a/Documentation/devicetree/bindings/serial/8250.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt @@ -14,6 +14,9 @@ Required properties: tegra132, or tegra210. - "nxp,lpc3220-uart" - "ralink,rt2880-uart" + - For MediaTek MT7623, must contain "mediatek,mt7623-btif" + - For other MediaTek SoCs , must contain "mediatek,-btif", + "mediatek,mt7623-btif" where is mt7622. Hm, to me that's confusing. What about: "mediatek,mt7623-btif": for MediaTek MT7623 "mediatek,mt7622-btif", "mediatek,mt7623-btif": for MediaTek MT7622 If in the future we have more SoCs that support the BTIF, we should add them like the mt7622 case. I had v3, but it should have similar logic and also got ack from Rob I knew all your logic of adding binding document for all MediaTek devices, even I alway added MediaTek device in dt-bindings as the way you mentioned here, but I felt this way is fine for this kind of dedicated document. The reason i don't add it as usual is the following. 8250.txt is common and shared among all uart like devices, so i don't want btif device occupies too much section and bloat the document when every new MediaTek SoC is introduced. So instead I refer to existing Nvidia device added in 8250.txt which I thought its way is simple, elegant and also using pattern I can use to add btif devices. Working on my email backlog after vactions I didn't see that this was accepted by Rob. Sorry for the noise. Matthias
Re: [PATCH v7 12/12] powerpc/vas: Define copy/paste interfaces
Michael Ellerman [m...@ellerman.id.au] wrote: > Hi Suka, > > A few more things ... > > Sukadev Bhattiprolu writes: > > > diff --git a/arch/powerpc/platforms/powernv/copy-paste.h > > b/arch/powerpc/platforms/powernv/copy-paste.h > > new file mode 100644 > > index 000..7783bb8 > > --- /dev/null > > +++ b/arch/powerpc/platforms/powernv/copy-paste.h > > @@ -0,0 +1,74 @@ > > +/* > > + * Copyright 2016 IBM Corp. > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of the GNU General Public License > > + * as published by the Free Software Foundation; either version > > + * 2 of the License, or (at your option) any later version. > > + */ > > + > > +/* > > + * Macros taken from > > tools/testing/selftests/powerpc/context_switch/cp_abort.c > > + */ > > These are both out of date, they're changed in v3.0B. > > > +#define PASTE(RA, RB, L, RC) \ > > + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) \ > > + | (L) << (31-10) | (RC) << (31-31)) > > You should define PPC_PASTE() in ppc-opcode.h > > We already have PPC_INST_PASTE, so use that. > > L and RC are gone. Ok. I thought they would come back later, but of course we can update these kernel-only calls then. > > > + > > +#define COPY(RA, RB, L) \ > > + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) \ > > + | (L) << (31-10)) > > Use PPC_COPY(). > Ok > > + > > +#define CR0_FXM"0x80" > > I don't think a #define for this helps readability. > > > +#define CR0_SHIFT 28 > > +#define CR0_MASK 0xF > > Not used. Will need them now to return value in cr0? > > > +/* > > + * Copy/paste instructions: > > + * > > + * copy RA,RB,L > > + * Copy contents of address (RA) + effective_address(RB) > > + * to internal copy-buffer. > > + * > > + * L == 1 indicates this is the first copy. > > + * > > + * L == 0 indicates its a continuation of a prior first copy. 
> > + * > > + * paste RA,RB,L > > + * Paste contents of internal copy-buffer to the address > > + * (RA) + effective_address(RB) > > + * > > + * L == 0 indicates its a continuation of a prior paste. i.e. > > + * don't wait for the completion or update status. > > + * > > + * L == 1 indicates this is the last paste in the group (i.e. > > + * wait for the group to complete and update status in CR0). > > + * > > + * For Power9, the L bit must be 'true' in both copy and paste. > > + */ > > + > > +static inline int vas_copy(void *crb, int offset, int first) > > +{ > > + WARN_ON_ONCE(!first); > > Please change the API to not require unused parameters. > > Same for offset. Ok, Haren's NX patches will need to drop those parameters as well. > > > + > > + __asm__ __volatile(stringify_in_c(COPY(%0, %1, %2))";" > > I've never seen __volatile before. > > Just use: asm volatile ok > > > > + : > > + : "b" (offset), "b" (crb), "i" (1) > > + : "memory"); > > + > > + return 0; > > +} > > + > > +static inline int vas_paste(void *paste_address, int offset, int last) > > +{ > > + unsigned long long cr; > > cr is 32-bits actually. ok > > > + WARN_ON_ONCE(!last); > > + > > + cr = 0; > > + __asm__ __volatile(stringify_in_c(PASTE(%1, %2, 1, 1))";" > > + "mfocrf %0," CR0_FXM ";" > > + : "=r" (cr) > > + : "b" (paste_address), "b" (offset) > > + : "memory"); > > You need cr0 in the clobbers. ok > > > + > > + return cr; > > I think it would be more natural if you just returned CR0, so if you did > shift and mask with the CR0 constants you have above. 
> ok > > > diff --git a/arch/powerpc/platforms/powernv/vas-window.c > > b/arch/powerpc/platforms/powernv/vas-window.c > > index 70762c3..73081b4 100644 > > --- a/arch/powerpc/platforms/powernv/vas-window.c > > +++ b/arch/powerpc/platforms/powernv/vas-window.c > > @@ -1040,6 +1041,57 @@ struct vas_window *vas_tx_win_open(int vasid, enum > > vas_cop_type cop, > > } > > EXPORT_SYMBOL_GPL(vas_tx_win_open); > > > > +int vas_copy_crb(void *crb, int offset, bool first) > > +{ > > + if (!vas_initialized()) > > + return -1; > > + > > + return vas_copy(crb, offset, first); > > +} > > +EXPORT_SYMBOL_GPL(vas_copy_crb); > > + > > +#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53) > > +int vas_paste_crb(struct vas_window *txwin, int offset, bool last, bool re) > > +{ > > + int rc; > > + uint64_t val; > > + void *addr; > > + > > + if (!vas_initialized()) > > + return -1; > > This is in the fast path, or at least the runtime path. So I don't think > these checks are wanted, how would we have got this far if vas wasn't > initialised? Yes, I have dropped vas_initialized() now. > > > > cheers
linux-next: manual merge of the xen-tip tree with the tip tree
Hi all, Today's linux-next merge of the xen-tip tree got conflicts in: arch/x86/xen/xen-asm.S arch/x86/xen/xen-asm_64.S between commit: edcb5cf84f05 ("x86/paravirt/xen: Remove xen_patch()") from the tip tree and commits: ad5b8c4ba323("xen: get rid of paravirt op adjust_exception_frame") bd830917233b ("paravirt,xen: remove xen_patch()") from the xen-tip tree. I fixed it up (edcb5cf84f05 and bd830917233b ate more or less the same patch, so I just used the latter version files) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell
Re: [PATCH 3/3] IPI: Avoid to use 2 cache lines for one call_single_data
"Huang, Ying" writes: > Hi, Peter, > > "Huang, Ying" writes: > >> Peter Zijlstra writes: >> >>> On Sat, Aug 05, 2017 at 08:47:02AM +0800, Huang, Ying wrote: Yes. That looks good. So you will prepare the final patch? Or you hope me to do that? >>> >>> I was hoping you'd do it ;-) >> >> Thanks! Here is the updated patch >> >> Best Regards, >> Huang, Ying >> >> -->8-- >> From 957735e9ff3922368286540dab852986fc7b23b5 Mon Sep 17 00:00:00 2001 >> From: Huang Ying >> Date: Mon, 7 Aug 2017 16:55:33 +0800 >> Subject: [PATCH -v3] IPI: Avoid to use 2 cache lines for one >> call_single_data >> >> struct call_single_data is used in IPI to transfer information between >> CPUs. Its size is bigger than sizeof(unsigned long) and less than >> cache line size. Now, it is allocated with no explicit alignment >> requirement. This makes it possible for allocated call_single_data to >> cross 2 cache lines. So that double the number of the cache lines >> that need to be transferred among CPUs. >> >> This is resolved by requiring call_single_data to be aligned with the >> size of call_single_data. Now the size of call_single_data is the >> power of 2. If we add new fields to call_single_data, we may need to >> add pads to make sure the size of new definition is the power of 2. >> Fortunately, this is enforced by gcc, which will report error for not >> power of 2 alignment requirement. >> >> To set alignment requirement of call_single_data to the size of >> call_single_data, a struct definition and a typedef is used. >> >> To test the effect of the patch, we use the vm-scalability multiple >> thread swap test case (swap-w-seq-mt). The test will create multiple >> threads and each thread will eat memory until all RAM and part of swap >> is used, so that huge number of IPI will be triggered when unmapping >> memory. In the test, the throughput of memory writing improves ~5% >> compared with misaligned call_single_data because of faster IPI. > > What do you think about this version? > Ping. 
Best Regards, Huang, Ying
Re: [PATCH 2/2 v2] sched/wait: Introduce lock breaker in wake_up_page_bit
On Sun, Aug 27, 2017 at 6:29 PM, Nicholas Piggin wrote: > > BTW. since you are looking at this stuff, one other small problem I remember > with exclusive waiters is that losing to a concurrent locker puts them to > the back of the queue. I think that could be fixed with some small change to > the wait loops (first add to tail, then retries add to head). Thoughts? No, not that way. First off, it's oddly complicated, but more importantly, the real unfairness you lose to is not other things on the wait queue, but to other lockers that aren't on the wait-queue at all, but instead just come in and do a "test-and-set" without ever even going through the slow path. So instead of playing queuing games, you'd need to just change the unlock sequence. Right now we basically do: - clear lock bit and atomically test if contended (and we play games with bit numbering to do that atomic test efficiently) - if contended, wake things up and you'd change the logic to be - if contended, don't clear the lock bit at all, just transfer the lock ownership directly to the waiters by walking the wait list - clear the lock bit only once there are no more wait entries (either because there were no waiters at all, or because all the entries were just waiting for the lock to be released) which is certainly doable with a couple of small extensions to the page wait key data structure. But most of my clever schemes the last few days were abject failures, and honestly, it's late in the rc. In fact, this late in the game I probably wouldn't even have committed the small cleanups I did if it wasn't for the fact that thinking of the whole WQ_FLAG_EXCLUSIVE bit made me find the bug. So the cleanups were actually what got me to look at the problem in the first place, and then I went "I'm going to commit the cleanup, and then I can think about the bug I just found". I'm just happy that the fix seems to be trivial. 
I was afraid I'd have to do something nastier (like have the EINTR case send another explicit wakeup to make up for the lost one, or some ugly hack like that). It was only when I started looking at the history of that code, and I saw the old bit_lock code, and I went "Hmm. That has the _same_ bug - oh wait, no it doesn't!" that I realized that there was that simple fix. You weren't cc'd on the earlier part of the discussion, you only got added when I realized what the history and simple fix was. Linus
Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system
Hi Juergen, At 08/28/2017 12:32 PM, Juergen Gross wrote: On 28/08/17 06:25, Juergen Gross wrote: On 28/08/17 05:20, Dou Liyang wrote: XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus() initializes interrupts in the XEN PV specific way and does not invoke native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is not invoked either. The invocation of x86_init.intr_mode_init() will be moved from native_smp_prepare_cpus() in a follow up patch to solve . Can you be a little bit more precise here, please? :-) That move would cause the invocation of x86_init.intr_mode_init() for XEN PV platforms. To prevent that, override the default x86_init. intr_mode_init() callback with a noop(). [Rewrited by Thomas Gleixner ] Signed-off-by: Dou Liyang Cc: xen-de...@lists.xenproject.org Cc: boris.ostrov...@oracle.com On which tree does this apply? Would be nice to get a hint against which source this can be reviewed. Aah, just found the rest of the series. In case a single patch of a series isn't stand alone it would be nice to receive at least the cover letter of the series in order to know what its all about. Sorry to confuse you, It's my fault. Thank you for your reply. I understood. will CC the cover letter to linux-xen and linux-acpi. Thanks, dou. Juergen
Re: [PATCH v7 10/12] powerpc/vas: Define vas_win_close() interface
Michael Ellerman [m...@ellerman.id.au] wrote: > Hi Suka, > > More comments :) Thanks! > > Sukadev Bhattiprolu writes: > > > diff --git a/arch/powerpc/platforms/powernv/vas-window.c > > b/arch/powerpc/platforms/powernv/vas-window.c > > index 2dd4b63..24288dd 100644 > > --- a/arch/powerpc/platforms/powernv/vas-window.c > > +++ b/arch/powerpc/platforms/powernv/vas-window.c > > @@ -879,11 +887,92 @@ struct vas_window *vas_rx_win_open(int vasid, enum > > vas_cop_type cop, > > } > > EXPORT_SYMBOL_GPL(vas_rx_win_open); > > > > -/* stub for now */ > > +static void poll_window_busy_state(struct vas_window *window) > > +{ > > + int busy; > > + uint64_t val; > > + > > +retry: > > + /* > > +* Poll Window Busy flag > > +*/ > > + val = read_hvwc_reg(window, VREG(WIN_STATUS)); > > + busy = GET_FIELD(VAS_WIN_BUSY, val); > > + if (busy) { > > + val = 0; > > + schedule_timeout(2000); > > What's 2000? > > That's in jiffies, so it's not a fixed amount of time. > > But on a typical config that will be 20 _seconds_ ?! Ok. Should I change to that just HZ and > > But you haven't set the task state, so AFAIK it will just return > instantly. call set_current_state(TASK_UNINTERRUPTIBLE) before the schedule_timeout()? > > And if there's a software/hardware bug and it never stops being busy, > then we have a softlockup. The other option would be print a big fat > warning and just not free the window. But maybe that doesn't work for > other reasons. 
> > > + goto retry; > > + } > > +} > > + > > +static void poll_window_castout(struct vas_window *window) > > +{ > > + int cached; > > + uint64_t val; > > + > > + /* Cast window context out of the cache */ > > +retry: > > + val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL)); > > + cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val); > > + if (cached) { > > + val = 0ULL; > > + val = SET_FIELD(VAS_CASTOUT_REQ, val, 1); > > + val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0); > > + write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val); > > Sigh, I still don't like that macro :) :-) For one thing, I have used it a lot now and secondly isn't it easier to know that VAS_CASTOUT_REQ bit is set to 1 without worrying about its bit position? When debugging, yes we have to ensure VAS_CASTOUT_REQ is properly defined and we have to work out value in "val". > > or: > write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 1ull << 63); > > > + > > + schedule_timeout(2000); > > + goto retry; > > + } > > +} > > + > > +/* > > + * Close a window. > > + * > > + * See Section 1.12.1 of VAS workbook v1.05 for details on closing window: > > + * - Disable new paste operations (unmap paste address) > > + * - Poll for the "Window Busy" bit to be cleared > > + * - Clear the Open/Enable bit for the Window. > > + * - Poll for return of window Credits (implies FIFO empty for Rx win?) > > + * - Unpin and cast window context out of cache > > + * > > + * Besides the hardware, kernel has some bookkeeping of course. > > + */ > > int vas_win_close(struct vas_window *window) > > { > > - return -1; > > + uint64_t val; > > + > > + if (!window) > > + return 0; > > + > > + if (!window->tx_win && atomic_read(&window->num_txwins) != 0) { > > + pr_devel("VAS: Attempting to close an active Rx window!\n"); > > + WARN_ON_ONCE(1); > > + return -EAGAIN; > > EAGAIN means "if you do the same thing again it might work". > > I don't think that's right here. 
The window is not in a state where it > can be freed, the caller needs to do something to fix that. > > EBUSY would probably be more appropriate. Ok. Should not happen now (or even with the fast thread-wake up code) since only the kernel should be closing the windows - so its really a bug. Will change to EBUSY though. > > > cheers
Re: [PATCH V1] thermal: qcom-spmi-temp-alarm: add support for GEN2 PMIC peripherals
On 2017-08-26 04:49, Stephen Boyd wrote: On 08/25, Zhang Rui wrote: On Thu, 2017-08-17 at 13:12 +0530, kgu...@codeaurora.org wrote: > On 2017-08-16 17:53, kgu...@codeaurora.org wrote: > > > > On 2017-08-08 13:42, Zhang Rui wrote: > > > > > > On Thu, 2017-07-13 at 17:39 +0530, Kiran Gunda wrote: > > > > > > > > From: David Collins > > > > > > > > Add support for the TEMP_ALARM GEN2 PMIC peripheral > > > > subtype. The > > > > GEN2 subtype defines an over temperature state with hysteresis > > > > instead of stage in the status register. There are two GEN2 > > > > states corresponding to stages 1 and 2. > > > > > > > > Signed-off-by: David Collins > > > > Signed-off-by: Kiran Gunda > > > Ivan, > > > > > > can you please review this patch and let me know your opinion? > > > > > > thanks, > > > rui > > Ivan, > > Could you please review this patch ? > > > > Thanks, > > Kiran > Looks like Ivan is no more reviewing the patches for qcom. > Adding Bjorn and Stephen Boyd for the review. > Given this is a platform specific change, I will queue it for next merge window, and let's see if there is any problem reported. Thanks for that ! FWIW, Reviewed-by: Stephen Boyd
[PATCH] tracing: make dynamic types able to use __TRACE_LAST_TYPE
Obviously, trace_events that defined staticly in trace.h won't use __TRACE_LAST_TYPE, so make dynamic types can use it. And some minor changes to trace_search_list() to make code clearer. Signed-off-by: Zhou Chengming --- kernel/trace/trace_output.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bac629a..dcb146f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -19,7 +19,7 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; -static int next_event_type = __TRACE_LAST_TYPE + 1; +static int next_event_type = __TRACE_LAST_TYPE; enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) { @@ -696,7 +696,7 @@ static int trace_search_list(struct list_head **list) if (list_empty(&ftrace_event_list)) { *list = &ftrace_event_list; - return last + 1; + return last; } /* @@ -704,17 +704,17 @@ static int trace_search_list(struct list_head **list) * lets see if somebody freed one. */ list_for_each_entry(e, &ftrace_event_list, list) { - if (e->type != last + 1) + if (e->type != last) break; last++; } /* Did we used up all 65 thousand events??? */ - if ((last + 1) > TRACE_EVENT_TYPE_MAX) + if (last > TRACE_EVENT_TYPE_MAX) return 0; *list = &e->list; - return last + 1; + return last; } void trace_event_read_lock(void) @@ -777,7 +777,7 @@ int register_trace_event(struct trace_event *event) list_add_tail(&event->list, list); - } else if (event->type > __TRACE_LAST_TYPE) { + } else if (event->type >= __TRACE_LAST_TYPE) { printk(KERN_WARNING "Need to add type to trace.h\n"); WARN_ON(1); goto out; -- 1.8.3.1
Re: [PATCH v7 08/12] powerpc/vas: Define vas_win_id()
Michael Ellerman [m...@ellerman.id.au] wrote: > Sukadev Bhattiprolu writes: > > > Define an interface to return a system-wide unique id for a given VAS > > window. > > > > The vas_win_id() will be used in a follow-on patch to generate an unique > > handle for a user space receive window. Applications can use this handle > > to pair send and receive windows for fast thread-wakeup. > > > > The hardware refers to this system-wide unique id as a Partition Send > > Window ID which is expected to be used during fault handling. Hence the > > "pswid" in the function names. > > Same comment as previous patch. Ok will drop them for now. > > cheers
Re: [PATCH v7 06/12] powerpc/vas: Define helpers to alloc/free windows
Michael Ellerman [m...@ellerman.id.au] wrote: > Sukadev Bhattiprolu writes: > > diff --git a/arch/powerpc/platforms/powernv/vas-window.c > > b/arch/powerpc/platforms/powernv/vas-window.c > > + rc = ida_pre_get(ida, GFP_KERNEL); > > + if (!rc) > > + return -EAGAIN; > > + > > + spin_lock(&vas_ida_lock); > > + rc = ida_get_new_above(ida, 0, &winid); > > If you're passing 0 you can just use ida_get_new(). Ok. > > Or did you actually want to exclude 0? In which case you should pass 1. > > > + spin_unlock(&vas_ida_lock); > > + > > + if (rc) > > + return rc; > > You're supposed to handle EAGAIN I thought. Yes, I will retry the pre_get() > > > + > > + if (winid > VAS_WINDOWS_PER_CHIP) { > > + pr_err("VAS: Too many (%d) open windows\n", winid); > > + vas_release_window_id(ida, winid); > > + return -EAGAIN; > > + } > > + > > + return winid; > > +} > > + > > +void vas_window_free(struct vas_window *window) > > static. Ok > > > +{ > > + int winid = window->winid; > > + struct vas_instance *vinst = window->vinst; > > + > > + unmap_winctx_mmio_bars(window); > > + kfree(window); > > + > > + vas_release_window_id(&vinst->ida, winid); > > +} > > + > > +struct vas_window *vas_window_alloc(struct vas_instance *vinst) > > +{ > > + int winid; > > + struct vas_window *window; > > + > > + winid = vas_assign_window_id(&vinst->ida); > > + if (winid < 0) > > + return ERR_PTR(winid); > > + > > + window = kzalloc(sizeof(*window), GFP_KERNEL); > > + if (!window) > > + return ERR_PTR(-ENOMEM); > > You leak an id here. Argh. Yes. > > The error handling would be easier in here if the caller did the alloc, > or if you split alloc and init, and alloc just did the kzalloc(). I was trying to simplify error handling in the callers where they have to only deal with one failure now. > > One of the callers even prints "unable to allocate memory" if this > function fails, but that's not accurate, there's several failure modes. Yes, will fix that message and the leaks. Thanks, Suka
linux-next: manual merge of the kvm tree with the tip tree
Hi all, Today's linux-next merge of the kvm tree got a conflict in: arch/x86/kvm/mmu.c between commit: ea2800ddb20d ("kvm/x86: Avoid clearing the C-bit in rsvd_bits()") from the tip tree and commit: d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers") from the kvm tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc arch/x86/kvm/mmu.c index 04d750813c9d,2a8a6e3e2a31.. --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@@ -4116,21 -4157,11 +4162,21 @@@ reset_shadow_zero_bits_mask(struct kvm_ * Passing "true" to the last argument is okay; it adds a check * on bit 8 of the SPTEs which KVM doesn't use anyway. */ - __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, + shadow_zero_check = &context->shadow_zero_check; + __reset_rsvds_bits_mask(vcpu, shadow_zero_check, boot_cpu_data.x86_phys_bits, context->shadow_root_level, uses_nx, - guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), - true); + guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), + is_pse(vcpu), true); + + if (!shadow_me_mask) + return; + + for (i = context->shadow_root_level; --i >= 0;) { + shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask; + shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask; + } + } EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
Re: [PATCH v7 05/12] powerpc/vas: Define helpers to init window context
Michael Ellerman [m...@ellerman.id.au] wrote: > Sukadev Bhattiprolu writes: > > diff --git a/arch/powerpc/platforms/powernv/vas-window.c > > b/arch/powerpc/platforms/powernv/vas-window.c > > index a3a705a..3a50d6a 100644 > > --- a/arch/powerpc/platforms/powernv/vas-window.c > > +++ b/arch/powerpc/platforms/powernv/vas-window.c > > @@ -11,6 +11,7 @@ > > #include > > #include > > #include > > +#include > > > > #include "vas.h" > > > > @@ -185,6 +186,310 @@ int map_winctx_mmio_bars(struct vas_window *window) > > return 0; > > } > > > > +/* > > + * Reset all valid registers in the HV and OS/User Window Contexts for > > + * the window identified by @window. > > + * > > + * NOTE: We cannot really use a for loop to reset window context. Not all > > + * offsets in a window context are valid registers and the valid > > + * registers are not sequential. And, we can only write to offsets > > + * with valid registers (or is that only in Simics?). > > I assume there's no "reset everything" register we can write to do this > for us? Checked with the hardware team and they said there is no "reset everything" register. While there are some tricky ways to clear the context, writing zeroes is the easiest. > > Also if you can clean up the comment to not mention Simics, I would > assume that applies on real hardware too. 
> > > + */ > > +void reset_window_regs(struct vas_window *window) > > +{ > > + write_hvwc_reg(window, VREG(LPID), 0ULL); > > + write_hvwc_reg(window, VREG(PID), 0ULL); > > + write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL); > > + write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL); > > + write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL); > > + write_hvwc_reg(window, VREG(AMR), 0ULL); > > + write_hvwc_reg(window, VREG(SEIDR), 0ULL); > > + write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL); > > + write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL); > > + write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL); > > + write_hvwc_reg(window, VREG(PSWID), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE1), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE2), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE3), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE4), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE5), 0ULL); > > + write_hvwc_reg(window, VREG(SPARE6), 0ULL); > > Should we be writing to spare registers? Presumably in a future hardware > revision they might have some unknown purpose. Sure, will skip those. 
> > > + write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL); > > + write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL); > > + write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL); > > + write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL); > > + write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL); > > + write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL); > > + write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL); > > + write_hvwc_reg(window, VREG(TX_WCRED), 0ULL); > > + write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL); > > + write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL); > > + write_hvwc_reg(window, VREG(WINCTL), 0ULL); > > + write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL); > > + write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL); > > + write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL); > > + write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL); > > + write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL); > > + write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL); > > + > > + /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */ > > + > > + /* > > +* The send and receive window credit adder registers are also > > +* accessible from HVWC and have been initialized above. We don't > > +* need to initialize from the OS/User Window Context, so skip > > +* following calls: > > +* > > +* write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL); > > +* write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL); > > +*/ > > +} > > + > > +/* > > + * Initialize window context registers related to Address Translation. > > + * These registers are common to send/receive windows although they > > + * differ for user/kernel windows. 
As we resolve the TODOs we may > > + * want to add fields to vas_winctx and move the initialization to > > + * init_vas_winctx_regs(). > > + */ > > +static void init_xlate_regs(struct vas_window *window, bool user_win) > > +{ > > + uint64_t lpcr, val; > > + > > + /* > > +* MSR_TA, MSR_US are false for both kernel and user. > > +* MSR_DR and MSR_PR are false for kernel. > > +*/ > > + val = 0ULL; > > + val = SET_FIELD(VAS_XLATE_MSR_HV, val, true); > > Using a bool here presumably works, but if you actua
Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce
On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote: > On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote: > > > It seems to me that it would be better to do the anon_inode_getfd() > > call before the kvm_get_kvm() call, and go to the fail label if it > > fails. > > And what happens if another thread does close() on the (guessed) fd? Chaos ensues, but mostly because we don't have proper mutual exclusion on the modifications to the list. I'll add a mutex_lock/unlock to kvm_spapr_tce_release() and move the anon_inode_getfd() call inside the mutex. It looks like the other possible uses of the fd (mmap, and passing it as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd) are safe. Thanks, Paul.
Re: [PATCH] Fix compat_sys_sigpending breakage introduced by v4.13-rc1~6^2~12
On Sun, Aug 06, 2017 at 07:22:03PM +0100, Al Viro wrote: > I would pick it through my tree, but the local network is half-disasembled > for move (containers arrive tomorrow, flight to Boston on 9th, stuff should > arrive there by the weekend, so I hope to be back to normal by the 14th > or so, assuming I'll have any sanity left by that time). ... and that hope had turned out to be far too optimistic. Getting the things back into working shape took two weeks longer than that; by now most of the damage has been dealt with. Dmitry's followups applied to for-next queue, with apologies for delay.
Re: [PATCH v7 04/12] powerpc/vas: Define helpers to access MMIO regions
Michael Ellerman [m...@ellerman.id.au] wrote: > Hi Suka, > > Comments inline. > > Sukadev Bhattiprolu writes: > > diff --git a/arch/powerpc/platforms/powernv/vas-window.c > > b/arch/powerpc/platforms/powernv/vas-window.c > > index 6156fbe..a3a705a 100644 > > --- a/arch/powerpc/platforms/powernv/vas-window.c > > +++ b/arch/powerpc/platforms/powernv/vas-window.c > > @@ -9,9 +9,182 @@ > > > > #include > > #include > > +#include > > +#include > > > > #include "vas.h" > > > > +/* > > + * Compute the paste address region for the window @window using the > > + * ->paste_base_addr and ->paste_win_id_shift we got from device tree. > > + */ > > +void compute_paste_address(struct vas_window *window, uint64_t *addr, int > > *len) > > +{ > > + uint64_t base, shift; > > Please use the kernel types, so u64 here. Ok. > > > + int winid; > > + > > + base = window->vinst->paste_base_addr; > > + shift = window->vinst->paste_win_id_shift; > > + winid = window->winid; > > + > > + *addr = base + (winid << shift); > > + if (len) > > + *len = PAGE_SIZE; > > Having multiple output parameters makes for a pretty awkward API. Is it > really necesssary given len is a constant PAGE_SIZE anyway. > > If you didn't return len, then you could just make the function return > the addr, and you wouldn't need any output parameters. I agree, I went back and forth on it. I was trying to avoid callers making assumptions on the size. But since there are just a couple of places, I guess we could have them assume PAGE_SIZE. > > One of the callers that passes len is unmap_paste_region(), but that > is a bit odd. It would be more natural I think if once a window is > mapped it knows its size. Or if the mapping will always just be one page > then we can just know that. Agree, since the len values are constant I was trying to avoid saving them in each of the 64K windows - so the compute during unmap. Will change to assume PAGE_SIZE. Also agree with other comments here.
Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system
On 28/08/17 06:25, Juergen Gross wrote: > On 28/08/17 05:20, Dou Liyang wrote: >> XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus() >> initializes interrupts in the XEN PV specific way and does not invoke >> native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is >> not invoked either. >> >> The invocation of x86_init.intr_mode_init() will be moved from >> native_smp_prepare_cpus() in a follow up patch to solve > REASON/PROBLEM>. > > Can you be a little bit more precise here, please? :-) > >> That move would cause the invocation of x86_init.intr_mode_init() for XEN >> PV platforms. To prevent that, override the default x86_init. >> intr_mode_init() callback with a noop(). >> >> [Rewrited by Thomas Gleixner ] >> >> Signed-off-by: Dou Liyang >> Cc: xen-de...@lists.xenproject.org >> Cc: boris.ostrov...@oracle.com > > On which tree does this apply? Would be nice to get a hint against which > source this can be reviewed. Aah, just found the rest of the series. In case a single patch of a series isn't stand alone it would be nice to receive at least the cover letter of the series in order to know what its all about. Juergen
[PATCH] IB/rxe: constify vm_operations_struct
vm_operations_struct structures are not supposed to change at runtime. The vm_area_struct structure works with a const vm_operations_struct. So mark the non-const vm_operations_struct structs as const. Signed-off-by: Arvind Yadav --- drivers/infiniband/sw/rxe/rxe_mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index bd812e0..d22431e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma) kref_put(&ip->ref, rxe_mmap_release); } -static struct vm_operations_struct rxe_vm_ops = { +static const struct vm_operations_struct rxe_vm_ops = { .open = rxe_vma_open, .close = rxe_vma_close, }; -- 1.9.1
[PATCH] IB/hfi1: constify vm_operations_struct
vm_operations_struct structures are not supposed to change at runtime. The vm_area_struct structure works with a const vm_operations_struct. So mark the non-const vm_operations_struct structs as const. Signed-off-by: Arvind Yadav --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 3158128..46db68f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -116,7 +116,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, .llseek = noop_llseek, }; -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .fault = vma_fault, }; -- 1.9.1
Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system
On 28/08/17 05:20, Dou Liyang wrote: > XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus() > initializes interrupts in the XEN PV specific way and does not invoke > native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is > not invoked either. > > The invocation of x86_init.intr_mode_init() will be moved from > native_smp_prepare_cpus() in a follow up patch to solve REASON/PROBLEM>. Can you be a little bit more precise here, please? :-) > That move would cause the invocation of x86_init.intr_mode_init() for XEN > PV platforms. To prevent that, override the default x86_init. > intr_mode_init() callback with a noop(). > > [Rewrited by Thomas Gleixner ] > > Signed-off-by: Dou Liyang > Cc: xen-de...@lists.xenproject.org > Cc: boris.ostrov...@oracle.com On which tree does this apply? Would be nice to get a hint against which source this can be reviewed. Juergen > --- > arch/x86/xen/enlighten_pv.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c > index 811e4dd..07147dd 100644 > --- a/arch/x86/xen/enlighten_pv.c > +++ b/arch/x86/xen/enlighten_pv.c > @@ -1250,6 +1250,7 @@ asmlinkage __visible void __init xen_start_kernel(void) > x86_platform.get_nmi_reason = xen_get_nmi_reason; > > x86_init.resources.memory_setup = xen_memory_setup; > + x86_init.irqs.intr_mode_init= x86_init_noop; > x86_init.oem.arch_setup = xen_arch_setup; > x86_init.oem.banner = xen_banner; > >
linux-next: build failure after merge of the rcu tree
Hi Paul, After merging the rcu tree, today's linux-next build (arm multi_v7_defconfig) failed like this: In file included from arch/arm/kernel/asm-offsets.c:14:0: include/linux/sched.h: In function 'membarrier_sched_out': include/linux/sched.h:1680:3: error: implicit declaration of function 'sync_core' [-Werror=implicit-function-declaration] sync_core(); ^ Caused by commit 0d6eb99818da ("membarrier: Provide register sync core cmd") I have used the rcu tree from next-20170825 for today. -- Cheers, Stephen Rothwell
Re: [PATCH 3.18 00/10] 3.18.67-stable review
On Sun, Aug 27, 2017 at 09:49:48AM -0700, Guenter Roeck wrote: > On Tue, Aug 22, 2017 at 12:09:32PM -0700, Greg Kroah-Hartman wrote: > > This is the start of the stable review cycle for the 3.18.67 release. > > There are 10 patches in this series, all will be posted as a response > > to this one. If anyone has any issues with these being applied, please > > let me know. > > > > Responses should be made by Thu Aug 24 19:08:42 UTC 2017. > > Anything received after that time might be too late. > > > > Better late than never. Sorry, I was out of town. Not a problem, so was I :) > > Build results: > total: 136 pass: 136 fail: 0 > Qemu test results: > total: 111 pass: 111 fail: 0 Thanks for the report for all of these. greg k-h
Re: [PATCH net-next v7 04/10] bpf: Define handle_fs and add a new helper bpf_handle_fs_get_mode()
On Mon, 21 Aug 2017, Mickaël Salaün wrote: > @@ -85,6 +90,8 @@ enum bpf_arg_type { > > ARG_PTR_TO_CTX, /* pointer to context */ > ARG_ANYTHING, /* any (initialized) argument is ok */ > + > + ARG_CONST_PTR_TO_HANDLE_FS, /* pointer to an abstract FS struct */ > }; Looks like a spurious empty line. -- James Morris
linux-next: manual merge of the tip tree with the spi tree
Hi all, Today's linux-next merge of the tip tree got a conflict in: tools/Makefile between commit: e9d4650dcc59 ("spi: tools: add install section") from the spi tree and commit: ecda85e70277 ("x86/lguest: Remove lguest support") from the tip tree. I fixed it up (see below) and can carry the fix as necessary. This is now fixed as far as linux-next is concerned, but any non trivial conflicts should be mentioned to your upstream maintainer when your tree is submitted for merging. You may also want to consider cooperating with the maintainer of the conflicting tree to minimise any particularly complex conflicts. -- Cheers, Stephen Rothwell diff --cc tools/Makefile index 616e7722b327,a19b176b914b.. --- a/tools/Makefile +++ b/tools/Makefile @@@ -90,8 -89,8 +89,8 @@@ freefall: FORC kvm_stat: FORCE $(call descend,kvm/$@) - all: acpi cgroup cpupower gpio hv firewire lguest liblockdep \ + all: acpi cgroup cpupower gpio hv firewire liblockdep \ - perf selftests turbostat usb \ + perf selftests spi turbostat usb \ virtio vm net x86_energy_perf_policy \ tmon freefall objtool kvm_stat @@@ -101,7 -100,7 +100,7 @@@ acpi_install cpupower_install: $(call descend,power/$(@:_install=),install) - cgroup_install firewire_install gpio_install hv_install lguest_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install: -cgroup_install firewire_install gpio_install hv_install perf_install usb_install virtio_install vm_install net_install objtool_install: ++cgroup_install firewire_install gpio_install hv_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install: $(call descend,$(@:_install=),install) liblockdep_install:
Re: [kernel-hardening] Re: [PATCH net-next v7 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier
On Wed, 23 Aug 2017, Mickaël Salaün wrote: > >> + struct { > >> + __u32 abi; /* minimal ABI version, cf. user doc */ > > > > the concept of abi (version) sounds a bit weird to me. > > Why bother with it at all? > > Once the first set of patches lands the kernel as whole will have landlock > > feature > > with a set of helpers, actions, event types. > > Some future patches will extend the landlock feature step by step. > > This abi concept assumes that anyone who adds new helper would need > > to keep incrementing this 'abi'. What value does it give to user or to > > kernel? > > The users will already know that landlock is present in kernel 4.14 or > > whatever > > and the kernel 4.18 has more landlock features. Why bother with extra abi > > number? > > That's right for helpers and context fields, but we can't check the use > of one field's content. The status field is intended to be a bitfield > extendable in the future. For example, one use case is to set a flag to > inform the eBPF program that it was already called with the same context > and can skip most of its check (if not related to maps). Same goes for > the FS action bitfield, one may want to add more of them. Another > example may be the check for abilities. We may want to relax/remove the > capability require to set one of them. With an ABI version, the user can > easily check if the current kernel support that. Don't call it an ABI, perhaps minimum policy version (similar to what SELinux does). Changes need to be made so that any existing userspace still works. -- James Morris
Re: [PATCH net-next v7 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier
On Tue, 22 Aug 2017, Alexei Starovoitov wrote: > more general question: what is the status of security/ bits? > I'm assuming they still need to be reviewed and explicitly acked by James, > right? Yep, along with other core security developers where possible. -- James Morris
Re: [kernel-hardening] [PATCH net-next v7 00/10] Landlock LSM: Toward unprivileged sandboxing
On Mon, 21 Aug 2017, Mickaël Salaün wrote: > ## Why a new LSM? Are SELinux, AppArmor, Smack and Tomoyo not good enough? > > The current access control LSMs are fine for their purpose which is to give > the > *root* the ability to enforce a security policy for the *system*. What is > missing is a way to enforce a security policy for any application by its > developer and *unprivileged user* as seccomp can do for raw syscall filtering. > You could mention here that the first case is Mandatory Access Control, in general terms. -- James Morris
[PATCH v8 01/13] x86/apic: Construct a selector for the interrupt delivery mode
Now, there are many switches in kernel which are used to determine the final interrupt delivery mode, as shown below: 1) kconfig: CONFIG_X86_64; CONFIG_X86_LOCAL_APIC; CONFIG_x86_IO_APIC 2) kernel option: disable_apic; skip_ioapic_setup 3) CPU Capability: boot_cpu_has(X86_FEATURE_APIC) 4) MP table: smp_found_config 5) ACPI: acpi_lapic; acpi_ioapic; nr_ioapic These switches are disordered and scattered and there are also some dependencies with each other. These make the code difficult to maintain and read. Construct a selector to unify them into a single function, then, Use this selector to get an interrupt delivery mode directly. Signed-off-by: Dou Liyang --- arch/x86/kernel/apic/apic.c | 59 + 1 file changed, 59 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 98b3dd8..01bde03 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1235,6 +1235,65 @@ void __init sync_Arb_IDs(void) APIC_INT_LEVELTRIG | APIC_DM_INIT); } +enum apic_intr_mode { + APIC_PIC, + APIC_VIRTUAL_WIRE, + APIC_SYMMETRIC_IO, +}; + +static int __init apic_intr_mode_select(void) +{ + /* Check kernel option */ + if (disable_apic) { + pr_info("APIC disabled via kernel command line\n"); + return APIC_PIC; + } + + /* Check BIOS */ +#ifdef CONFIG_X86_64 + /* On 64-bit, the APIC must be integrated, Check local APIC only */ + if (!boot_cpu_has(X86_FEATURE_APIC)) { + disable_apic = 1; + pr_info("APIC disabled by BIOS\n"); + return APIC_PIC; + } +#else + /* +* On 32-bit, check whether there is a separate chip or integrated +* APIC +*/ + + /* Has a separate chip ? */ + if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) { + disable_apic = 1; + + return APIC_PIC; + } + + /* Has a local APIC ? 
*/ + if (!boot_cpu_has(X86_FEATURE_APIC) && + APIC_INTEGRATED(boot_cpu_apic_version)) { + disable_apic = 1; + pr_err(FW_BUG "Local APIC %d not detected, force emulation\n", + boot_cpu_physical_apicid); + + return APIC_PIC; + } +#endif + + /* Check MP table or ACPI MADT configuration */ + if (!smp_found_config) { + disable_ioapic_support(); + + if (!acpi_lapic) + pr_info("APIC: ACPI MADT or MP tables are not detected\n"); + + return APIC_VIRTUAL_WIRE; + } + + return APIC_SYMMETRIC_IO; +} + /* * An initial setup of the virtual wire mode. */ -- 2.5.5
Re: [RESEND PATCH v4 2/2] i2c: Add Spreadtrum I2C controller driver
Hi Wolfram, On 27 August 2017 at 23:30, Wolfram Sang wrote: > Hi, > > thanks for your submission. > >> +static void sprd_i2c_dump_reg(struct sprd_i2c *i2c_dev) >> +{ >> + dev_err(&i2c_dev->adap.dev, ": ==dump i2c-%d reg===\n", >> + i2c_dev->adap.nr); >> + dev_err(&i2c_dev->adap.dev, ": I2C_CTRL:0x%x\n", >> + readl(i2c_dev->base + I2C_CTL)); >> + dev_err(&i2c_dev->adap.dev, ": I2C_ADDR_CFG:0x%x\n", >> + readl(i2c_dev->base + I2C_ADDR_CFG)); >> + dev_err(&i2c_dev->adap.dev, ": I2C_COUNT:0x%x\n", >> + readl(i2c_dev->base + I2C_COUNT)); >> + dev_err(&i2c_dev->adap.dev, ": I2C_RX:0x%x\n", >> + readl(i2c_dev->base + I2C_RX)); >> + dev_err(&i2c_dev->adap.dev, ": I2C_STATUS:0x%x\n", >> + readl(i2c_dev->base + I2C_STATUS)); >> + dev_err(&i2c_dev->adap.dev, ": ADDR_DVD0:0x%x\n", >> + readl(i2c_dev->base + ADDR_DVD0)); >> + dev_err(&i2c_dev->adap.dev, ": ADDR_DVD1:0x%x\n", >> + readl(i2c_dev->base + ADDR_DVD1)); >> + dev_err(&i2c_dev->adap.dev, ": ADDR_STA0_DVD:0x%x\n", >> + readl(i2c_dev->base + ADDR_STA0_DVD)); >> + dev_err(&i2c_dev->adap.dev, ": ADDR_RST:0x%x\n", >> + readl(i2c_dev->base + ADDR_RST)); > > I really thing register dumps should be dev_dbg(). OK. Will fix in next version. > >> +} >> + >> +static void sprd_i2c_set_count(struct sprd_i2c *i2c_dev, u32 count) >> +{ >> + writel(count, i2c_dev->base + I2C_COUNT); >> +} >> + >> +static void sprd_i2c_send_stop(struct sprd_i2c *i2c_dev, int stop) >> +{ >> + unsigned int tmp = readl(i2c_dev->base + I2C_CTL); > > u32? Here and in many other places? OK. > > ... 
> >> +static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id) >> +{ >> + struct sprd_i2c *i2c_dev = dev_id; >> + struct i2c_msg *msg = i2c_dev->msg; >> + int ack = readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK; >> + u32 i2c_count = readl(i2c_dev->base + I2C_COUNT); >> + u32 i2c_tran; >> + >> + if (msg->flags & I2C_M_RD) >> + i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD; >> + else >> + i2c_tran = i2c_count; >> + >> + /* >> + * If we got one ACK from slave when writing data, and we did not > > Here you say: "If we get ack..." > >> + * finish this transmission (i2c_tran is not zero), then we should >> + * continue to write data. >> + * >> + * For reading data, ack is always 0, if i2c_tran is not 0 which >> + * means we still need to contine to read data from slave. >> + */ >> + if (i2c_tran && !ack) { > > ... but the code gives the assumption you did NOT get an ack. So, either > rename the variable to 'ack_err' or keep it 'ack' and invert the logic > when initializing the variable. If ack == 0 means we got one ack. I will invert the logic as you suggested. > >> + sprd_i2c_data_transfer(i2c_dev); >> + return IRQ_HANDLED; >> + } >> + >> + i2c_dev->err = 0; >> + >> + /* >> + * If we did not get one ACK from slave when writing data, we should >> + * dump all registers to check I2C status. > > Why? I would say no. NACK from a slave can always happen, e.g. when an > EEPROM is busy erasing a page. For our I2C controller databook, if the master did not get one ACK from slave when writing data to salve, we should send one STOP signal to abort this data transfer or generate one repeated START signal to start one new data transfer cycle. Considering our I2C usage scenarios, we should dump registers to analyze I2C status and notify to user to re-start new data transfer. 
> >> + */ >> + if (ack) { >> + i2c_dev->err = -EIO; >> + sprd_i2c_dump_reg(i2c_dev); >> + } else if (msg->flags & I2C_M_RD && i2c_dev->count) { >> + sprd_i2c_read_bytes(i2c_dev, i2c_dev->buf, i2c_dev->count); >> + } >> + >> + /* Transmission is done and clear ack and start operation */ >> + sprd_i2c_clear_ack(i2c_dev); >> + sprd_i2c_clear_start(i2c_dev); >> + complete(&i2c_dev->complete); >> + >> + return IRQ_HANDLED; >> +} > > ... > >> + >> + pm_runtime_set_autosuspend_delay(i2c_dev->dev, SPRD_I2C_PM_TIMEOUT); >> + pm_runtime_use_autosuspend(i2c_dev->dev); >> + pm_runtime_set_active(i2c_dev->dev); >> + pm_runtime_enable(i2c_dev->dev); >> + >> + ret = pm_runtime_get_sync(i2c_dev->dev); >> + if (ret < 0) { >> + dev_err(&pdev->dev, "i2c%d pm runtime resume failed!\n", >> + pdev->id); > > Error message has wrong text. Will fix it. > >> + goto err_rpm_put; >> + } >> + >> +static int sprd_i2c_init(void) >> +{ >> + return platform_driver_register(&sprd_i2c_driver); >> +} >> +arch_initcall_sync(sprd_i2c_init); > > arch_initcall? and no exit() function? Why is it that way and/or why > can't you use platform_module_driver()? As I explained before, in our Spreadtrum platform, our regul
[PATCH v8 04/13] x86/apic: Move logical APIC ID away from apic_bsp_setup()
apic_bsp_setup() sets and returns logical APIC ID for initializing cpu0_logical_apicid in SMP-capable system. The id has nothing to do with the initialization of local APIC and I/O APIC. And apic_bsp_setup() should be called for interrupt mode setup intently. Move the id setup into a separate helper function for cleanup and mark apic_bsp_setup() void. Signed-off-by: Dou Liyang --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic/apic.c | 10 +- arch/x86/kernel/smpboot.c | 12 +++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1a970f5..4e550c7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -146,7 +146,7 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern int apic_bsp_setup(bool upmode); +extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 80a273d..0fcbcf3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2426,25 +2426,17 @@ static void __init apic_bsp_up_setup(void) * Returns: * apic_id of BSP APIC */ -int __init apic_bsp_setup(bool upmode) +void __init apic_bsp_setup(bool upmode) { - int id; - connect_bsp_APIC(); if (upmode) apic_bsp_up_setup(); setup_local_APIC(); - if (x2apic_mode) - id = apic_read(APIC_LDR); - else - id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); - enable_IO_APIC(); end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); - return id; } /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 394cd81..4ace4d0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1291,6 +1291,14 @@ static void __init smp_cpu_index_default(void) } } +static void __init smp_get_logical_apicid(void) +{ + if (x2apic_mode) + cpu0_logical_apicid = apic_read(APIC_LDR); + else + cpu0_logical_apicid 
= GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); +} + /* * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. @@ -1351,11 +1359,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) } default_setup_apic_routing(); - cpu0_logical_apicid = apic_bsp_setup(false); + apic_bsp_setup(false); /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); + smp_get_logical_apicid(); + pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); -- 2.5.5
[PATCH v8 08/13] x86/ioapic: Refactor the delay logic in timer_irq_works()
The kernel uses timer_irq_works() to detect the timer IRQ. It calls mdelay(10) to delay ten ticks and checks whether the timer IRQ works or not. The mdelay() depends on loops_per_jiffy, which is set up in calibrate_delay(). The current kernel assumes that IRQ 0 is available when it calibrates the delay. But this is wrong in the dump-capture kernel with the 'notsc' option inherited from the 1st kernel's options: the dump-capture kernel can't make sure the timer IRQ works well. The correct design is to make the interrupt mode setup and the timer IRQ check happen in advance of calibrate_delay(). That results in mdelay() being unusable in timer_irq_works(). Preparatory patch to make the setup in advance. Refactor the delay logic by waiting for some cycles: on systems with the X86_FEATURE_TSC feature, use rdtsc(); others will call __delay() directly. Note: regard 4 GHz as the max CPU frequency of a current single CPU. Signed-off-by: Dou Liyang --- arch/x86/kernel/apic/io_apic.c | 45 -- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 237e9c2..348ea7e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1585,6 +1585,43 @@ static int __init notimercheck(char *s) } __setup("no_timer_check", notimercheck); +static void __init delay_with_tsc(void) +{ + unsigned long long start, now; + unsigned long end = jiffies + 4; + + start = rdtsc(); + + /* +* We don't know the TSC frequency yet, but waiting for +* 400/HZ TSC cycles is safe: +* 4 GHz == 10 jiffies +* 1 GHz == 40 jiffies +*/ + do { + rep_nop(); + now = rdtsc(); + } while ((now - start) < 400UL / HZ && + time_before_eq(jiffies, end)); +} + +static void __init delay_without_tsc(void) +{ + unsigned long end = jiffies + 4; + int band = 1; + + /* +* We don't know any frequency yet, but waiting for +* 4094000/HZ cycles is safe: +* 4 GHz == 10 jiffies +* 1 GHz == 40 jiffies +* 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094 +*/ + do { + __delay(((1U << 
band++) * 1000UL) / HZ); + } while (band < 12 && time_before_eq(jiffies, end)); +} + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1603,8 +1640,12 @@ static int __init timer_irq_works(void) local_save_flags(flags); local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); + + if (boot_cpu_has(X86_FEATURE_TSC)) + delay_with_tsc(); + else + delay_without_tsc(); + local_irq_restore(flags); /* -- 2.5.5
[PATCH v8 11/13] ACPI / init: Invoke early ACPI initialization earlier
Linux uses acpi_early_init() to move the ACPI table management from the early stage to the late stage. The two stages are different: the mapped ACPI tables in the early stage are temporary and should be unmapped, but in the late stage they are permanent and don't need to be unmapped. Originally, mapping and parsing the DMAR table happens in the late stage. However, initializing the interrupt delivery mode earlier will move it into the early stage. This causes an ACPI error warning when Linux reallocates the ACPI root tables, because Linux doesn't unmap the DMAR table after using it in the early stage. Invoke acpi_early_init() earlier, before late_time_init(), to keep the DMAR table mapped and parsed in the late stage as before. Reported-by: Xiaolong Ye Signed-off-by: Dou Liyang Cc: linux-a...@vger.kernel.org Cc: Rafael J. Wysocki Cc: Zheng, Lv --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 052481f..52dee20 100644 --- a/init/main.c +++ b/init/main.c @@ -655,12 +655,12 @@ asmlinkage __visible void __init start_kernel(void) kmemleak_init(); setup_per_cpu_pageset(); numa_policy_init(); + acpi_early_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); anon_vma_init(); - acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); -- 2.5.5
[PATCH v8 05/13] x86/apic: Unify interrupt mode setup for SMP-capable system
In an SMP-capable system, the interrupt delivery mode is enabled and set up in native_smp_prepare_cpus(). This design mixes the APIC and SMP code together and is highly coupled. Make the initialization of the interrupt mode independent; unify and refine it into apic_intr_mode_init() for SMP-capable systems. Signed-off-by: Dou Liyang --- arch/x86/kernel/apic/apic.c | 39 --- arch/x86/kernel/smpboot.c | 14 ++ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0fcbcf3..9038c5f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1238,7 +1238,9 @@ void __init sync_Arb_IDs(void) enum apic_intr_mode { APIC_PIC, APIC_VIRTUAL_WIRE, + APIC_VIRTUAL_WIRE_NO_CONFIG, APIC_SYMMETRIC_IO, + APIC_SYMMETRIC_IO_NO_ROUTING, }; static int __init apic_intr_mode_select(void) @@ -1285,12 +1287,29 @@ static int __init apic_intr_mode_select(void) if (!smp_found_config) { disable_ioapic_support(); - if (!acpi_lapic) + if (!acpi_lapic) { pr_info("APIC: ACPI MADT or MP tables are not detected\n"); + return APIC_VIRTUAL_WIRE_NO_CONFIG; + } + return APIC_VIRTUAL_WIRE; } +#ifdef CONFIG_SMP + /* If SMP should be disabled, then really disable it! */ + if (!setup_max_cpus) { + pr_info("APIC: SMP mode deactivated\n"); + return APIC_SYMMETRIC_IO_NO_ROUTING; + } + + if (read_apic_id() != boot_cpu_physical_apicid) { + panic("Boot APIC ID in local APIC unexpected (%d vs %d)", +read_apic_id(), boot_cpu_physical_apicid); + /* Or can we switch back to PIC here? 
*/ + } +#endif + return APIC_SYMMETRIC_IO; } @@ -1346,17 +1365,31 @@ void __init init_bsp_APIC(void) /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { + bool upmode = false; + switch (apic_intr_mode_select()) { case APIC_PIC: pr_info("APIC: Keep in PIC mode(8259)\n"); return; case APIC_VIRTUAL_WIRE: pr_info("APIC: Switch to virtual wire mode setup\n"); - return; + default_setup_apic_routing(); + break; + case APIC_VIRTUAL_WIRE_NO_CONFIG: + pr_info("APIC: Switch to virtual wire mode setup with no configuration\n"); + upmode = true; + default_setup_apic_routing(); + break; case APIC_SYMMETRIC_IO: pr_info("APIC: Switch to symmectic I/O mode setup\n"); - return; + default_setup_apic_routing(); + break; + case APIC_SYMMETRIC_IO_NO_ROUTING: + pr_info("APIC: Switch to symmectic I/O mode setup in no SMP routine\n"); + break; } + + apic_bsp_setup(upmode); } static void lapic_setup_esr(void) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4ace4d0..8301b75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1333,18 +1333,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); + apic_intr_mode_init(); + switch (smp_sanity_check(max_cpus)) { case SMP_NO_CONFIG: disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); return; case SMP_NO_APIC: disable_smp(); return; case SMP_FORCE_UP: disable_smp(); - apic_bsp_setup(false); /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); return; @@ -1352,15 +1351,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) break; } - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", -read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? 
*/ - } - - default_setup_apic_routing(); - apic_bsp_setup(false); - /* Setup local timer */ x86_init.timers.setup_percpu_clockev(); -- 2.5.5
[PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system
XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus() initializes interrupts in the XEN PV specific way and does not invoke native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is not invoked either. The invocation of x86_init.intr_mode_init() will be moved from native_smp_prepare_cpus() in a follow up patch to solve . That move would cause the invocation of x86_init.intr_mode_init() for XEN PV platforms. To prevent that, override the default x86_init. intr_mode_init() callback with a noop(). [Rewrited by Thomas Gleixner ] Signed-off-by: Dou Liyang Cc: xen-de...@lists.xenproject.org Cc: boris.ostrov...@oracle.com --- arch/x86/xen/enlighten_pv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 811e4dd..07147dd 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1250,6 +1250,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_init= x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; -- 2.5.5
[PATCH v8 12/13] x86/time: Initialize interrupt mode behind timer init
In start_kernel(), the kernel first works in the default interrupt mode and then switches to the final mode. Normally, booting with a BIOS reset is OK. But in the dump-capture kernel, which boots up without a BIOS reset, the default mode may not be compatible with the actual registers, and that causes the interrupt delivery to fail. Try to set up the final mode as soon as possible, according to the parts which are split out of that initialization: 1) Set up the APIC/IOAPIC (including testing whether the timer interrupt works) 2) Calibrate TSC 3) Set up the local APIC timer -- From Thomas Gleixner Initializing the mode should happen earlier than calibrating the TSC, as soon as possible, and needs to test whether the timer interrupt works at the same time. Call it behind the timers init, which meets the above conditions. Signed-off-by: Dou Liyang --- arch/x86/kernel/apic/apic.c | 2 -- arch/x86/kernel/smpboot.c | 2 -- arch/x86/kernel/time.c | 5 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 47b67f9..7fb5cde 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2471,8 +2471,6 @@ void __init apic_bsp_setup(bool upmode) #ifdef CONFIG_UP_LATE_INIT void __init up_late_init(void) { - x86_init.irqs.intr_mode_init(); - if (apic_intr_mode == APIC_PIC) return; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2e0eaf2..4f63afc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1293,8 +1293,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); - x86_init.irqs.intr_mode_init(); - smp_sanity_check(); switch (apic_intr_mode) { diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e0754cd..3ceb834 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -84,6 +84,11 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { x86_init.timers.timer_init(); + /* +* After PIT/HPET timers init, select and 
setup +* the final interrupt mode for delivering IRQs. +*/ + x86_init.irqs.intr_mode_init(); tsc_init(); } -- 2.5.5