[patch V3 29/44] x86/idt: Move 32bit idt_descr to C code

2017-08-27 Thread Thomas Gleixner
32bit has the idt_descr sitting in the low level assembly entry code. There
is no reason for that. Move it into the C file and use the 64bit version of
it.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/head_32.S |6 --
 arch/x86/kernel/idt.c |   10 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -622,7 +622,6 @@ ENTRY(initial_stack)
 
.data
 .globl boot_gdt_descr
-.globl idt_descr
 
ALIGN
 # early boot GDT descriptor (must use 1:1 address mapping)
@@ -631,11 +630,6 @@ ENTRY(initial_stack)
.word __BOOT_DS+7
.long boot_gdt - __PAGE_OFFSET
 
-   .word 0 # 32-bit align idt_desc.address
-idt_descr:
-   .word IDT_ENTRIES*8-1   # idt contains 256 entries
-   .long idt_table
-
 # boot GDT descriptor (later on used by CPU#0):
.word 0 # 32 bit align gdt_desc.address
 ENTRY(early_gdt_descr)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -10,15 +10,15 @@
 /* Must be page-aligned because the real IDT is used in a fixmap. */
 gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
 
-#ifdef CONFIG_X86_64
-/* No need to be aligned, but done to keep all IDTs defined the same way. */
-gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
-
 struct desc_ptr idt_descr __ro_after_init = {
-   .size   = IDT_ENTRIES * 16 - 1,
+   .size   = (IDT_ENTRIES * 2 * sizeof(unsigned long)) - 1,
.address= (unsigned long) idt_table,
 };
 
+#ifdef CONFIG_X86_64
+/* No need to be aligned, but done to keep all IDTs defined the same way. */
+gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
+
 const struct desc_ptr debug_idt_descr = {
.size   = IDT_ENTRIES * 16 - 1,
.address= (unsigned long) debug_idt_table,



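The common initializer works on both word sizes because a gate descriptor is
exactly two machine words: 8 bytes on 32-bit, 16 bytes on 64-bit. A
stand-alone sketch of the arithmetic (illustrative program, not kernel code):

#include <stdio.h>

#define IDT_ENTRIES 256

int main(void)
{
	/* A gate descriptor is two machine words wide, so one expression
	 * yields the right table limit for either build. */
	unsigned long limit = IDT_ENTRIES * 2 * sizeof(unsigned long) - 1;

	/* Prints 2047 when built with -m32 and 4095 when built with -m64,
	 * matching the removed IDT_ENTRIES*8-1 and the previous 64-bit
	 * IDT_ENTRIES * 16 - 1 respectively. */
	printf("IDT limit: %lu\n", limit);
	return 0;
}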

[patch V3 19/44] x86/ipi: Make platform IPI depend on APIC

2017-08-27 Thread Thomas Gleixner
The platform IPI vector is only installed when the local APIC is enabled. All
users of it depend on the local APIC anyway.

Make the related code conditional on CONFIG_X86_LOCAL_APIC.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/entry_arch.h |3 +--
 arch/x86/kernel/irq.c |   11 ++-
 2 files changed, 7 insertions(+), 7 deletions(-)

--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -17,8 +17,6 @@ BUILD_INTERRUPT(irq_move_cleanup_interru
 BUILD_INTERRUPT(reboot_interrupt, REBOOT_VECTOR)
 #endif
 
-BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
-
 #ifdef CONFIG_HAVE_KVM
 BUILD_INTERRUPT(kvm_posted_intr_ipi, POSTED_INTR_VECTOR)
 BUILD_INTERRUPT(kvm_posted_intr_wakeup_ipi, POSTED_INTR_WAKEUP_VECTOR)
@@ -37,6 +35,7 @@ BUILD_INTERRUPT(kvm_posted_intr_nested_i
 BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_INTERRUPT(x86_platform_ipi, X86_PLATFORM_IPI_VECTOR)
 
 #ifdef CONFIG_IRQ_WORK
 BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -29,9 +29,6 @@ EXPORT_PER_CPU_SYMBOL(irq_regs);
 
 atomic_t irq_err_count;
 
-/* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
-
 /*
  * 'what should we do if we get a hw irq event on an illegal vector'.
  * each architecture has to answer this themselves.
@@ -87,13 +84,13 @@ int arch_show_interrupts(struct seq_file
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
seq_puts(p, "  APIC ICR read retries\n");
-#endif
if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
seq_puts(p, "  Platform interrupts\n");
}
+#endif
 #ifdef CONFIG_SMP
seq_printf(p, "%*s: ", prec, "RES");
for_each_online_cpu(j)
@@ -183,9 +180,9 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_perf_irqs;
sum += irq_stats(cpu)->apic_irq_work_irqs;
sum += irq_stats(cpu)->icr_read_retry_count;
-#endif
if (x86_platform_ipi_callback)
sum += irq_stats(cpu)->x86_platform_ipis;
+#endif
 #ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
sum += irq_stats(cpu)->irq_call_count;
@@ -259,6 +256,9 @@ u64 arch_irq_stat(void)
return 1;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
+/* Function pointer for generic interrupt vector handling */
+void (*x86_platform_ipi_callback)(void) = NULL;
 /*
  * Handler for X86_PLATFORM_IPI_VECTOR.
  */
@@ -275,6 +275,7 @@ u64 arch_irq_stat(void)
exiting_irq();
set_irq_regs(old_regs);
 }
+#endif
 
 #ifdef CONFIG_HAVE_KVM
 static void dummy_handler(void) {}




[patch V3 30/44] x86/idt: Remove unused set_trap_gate()

2017-08-27 Thread Thomas Gleixner
This inline is not used at all.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |   12 
 1 file changed, 12 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -446,18 +446,6 @@ static inline void set_system_intr_gate(
_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
 }
 
-static inline void set_system_trap_gate(unsigned int n, void *addr)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_trap_gate(unsigned int n, void *addr)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
-}
-
 static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
 {
BUG_ON((unsigned)n > 0xFF);




[patch V3 25/44] x86: Replace access to desc_struct:a/b fields

2017-08-27 Thread Thomas Gleixner
The union inside of desc_struct allows access to the raw u32 parts of
the descriptors. This raw access is about to go away.

Replace the few code parts which access those fields.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Boris Ostrovsky 
Cc: Juergen Gross 
---
 arch/x86/include/asm/xen/hypercall.h |6 --
 arch/x86/kernel/tls.c|2 +-
 arch/x86/xen/enlighten_pv.c  |2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -552,6 +552,8 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
 {
+   u32 *p = (u32 *) &desc;
+
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
@@ -559,8 +561,8 @@ MULTI_update_descriptor(struct multicall
} else {
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
-   mcl->args[2] = desc.a;
-   mcl->args[3] = desc.b;
+   mcl->args[2] = *p++;
+   mcl->args[3] = *p;
}
 
trace_xen_mc_entry(mcl, sizeof(maddr) == sizeof(long) ? 2 : 4);
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -93,7 +93,7 @@ static void set_tls_desc(struct task_str
 
while (n-- > 0) {
if (LDT_empty(info) || LDT_zero(info)) {
-   desc->a = desc->b = 0;
+   memset(desc, 0, sizeof(*desc));
} else {
fill_ldt(desc, info);
 
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -494,7 +494,7 @@ static void __init xen_load_gdt_boot(con
 static inline bool desc_equal(const struct desc_struct *d1,
  const struct desc_struct *d2)
 {
-   return d1->a == d2->a && d1->b == d2->b;
+   return !memcmp(d1, d2, sizeof(*d1));
 }
 
 static void load_TLS_descriptor(struct thread_struct *t,


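The hypercall change above reads the descriptor through a u32 pointer instead
of the removed .a/.b fields. A stand-alone sketch of that idiom; the struct
here is only a stand-in for desc_struct:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct demo_desc {		/* stand-in for an 8 byte segment descriptor */
	uint8_t bytes[8];
};

int main(void)
{
	struct demo_desc desc;
	uint32_t *p = (uint32_t *)&desc;	/* same cast as the patch */
	uint32_t lo, hi;

	memset(&desc, 0xab, sizeof(desc));

	lo = *p++;	/* first 32-bit word, what used to be desc.a */
	hi = *p;	/* second 32-bit word, what used to be desc.b */

	printf("%08x %08x\n", lo, hi);
	return 0;
}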


[patch V3 33/44] x86/idt: Move early IDT setup out of 32bit asm

2017-08-27 Thread Thomas Gleixner
The early IDT setup can be done in C code like it's done on 64 bit. Reuse
the 64 bit version.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/segment.h |1 +
 arch/x86/kernel/head32.c   |4 
 arch/x86/kernel/head_32.S  |   36 ++--
 arch/x86/kernel/idt.c  |4 
 4 files changed, 11 insertions(+), 34 deletions(-)

--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -238,6 +238,7 @@
 #ifndef __ASSEMBLY__
 
 extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
+extern void early_ignore_irq(void);
 
 /*
  * Load a segment. Fall back on loading the zero segment if something goes
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -30,6 +31,9 @@ static void __init i386_default_early_se
 asmlinkage __visible void __init i386_start_kernel(void)
 {
cr4_init_shadow();
+
+   idt_setup_early_handler();
+
sanitize_boot_params(&boot_params);
 
x86_early_init_platform_quirks();
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -345,7 +345,6 @@ ENTRY(startup_32_smp)
movl %eax,%cr0
 
lgdt early_gdt_descr
-   lidt idt_descr
ljmp $(__KERNEL_CS),$1f
 1: movl $(__KERNEL_DS),%eax# reload all the segment registers
movl %eax,%ss   # after changing gdt.
@@ -378,37 +377,6 @@ ENDPROC(startup_32_smp)
  */
 __INIT
 setup_once:
-   /*
-* Set up a idt with 256 interrupt gates that push zero if there
-* is no error code and then jump to early_idt_handler_common.
-* It doesn't actually load the idt - that needs to be done on
-* each CPU. Interrupts are enabled elsewhere, when we can be
-* relatively sure everything is ok.
-*/
-
-   movl $idt_table,%edi
-   movl $early_idt_handler_array,%eax
-   movl $NUM_EXCEPTION_VECTORS,%ecx
-1:
-   movl %eax,(%edi)
-   movl %eax,4(%edi)
-   /* interrupt gate, dpl=0, present */
-   movl $(0x8E000000 + __KERNEL_CS),2(%edi)
-   addl $EARLY_IDT_HANDLER_SIZE,%eax
-   addl $8,%edi
-   loop 1b
-
-   movl $256 - NUM_EXCEPTION_VECTORS,%ecx
-   movl $ignore_int,%edx
-   movl $(__KERNEL_CS << 16),%eax
-   movw %dx,%ax/* selector = 0x0010 = cs */
-   movw $0x8E00,%dx/* interrupt gate - dpl=0, present */
-2:
-   movl %eax,(%edi)
-   movl %edx,4(%edi)
-   addl $8,%edi
-   loop 2b
-
 #ifdef CONFIG_CC_STACKPROTECTOR
/*
 * Configure the stack canary. The linker can't handle this by
@@ -498,7 +466,7 @@ ENDPROC(early_idt_handler_common)
 
 /* This is the default interrupt "handler" :-) */
ALIGN
-ignore_int:
+ENTRY(early_ignore_irq)
cld
 #ifdef CONFIG_PRINTK
pushl %eax
@@ -533,7 +501,7 @@ ENDPROC(early_idt_handler_common)
 hlt_loop:
hlt
jmp hlt_loop
-ENDPROC(ignore_int)
+ENDPROC(early_ignore_irq)
 __INITDATA
.align 4
 GLOBAL(early_recursion_flag)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -34,6 +34,10 @@ void __init idt_setup_early_handler(void
 
for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
set_intr_gate(i, early_idt_handler_array[i]);
+#ifdef CONFIG_X86_32
+   for ( ; i < NR_VECTORS; i++)
+   set_intr_gate(i, early_ignore_irq);
+#endif
load_idt(&idt_descr);
 }
 


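The magic constants 0x8E000000 and 0x8E00 in the removed assembly both encode
the gate attribute byte that set_intr_gate() now fills in from C: present,
DPL 0, 32-bit interrupt gate. A small stand-alone decode of the attribute
word (standard 32-bit gate layout):

#include <stdio.h>

int main(void)
{
	unsigned int attr = 0x8E00;	/* high 16 bits of the gate's second dword */

	printf("present: %u\n", (attr >> 15) & 1);	/* 1 */
	printf("dpl    : %u\n", (attr >> 13) & 3);	/* 0 */
	printf("system : %u\n", (attr >> 12) & 1);	/* 0 = system descriptor */
	printf("type   : %#x\n", (attr >> 8) & 0xf);	/* 0xe = 32-bit interrupt gate */
	return 0;
}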


Re: [PATCH] leds/trigger/activity: add a system activity LED trigger

2017-08-27 Thread Willy Tarreau
Hi Jacek,

On Sun, Aug 27, 2017 at 06:44:05PM +0200, Jacek Anaszewski wrote:
> Hi Willy,
> 
> Thanks for the updated patch.
> 
> One formal note: please send the patches with git send-email instead
> of attaching them to the message.

Yep, I hesitated and wanted to reply. Will do it the other way next
time, sorry for the hassle.

> > diff --git a/drivers/leds/trigger/ledtrig-activity.c 
> > b/drivers/leds/trigger/ledtrig-activity.c
> > new file mode 100644
> > index 000..6f00235
> > --- /dev/null
> > +++ b/drivers/leds/trigger/ledtrig-activity.c
> > @@ -0,0 +1,297 @@
> > +/*
> > + * Activity LED trigger
> > + *
> > + * Copyright (C) 2017 Willy Tarreau 
> > + * Partially based on Atsushi Nemoto's ledtrig-heartbeat.c.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + */
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> 
> Please sort the includes alphabetically.

I'm amazed I did this, I suspect I inherited it from the original file
because I'm also used to annoying people about the same thing! Shame on me!

> > +   activity_data->time_left -= 100;
> > +   if (activity_data->time_left <= 0) {
> > +   activity_data->time_left = 0;
> > +   activity_data->state = !activity_data->state;
> > +   led_set_brightness_nosleep(led_cdev,
> > +   (activity_data->state ^ activity_data->invert) ?
> > +   led_cdev->max_brightness : LED_OFF);
> 
> Have you considered making the top brightness adjustable? I'd make it
> possible especially that we have a similar solution in the
> ledtrig-heartbeat.c already - see the following patch in 4.12:
> 
> commit fb3d769173d26268d7bf068094a599bb28b2ac63
> Author: Jacek Anaszewski 
> Date:   Wed Nov 9 11:43:46 2016 +0100
(...)

I never thought about it and it makes a lot of sense actually. I'll check
this commit, thanks for the pointer.

> > +   switch (pm_event) {
> > +   case PM_SUSPEND_PREPARE:
> > +   case PM_HIBERNATION_PREPARE:
> > +   case PM_RESTORE_PREPARE:
> > +   led_trigger_unregister(&activity_led_trigger);
> > +   break;
> > +   case PM_POST_SUSPEND:
> > +   case PM_POST_HIBERNATION:
> > +   case PM_POST_RESTORE:
> > +   rc = led_trigger_register(&activity_led_trigger);
> > +   if (rc)
> > +   pr_err("could not re-register activity trigger\n");
> > +   break;
> > +   default:
> > +   break;
> > +   }
> > +   return NOTIFY_DONE;
> > +}
> 
> It turned out to cause problems in ledtrig-heartbeat.c and was reverted.
> Please don't register pm notifier and remove related facilities from the
> patch according to the following revert patch:
> 
> commit 436c4c45b5b9562b59cedbb51b7343ab4a6dd8cc
> Author: Zhang Bo 
> Date:   Tue Jun 13 10:39:20 2017 +0800

OK, fine for me. I thought it was mandatory to properly handle pm
even though I was not particularly interested in this for this
specific purpose.

I'll send you an updated patch ASAP.

Thanks very much for your review,
Willy


[patch V3 31/44] x86/idt: Consolidate IDT invalidation

2017-08-27 Thread Thomas Gleixner
kexec and reboot both have code to invalidate the IDT. Create a common
function and use it.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h|3 +++
 arch/x86/kernel/idt.c  |   11 +++
 arch/x86/kernel/machine_kexec_32.c |   14 +-
 arch/x86/kernel/reboot.c   |4 +---
 4 files changed, 16 insertions(+), 16 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -503,4 +503,7 @@ static inline void load_current_idt(void
else
load_idt((const struct desc_ptr *)&idt_descr);
 }
+
+extern void idt_invalidate(void *addr);
+
 #endif /* _ASM_X86_DESC_H */
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -24,3 +24,14 @@ const struct desc_ptr debug_idt_descr =
.address= (unsigned long) debug_idt_table,
 };
 #endif
+
+/**
+ * idt_invalidate - Invalidate interrupt descriptor table
+ * @addr:  The virtual address of the 'invalid' IDT
+ */
+void idt_invalidate(void *addr)
+{
+   struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
+
+   load_idt(&idt);
+}
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -26,18 +26,6 @@
 #include 
 #include 
 
-static void set_idt(void *newidt, __u16 limit)
-{
-   struct desc_ptr curidt;
-
-   /* ia32 supports unaliged loads & stores */
-   curidt.size= limit;
-   curidt.address = (unsigned long)newidt;
-
-   load_idt(&curidt);
-}
-
-
 static void set_gdt(void *newgdt, __u16 limit)
 {
struct desc_ptr curgdt;
@@ -245,7 +233,7 @@ void machine_kexec(struct kimage *image)
 * If you want to load them you must set up your own idt & gdt.
 */
set_gdt(phys_to_virt(0), 0);
-   set_idt(phys_to_virt(0), 0);
+   idt_invalidate(phys_to_virt(0));
 
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -38,8 +38,6 @@
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
-static const struct desc_ptr no_idt = {};
-
 /*
  * This is set if we need to go through the 'emergency' path.
  * When machine_emergency_restart() is called, we may be on
@@ -638,7 +636,7 @@ static void native_machine_emergency_res
break;
 
case BOOT_TRIPLE:
-   load_idt(&no_idt);
+   idt_invalidate(NULL);
__asm__ __volatile__("int3");
 
/* We're probably dead after this, but... */


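The zero-sized descriptor makes every vector out of range, which is exactly
what the reboot path relies on. A sketch of the BOOT_TRIPLE idiom with the
reasoning spelled out (same calls as the patch, shown out of context):

	/* Limit 0: delivering any vector is now outside the IDT bounds. */
	idt_invalidate(NULL);

	/*
	 * The INT3 below cannot be delivered through the empty IDT; the
	 * resulting faults cannot be delivered either, so the CPU ends up
	 * in a triple fault and resets - the intended emergency reboot.
	 */
	asm volatile("int3");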


[patch V3 32/44] x86/idt: Move early IDT handler setup to IDT code

2017-08-27 Thread Thomas Gleixner
The early IDT handler setup is done in C entry code for 64 bit and in ASM
entry code for 32 bit. Move the 64bit variant to the IDT code so it can be
shared with 32bit in the next step.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |9 +
 arch/x86/kernel/head64.c|6 +-
 arch/x86/kernel/idt.c   |   12 
 3 files changed, 22 insertions(+), 5 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -504,6 +504,15 @@ static inline void load_current_idt(void
load_idt((const struct desc_ptr *)&idt_descr);
 }
 
+extern void idt_setup_early_handler(void);
+extern void idt_setup_early_traps(void);
+
+#ifdef CONFIG_X86_64
+extern void idt_setup_early_pf(void);
+#else
+static inline void idt_setup_early_pf(void) { }
+#endif
+
 extern void idt_invalidate(void *addr);
 
 #endif /* _ASM_X86_DESC_H */
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -255,8 +255,6 @@ static void __init copy_bootdata(char *r
 
 asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 {
-   int i;
-
/*
 * Build-time sanity checks on the kernel image and module
 * area mappings. (these are purely build-time and produce no code)
@@ -282,9 +280,7 @@ asmlinkage __visible void __init x86_64_
 
kasan_early_init();
 
-   for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
-   set_intr_gate(i, early_idt_handler_array[i]);
-   load_idt((const struct desc_ptr *)&idt_descr);
+   idt_setup_early_handler();
 
copy_bootdata(__va(real_mode_data));
 
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -26,6 +26,18 @@ const struct desc_ptr debug_idt_descr =
 #endif
 
 /**
+ * idt_setup_early_handler - Initializes the idt table with early handlers
+ */
+void __init idt_setup_early_handler(void)
+{
+   int i;
+
+   for (i = 0; i < NUM_EXCEPTION_VECTORS; i++)
+   set_intr_gate(i, early_idt_handler_array[i]);
+   load_idt(&idt_descr);
+}
+
+/**
  * idt_invalidate - Invalidate interrupt descriptor table
  * @addr:  The virtual address of the 'invalid' IDT
  */




[patch V3 35/44] x86/idt: Switch early trap init to IDT tables

2017-08-27 Thread Thomas Gleixner
Add the initialization table for the early trap setup and replace the early
trap init code.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/idt.c   |   53 
 arch/x86/kernel/setup.c |4 +--
 arch/x86/kernel/traps.c |   27 
 3 files changed, 55 insertions(+), 29 deletions(-)

--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -48,6 +48,28 @@ struct idt_data {
 #define TSKG(_vector, _gdt)\
G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)
 
+/*
+ * Early traps running on the DEFAULT_STACK because the other interrupt
+ * stacks work only after cpu_init().
+ */
+static const __initdata struct idt_data early_idts[] = {
+   INTG(X86_TRAP_DB,   debug),
+   SYSG(X86_TRAP_BP,   int3),
+#ifdef CONFIG_X86_32
+   INTG(X86_TRAP_PF,   page_fault),
+#endif
+};
+
+#ifdef CONFIG_X86_64
+/*
+ * Early traps running on the DEFAULT_STACK because the other interrupt
+ * stacks work only after cpu_init().
+ */
+static const __initdata struct idt_data early_pf_idts[] = {
+   INTG(X86_TRAP_PF,   page_fault),
+};
+#endif
+
 /* Must be page-aligned because the real IDT is used in a fixmap. */
 gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
 
@@ -93,6 +115,37 @@ idt_setup_from_table(gate_desc *idt, con
 }
 
 /**
+ * idt_setup_early_traps - Initialize the idt table with early traps
+ *
+ * On X8664 these traps do not use interrupt stacks as they can't work
+ * before cpu_init() is invoked and sets up TSS. The IST variants are
+ * installed after that.
+ */
+void __init idt_setup_early_traps(void)
+{
+   idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts));
+   load_idt(&idt_descr);
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * idt_setup_early_pf - Initialize the idt table with early pagefault handler
+ *
+ * On X8664 this does not use interrupt stacks as they can't work before
+ * cpu_init() is invoked and sets up TSS. The IST variant is installed
+ * after that.
+ *
+ * FIXME: Why is 32bit and 64bit installing the PF handler at different
+ * places in the early setup code?
+ */
+void __init idt_setup_early_pf(void)
+{
+   idt_setup_from_table(idt_table, early_pf_idts,
+ARRAY_SIZE(early_pf_idts));
+}
+#endif
+
+/**
  * idt_setup_early_handler - Initializes the idt table with early handlers
  */
 void __init idt_setup_early_handler(void)
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -891,7 +891,7 @@ void __init setup_arch(char **cmdline_p)
 */
olpc_ofw_detect();
 
-   early_trap_init();
+   idt_setup_early_traps();
early_cpu_init();
early_ioremap_init();
 
@@ -1162,7 +1162,7 @@ void __init setup_arch(char **cmdline_p)
 
init_mem_mapping();
 
-   early_trap_pf_init();
+   idt_setup_early_pf();
 
/*
 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -923,33 +923,6 @@ dotraplinkage void do_iret_error(struct
 }
 #endif
 
-/* Set of traps needed for early debugging. */
-void __init early_trap_init(void)
-{
-   /*
-* Don't use IST to set DEBUG_STACK as it doesn't work until TSS
-* is ready in cpu_init() <-- trap_init(). Before trap_init(),
-* CPU runs at ring 0 so it is impossible to hit an invalid
-* stack.  Using the original stack works well enough at this
-* early stage. DEBUG_STACK will be equipped after cpu_init() in
-* trap_init().
-*/
-   set_intr_gate(X86_TRAP_DB, debug);
-   /* int3 can be called from all */
-   set_system_intr_gate(X86_TRAP_BP, &int3);
-#ifdef CONFIG_X86_32
-   set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
-   load_idt(&idt_descr);
-}
-
-void __init early_trap_pf_init(void)
-{
-#ifdef CONFIG_X86_64
-   set_intr_gate(X86_TRAP_PF, page_fault);
-#endif
-}
-
 void __init trap_init(void)
 {
int i;




[patch V3 34/44] x86/idt: Prepare for table based init

2017-08-27 Thread Thomas Gleixner
The IDT setup code is handled in several places. All of them use variants
of the set_intr_gate() inlines. This can be done with a table based
initialization, which makes it possible to reduce the inline zoo and puts
all IDT related code and information into a single place.

Add the infrastructure.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/idt.c |   67 ++
 1 file changed, 67 insertions(+)

--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -5,8 +5,49 @@
  */
 #include 
 
+#include 
+#include 
 #include 
 
+struct idt_data {
+   unsigned intvector;
+   unsigned intsegment;
+   struct idt_bits bits;
+   const void  *addr;
+};
+
+#define DPL0   0x0
+#define DPL3   0x3
+
+#define DEFAULT_STACK  0
+
+#define G(_vector, _addr, _ist, _type, _dpl, _segment) \
+   {   \
+   .vector = _vector,  \
+   .bits.ist   = _ist, \
+   .bits.type  = _type,\
+   .bits.dpl   = _dpl, \
+   .bits.p = 1,\
+   .addr   = _addr,\
+   .segment= _segment, \
+   }
+
+/* Interrupt gate */
+#define INTG(_vector, _addr)   \
+   G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* System interrupt gate */
+#define SYSG(_vector, _addr)   \
+   G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+
+/* Interrupt gate with interrupt stack */
+#define ISTG(_vector, _addr, _ist) \
+   G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+
+/* Task gate */
+#define TSKG(_vector, _gdt)\
+   G(_vector, NULL, DEFAULT_STACK, GATE_TASK, DPL0, _gdt << 3)
+
 /* Must be page-aligned because the real IDT is used in a fixmap. */
 gate_desc idt_table[IDT_ENTRIES] __page_aligned_bss;
 
@@ -25,6 +66,32 @@ const struct desc_ptr debug_idt_descr =
 };
 #endif
 
+static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
+{
+   unsigned long addr = (unsigned long) d->addr;
+
+   gate->offset_low= (u16) addr;
+   gate->segment   = (u16) d->segment;
+   gate->bits  = d->bits;
+   gate->offset_middle = (u16) (addr >> 16);
+#ifdef CONFIG_X86_64
+   gate->offset_high   = (u32) (addr >> 32);
+   gate->reserved  = 0;
+#endif
+}
+
+static __init void
+idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size)
+{
+   gate_desc desc;
+
+   for (; size > 0; t++, size--) {
+   idt_init_desc(&desc, t);
+   set_bit(t->vector, used_vectors);
+   write_idt_entry(idt, t->vector, &desc);
+   }
+}
+
 /**
  * idt_setup_early_handler - Initializes the idt table with early handlers
  */


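To see what the table machinery produces, here is a stand-alone sketch that
copies the simplified structures and the INTG() macro from the patch and
packs one entry in user space. The __KERNEL_CS value (0x10 here) and the
handler are illustrative assumptions:

#include <stdint.h>
#include <stdio.h>

struct idt_bits {
	uint16_t ist : 3, zero : 5, type : 5, dpl : 2, p : 1;
} __attribute__((packed));

struct idt_data {
	unsigned int	vector;
	unsigned int	segment;
	struct idt_bits	bits;
	const void	*addr;
};

#define DPL0		0x0
#define DEFAULT_STACK	0
#define GATE_INTERRUPT	0xE
#define KERNEL_CS	0x10	/* assumed selector value, illustration only */

#define G(_vector, _addr, _ist, _type, _dpl, _segment)			\
	{								\
		.vector		= _vector,				\
		.bits.ist	= _ist,					\
		.bits.type	= _type,				\
		.bits.dpl	= _dpl,					\
		.bits.p		= 1,					\
		.addr		= _addr,				\
		.segment	= _segment,				\
	}

#define INTG(_vector, _addr)	\
	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL0, KERNEL_CS)

static void divide_error(void) { }	/* stand-in handler */

static const struct idt_data def_idts[] = {
	INTG(0, divide_error),		/* X86_TRAP_DE */
};

int main(void)
{
	const struct idt_data *d = &def_idts[0];
	unsigned long addr = (unsigned long)d->addr;

	/* The same split idt_init_desc() performs on the handler address. */
	printf("vector %u -> %#x:%04lx%04lx type %#x dpl %u p %u\n",
	       d->vector, d->segment, (addr >> 16) & 0xffff, addr & 0xffff,
	       (unsigned int)d->bits.type, (unsigned int)d->bits.dpl,
	       (unsigned int)d->bits.p);
	return 0;
}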


[patch V3 26/44] x86/gdt: Use bitfields for initialization

2017-08-27 Thread Thomas Gleixner
The GDT entry related code partially uses bitfields and partially uses
macros which initialize the two 16-bit parts of the entry with magic shift
and mask operations.

Clean it up and use the bitfields to initialize and access entries.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/entry/vdso/vma.c|2 -
 arch/x86/include/asm/desc.h  |   26 ++-
 arch/x86/include/asm/desc_defs.h |   44 +--
 arch/x86/math-emu/fpu_system.h   |2 -
 4 files changed, 38 insertions(+), 36 deletions(-)

--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -351,7 +351,7 @@ static void vgetcpu_cpu_init(void *arg)
 * and 8 bits for the node)
 */
d.limit0 = cpu | ((node & 0xf) << 12);
-   d.limit = node >> 4;
+   d.limit1 = node >> 4;
d.type = 5; /* RO data, expand down, accessed */
d.dpl = 3;  /* Visible to user code */
d.s = 1;/* Not a system segment */
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -23,7 +23,7 @@ static inline void fill_ldt(struct desc_
desc->s = 1;
desc->dpl   = 0x3;
desc->p = info->seg_not_present ^ 1;
-   desc->limit = (info->limit & 0xf0000) >> 16;
+   desc->limit1= (info->limit & 0xf0000) >> 16;
desc->avl   = info->useable;
desc->d = info->seg_32bit;
desc->g = info->limit_in_pages;
@@ -170,14 +170,20 @@ static inline void pack_descriptor(struc
   unsigned long limit, unsigned char type,
   unsigned char flags)
 {
-   desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
-   desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
-   (limit & 0x000f0000) | ((type & 0xff) << 8) |
-   ((flags & 0xf) << 20);
-   desc->p = 1;
+   desc->limit0= (u16) limit;
+   desc->base0 = (u16) base;
+   desc->base1 = (base >> 16) & 0xFF;
+   desc->type  = type & 0x0F;
+   desc->s = 0;
+   desc->dpl   = 0;
+   desc->p = 1;
+   desc->limit1= (limit >> 16) & 0xF;
+   desc->avl   = (flags >> 0) & 0x01;
+   desc->l = (flags >> 1) & 0x01;
+   desc->d = (flags >> 2) & 0x01;
+   desc->g = (flags >> 3) & 0x01;
 }
 
-
 static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 unsigned type, unsigned size)
 {
@@ -195,7 +201,7 @@ static inline void set_tssldt_descriptor
desc->base2 = (addr >> 24) & 0xFF;
desc->base3 = (u32) (addr >> 32);
 #else
-   pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
+   pack_descriptor((struct desc_struct *)d, addr, size, type, 0);
 #endif
 }
 
@@ -395,13 +401,13 @@ static inline void set_desc_base(struct
 
 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
 {
-   return desc->limit0 | (desc->limit << 16);
+   return desc->limit0 | (desc->limit1 << 16);
 }
 
 static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
 {
desc->limit0 = limit & 0xffff;
-   desc->limit = (limit >> 16) & 0xf;
+   desc->limit1 = (limit >> 16) & 0xf;
 }
 
 #ifdef CONFIG_X86_64
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -11,34 +11,30 @@
 
 #include 
 
-/*
- * FIXME: Accessing the desc_struct through its fields is more elegant,
- * and should be the one valid thing to do. However, a lot of open code
- * still touches the a and b accessors, and doing this allow us to do it
- * incrementally. We keep the signature as a struct, rather than a union,
- * so we can get rid of it transparently in the future -- glommer
- */
 /* 8 byte segment descriptor */
 struct desc_struct {
-   union {
-   struct {
-   unsigned int a;
-   unsigned int b;
-   };
-   struct {
-   u16 limit0;
-   u16 base0;
-   unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
-   unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
-   };
-   };
+   u16 limit0;
+   u16 base0;
+   u16 base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+   u16 limit1: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
 } __attribute__((packed));
 
-#define GDT_ENTRY_INIT(flags, base, limit) { { { \
-   .a = ((limit) & 0xffff) | (((base) & 0xffff) << 16), \
-   .b = (((base) & 0xff0000) >> 16) | (((flags) & 0xf0ff) << 8) | \
-   ((limit) & 0xf0000) | ((base) & 0xff000000), \
-

[patch V3 37/44] x86/idt: Move ist stack based traps to table init

2017-08-27 Thread Thomas Gleixner
Initialize the IST based traps via a table.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |2 ++
 arch/x86/kernel/idt.c   |   22 ++
 arch/x86/kernel/traps.c |9 +
 3 files changed, 25 insertions(+), 8 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -509,9 +509,11 @@ extern void idt_setup_early_traps(void);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
+extern void idt_setup_ist_traps(void);
 extern void idt_setup_debugidt_traps(void);
 #else
 static inline void idt_setup_early_pf(void) { }
+static inline void idt_setup_ist_traps(void) { }
 static inline void idt_setup_debugidt_traps(void) { }
 #endif
 
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -92,6 +92,20 @@ struct desc_ptr idt_descr __ro_after_ini
 gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
 
 /*
+ * The exceptions which use Interrupt stacks. They are setup after
+ * cpu_init() when the TSS has been initialized.
+ */
+static const __initdata struct idt_data ist_idts[] = {
+   ISTG(X86_TRAP_DB,   debug,  DEBUG_STACK),
+   ISTG(X86_TRAP_NMI,  nmi,NMI_STACK),
+   ISTG(X86_TRAP_BP,   int3,   DEBUG_STACK),
+   ISTG(X86_TRAP_DF,   double_fault,   DOUBLEFAULT_STACK),
+#ifdef CONFIG_X86_MCE
+   ISTG(X86_TRAP_MC,   &machine_check, MCE_STACK),
+#endif
+};
+
+/*
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
@@ -158,6 +172,14 @@ void __init idt_setup_early_pf(void)
 }
 
 /**
+ * idt_setup_ist_traps - Initialize the idt table with traps using IST
+ */
+void __init idt_setup_ist_traps(void)
+{
+   idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts));
+}
+
+/**
  * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
  */
 void __init idt_setup_debugidt_traps(void)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -979,14 +979,7 @@ void __init trap_init(void)
 */
cpu_init();
 
-   /*
-* X86_TRAP_DB and X86_TRAP_BP have been set
-* in early_trap_init(). However, ITS works only after
-* cpu_init() loads TSS. See comments in early_trap_init().
-*/
-   set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
-   /* int3 can be called from all */
-   set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+   idt_setup_ist_traps();
 
x86_init.irqs.trap_init();
 




Re: [PATCH net-next v2 05/14] net: mvpp2: do not force the link mode

2017-08-27 Thread Antoine Tenart
Hi Russell,

On Fri, Aug 25, 2017 at 11:43:13PM +0100, Russell King - ARM Linux wrote:
> On Fri, Aug 25, 2017 at 04:48:12PM +0200, Antoine Tenart wrote:
> > The link mode (speed, duplex) was forced based on what the phylib
> > returns. This should not be the case, and only forced by ethtool
> > functions manually. This patch removes the link mode enforcement from
> > the phylib link_event callback.
> 
> So how does RGMII work (which has no in-band signalling between the PHY
> and MAC)?
> 
> phylib expects the network driver to configure it according to the PHY
> state at link_event time - I think you need to explain more why you
> think that this is not necessary.

Good catch, this won't work properly with RGMII. This could be done
out-of-band according to the spec, but that would use PHY polling and we
do not want that (the same concern was raised by Andrew on another
patch).

I'll keep this mode enforcement for RGMII then.

Thanks!
Antoine

-- 
Antoine Ténart, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com



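For reference, the RGMII point is that the link carries no in-band
speed/duplex status between PHY and MAC, so the MAC has to be programmed from
the phylib state when the link changes. A hedged sketch of that pattern with
generic names (not the mvpp2 driver; the two mac_set_* helpers are assumed
placeholders):

#include <linux/phy.h>
#include <linux/netdevice.h>

/* Placeholder MAC programming helpers - assumptions, not a real driver API. */
static void example_mac_set_speed(struct net_device *dev, int speed) { }
static void example_mac_set_duplex(struct net_device *dev, int duplex) { }

/* phylib adjust_link/link_event callback: runs when the PHY reports a change. */
static void example_adjust_link(struct net_device *dev)
{
	struct phy_device *phydev = dev->phydev;

	if (phydev->link) {
		/*
		 * RGMII has no in-band signalling, so tell the MAC what the
		 * PHY actually negotiated.
		 */
		example_mac_set_speed(dev, phydev->speed);
		example_mac_set_duplex(dev, phydev->duplex);
	}

	phy_print_status(phydev);
}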

[patch V3 39/44] x86/idt: Move APIC gate initialization to tables

2017-08-27 Thread Thomas Gleixner
Replace the APIC/SMP vector gate initialization with the table based
mechanism.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |1 
 arch/x86/kernel/idt.c   |   48 ++
 arch/x86/kernel/irqinit.c   |   69 
 3 files changed, 50 insertions(+), 68 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -507,6 +507,7 @@ static inline void load_current_idt(void
 extern void idt_setup_early_handler(void);
 extern void idt_setup_early_traps(void);
 extern void idt_setup_traps(void);
+extern void idt_setup_apic_and_irq_gates(void);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -103,6 +103,46 @@ static const __initdata struct idt_data
 #endif
 };
 
+/*
+ * The APIC and SMP idt entries
+ */
+static const __initdata struct idt_data apic_idts[] = {
+#ifdef CONFIG_SMP
+   INTG(RESCHEDULE_VECTOR, reschedule_interrupt),
+   INTG(CALL_FUNCTION_VECTOR,  call_function_interrupt),
+   INTG(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt),
+   INTG(IRQ_MOVE_CLEANUP_VECTOR,   irq_move_cleanup_interrupt),
+   INTG(REBOOT_VECTOR, reboot_interrupt),
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
+   INTG(THERMAL_APIC_VECTOR,   thermal_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_THRESHOLD
+   INTG(THRESHOLD_APIC_VECTOR, threshold_interrupt),
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+   INTG(DEFERRED_ERROR_VECTOR, deferred_error_interrupt),
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+   INTG(LOCAL_TIMER_VECTOR,apic_timer_interrupt),
+   INTG(X86_PLATFORM_IPI_VECTOR,   x86_platform_ipi),
+# ifdef CONFIG_HAVE_KVM
+   INTG(POSTED_INTR_VECTOR,kvm_posted_intr_ipi),
+   INTG(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi),
+   INTG(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi),
+# endif
+# ifdef CONFIG_IRQ_WORK
+   INTG(IRQ_WORK_VECTOR,   irq_work_interrupt),
+# endif
+   INTG(SPURIOUS_APIC_VECTOR,  spurious_interrupt),
+   INTG(ERROR_APIC_VECTOR, error_interrupt),
+#endif
+};
+
 #ifdef CONFIG_X86_64
 /*
  * Early traps running on the DEFAULT_STACK because the other interrupt
@@ -242,6 +282,14 @@ void __init idt_setup_debugidt_traps(voi
 #endif
 
 /**
+ * idt_setup_apic_and_irq_gates - Setup APIC/SMP and normal interrupt gates
+ */
+void __init idt_setup_apic_and_irq_gates(void)
+{
+   idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts));
+}
+
+/**
  * idt_setup_early_handler - Initializes the idt table with early handlers
  */
 void __init idt_setup_early_handler(void)
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -87,73 +87,6 @@ void __init init_IRQ(void)
x86_init.irqs.intr_init();
 }
 
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-   /*
-* The reschedule interrupt is a CPU-to-CPU reschedule-helper
-* IPI, driven by wakeup.
-*/
-   alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-   /* IPI for generic function call */
-   alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-   /* IPI for generic single function call */
-   alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-   call_function_single_interrupt);
-
-   /* Low priority IPI to cleanup after moving an irq */
-   set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-   set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-
-   /* IPI used for rebooting/stopping */
-   alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif /* CONFIG_SMP */
-}
-
-static void __init apic_intr_init(void)
-{
-   smp_intr_init();
-
-#ifdef CONFIG_X86_THERMAL_VECTOR
-   alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
-   alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-   alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt);
-#endif
-
-#ifdef CONFIG_X86_LOCAL_APIC
-   /* self generated IPI for local APIC timer */
-   alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-   /* IPI for X86 platform specific use */
-   alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
-#ifdef CONFIG_HAVE_KVM
-   /* IPI for KVM to deliver posted interrupt */
-   alloc_intr_gate(POSTED_INTR_VECTOR, kvm_posted_intr_ipi);
-   /* IPI for KVM to deliver interrupt to wake up tasks */
-   alloc_intr_gate(POSTED_INTR_WAKEUP_VECTOR, kvm_posted_intr_wakeup_ipi);
-   /* IPI for KVM to deliver nested posted interrupt */
-   alloc_intr_gate(POSTED_INTR_NESTED_VECTOR, kvm_posted_intr_nested_ipi);
-#endif
-
-   /* IPI vectors for APIC spurious and error interrupts */
-   alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);

Re: [RFC] workqueue: remove manual lockdep uses to detect deadlocks

2017-08-27 Thread Peter Zijlstra
On Fri, Aug 25, 2017 at 05:41:03PM +0900, Byungchul Park wrote:
> Hello all,
> 
> This is _RFC_.
> 
> I want to request for comments about if it's reasonable conceptually. If
> yes, I want to resend after working it more carefully.
> 
> Could you let me know your opinions about this?
> 
> ->8-
> From 448360c343477fff63df766544eec4620657a59e Mon Sep 17 00:00:00 2001
> From: Byungchul Park 
> Date: Fri, 25 Aug 2017 17:35:07 +0900
> Subject: [RFC] workqueue: remove manual lockdep uses to detect deadlocks
> 
> We introduced the following commit to detect deadlocks caused by
> wait_for_completion() in flush_{workqueue, work}() and other locks. But
> now LOCKDEP_COMPLETIONS is introduced, such works are automatically done
> by LOCKDEP_COMPLETIONS. So it doesn't have to be done manually anymore.
> Removed it.
> 

No.. the existing annotation is strictly better because it will _always_
warn. It doesn't need to first observe things just right.

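For context, the manual annotation being defended is the acquire/release pair
on the workqueue's lockdep_map, roughly as below (paraphrased from
kernel/workqueue.c, shown out of context). Because the dependency is recorded
on every flush, before any waiting happens, lockdep can warn even when the
problematic interleaving never actually occurs - the "always warn" property
referred to above:

	/*
	 * Record "whoever flushes this workqueue depends on everything its
	 * work items may take" - unconditionally, before the wait.
	 */
	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);

	/* ... then actually wait for the pending work to finish ... */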

[patch V3 36/44] x86/idt: Move debug stack init to table based

2017-08-27 Thread Thomas Gleixner
Add the debug_idt init table and make use of it.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |2 ++
 arch/x86/kernel/idt.c   |   23 +++
 arch/x86/kernel/traps.c |6 +-
 3 files changed, 26 insertions(+), 5 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -509,8 +509,10 @@ extern void idt_setup_early_traps(void);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
+extern void idt_setup_debugidt_traps(void);
 #else
 static inline void idt_setup_early_pf(void) { }
+static inline void idt_setup_debugidt_traps(void) { }
 #endif
 
 extern void idt_invalidate(void *addr);
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -68,6 +68,15 @@ static const __initdata struct idt_data
 static const __initdata struct idt_data early_pf_idts[] = {
INTG(X86_TRAP_PF,   page_fault),
 };
+
+/*
+ * Override for the debug_idt. Same as the default, but with interrupt
+ * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
+ */
+static const __initdata struct idt_data dbg_idts[] = {
+   INTG(X86_TRAP_DB,   debug),
+   INTG(X86_TRAP_BP,   int3),
+};
 #endif
 
 /* Must be page-aligned because the real IDT is used in a fixmap. */
@@ -82,6 +91,10 @@ struct desc_ptr idt_descr __ro_after_ini
 /* No need to be aligned, but done to keep all IDTs defined the same way. */
 gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
 
+/*
+ * Override for the debug_idt. Same as the default, but with interrupt
+ * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
+ */
 const struct desc_ptr debug_idt_descr = {
.size   = IDT_ENTRIES * 16 - 1,
.address= (unsigned long) debug_idt_table,
@@ -143,6 +156,16 @@ void __init idt_setup_early_pf(void)
idt_setup_from_table(idt_table, early_pf_idts,
 ARRAY_SIZE(early_pf_idts));
 }
+
+/**
+ * idt_setup_debugidt_traps - Initialize the debug idt table with debug traps
+ */
+void __init idt_setup_debugidt_traps(void)
+{
+   memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
+
+   idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts));
+}
 #endif
 
 /**
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -990,9 +990,5 @@ void __init trap_init(void)
 
x86_init.irqs.trap_init();
 
-#ifdef CONFIG_X86_64
-   memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
-   set_nmi_gate(X86_TRAP_DB, &debug);
-   set_nmi_gate(X86_TRAP_BP, &int3);
-#endif
+   idt_setup_debugidt_traps();
 }


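The literal 16 in the memcpy is the 64-bit gate descriptor size (the copy is
only built for CONFIG_X86_64). An equivalent spelling that takes the size
from the array itself, as a sketch:

	memcpy(&debug_idt_table, &idt_table, sizeof(debug_idt_table));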


[patch V3 40/44] x86/idt: Move interrupt gate initialization to IDT code

2017-08-27 Thread Thomas Gleixner
Move the gate initialization from interrupt init to the IDT code so all IDT
related operations are in a single place.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/idt.c |   18 ++
 arch/x86/kernel/irqinit.c |   18 --
 2 files changed, 18 insertions(+), 18 deletions(-)

--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -286,7 +286,25 @@ void __init idt_setup_debugidt_traps(voi
  */
 void __init idt_setup_apic_and_irq_gates(void)
 {
+   int i = FIRST_EXTERNAL_VECTOR;
+   void *entry;
+
idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts));
+
+   for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
+   entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+   set_intr_gate(i, entry);
+   }
+
+   for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifdef CONFIG_X86_LOCAL_APIC
+   set_bit(i, used_vectors);
+   set_intr_gate(i, spurious_interrupt);
+#else
+   entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
+   set_intr_gate(i, entry);
+#endif
+   }
 }
 
 /**
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -89,29 +89,11 @@ void __init init_IRQ(void)
 
 void __init native_init_IRQ(void)
 {
-   int i;
-
/* Execute any quirks before the call gates are initialised: */
x86_init.irqs.pre_vector_init();
 
idt_setup_apic_and_irq_gates();
 
-   /*
-* Cover the whole vector space, no vector can escape
-* us. (some of these will be overridden and become
-* 'special' SMP interrupts)
-*/
-   i = FIRST_EXTERNAL_VECTOR;
-   for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
-   /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-   set_intr_gate(i, irq_entries_start +
-   8 * (i - FIRST_EXTERNAL_VECTOR));
-   }
-#ifdef CONFIG_X86_LOCAL_APIC
-   for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
-   set_intr_gate(i, spurious_interrupt);
-#endif
-
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
 




[patch V3 38/44] x86/idt: Move regular trap init to tables

2017-08-27 Thread Thomas Gleixner
Initialize the regular traps with a table.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |1 
 arch/x86/kernel/idt.c   |   51 
 arch/x86/kernel/traps.c |   41 ---
 3 files changed, 53 insertions(+), 40 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -506,6 +506,7 @@ static inline void load_current_idt(void
 
 extern void idt_setup_early_handler(void);
 extern void idt_setup_early_traps(void);
+extern void idt_setup_traps(void);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -60,6 +60,49 @@ static const __initdata struct idt_data
 #endif
 };
 
+/*
+ * The default IDT entries which are set up in trap_init() before
+ * cpu_init() is invoked. Interrupt stacks cannot be used at that point and
+ * the traps which use them are reinitialized with IST after cpu_init() has
+ * set up TSS.
+ */
+static const __initdata struct idt_data def_idts[] = {
+   INTG(X86_TRAP_DE,   divide_error),
+   INTG(X86_TRAP_NMI,  nmi),
+   INTG(X86_TRAP_BR,   bounds),
+   INTG(X86_TRAP_UD,   invalid_op),
+   INTG(X86_TRAP_NM,   device_not_available),
+   INTG(X86_TRAP_OLD_MF,   coprocessor_segment_overrun),
+   INTG(X86_TRAP_TS,   invalid_TSS),
+   INTG(X86_TRAP_NP,   segment_not_present),
+   INTG(X86_TRAP_SS,   stack_segment),
+   INTG(X86_TRAP_GP,   general_protection),
+   INTG(X86_TRAP_SPURIOUS, spurious_interrupt_bug),
+   INTG(X86_TRAP_MF,   coprocessor_error),
+   INTG(X86_TRAP_AC,   alignment_check),
+   INTG(X86_TRAP_XF,   simd_coprocessor_error),
+
+#ifdef CONFIG_X86_32
+   TSKG(X86_TRAP_DF,   GDT_ENTRY_DOUBLEFAULT_TSS),
+#else
+   INTG(X86_TRAP_DF,   double_fault),
+#endif
+   INTG(X86_TRAP_DB,   debug),
+   INTG(X86_TRAP_NMI,  nmi),
+   INTG(X86_TRAP_BP,   int3),
+
+#ifdef CONFIG_X86_MCE
+   INTG(X86_TRAP_MC,   &machine_check),
+#endif
+
+   SYSG(X86_TRAP_OF,   overflow),
+#if defined(CONFIG_IA32_EMULATION)
+   SYSG(IA32_SYSCALL_VECTOR,   entry_INT80_compat),
+#elif defined(CONFIG_X86_32)
+   SYSG(IA32_SYSCALL_VECTOR,   entry_INT80_32),
+#endif
+};
+
 #ifdef CONFIG_X86_64
 /*
  * Early traps running on the DEFAULT_STACK because the other interrupt
@@ -154,6 +197,14 @@ void __init idt_setup_early_traps(void)
load_idt(&idt_descr);
 }
 
+/**
+ * idt_setup_traps - Initialize the idt table with default traps
+ */
+void __init idt_setup_traps(void)
+{
+   idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts));
+}
+
 #ifdef CONFIG_X86_64
 /**
  * idt_setup_early_pf - Initialize the idt table with early pagefault handler
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -925,46 +925,7 @@ dotraplinkage void do_iret_error(struct
 
 void __init trap_init(void)
 {
-   int i;
-
-   set_intr_gate(X86_TRAP_DE, divide_error);
-   set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
-   /* int4 can be called from all */
-   set_system_intr_gate(X86_TRAP_OF, &overflow);
-   set_intr_gate(X86_TRAP_BR, bounds);
-   set_intr_gate(X86_TRAP_UD, invalid_op);
-   set_intr_gate(X86_TRAP_NM, device_not_available);
-#ifdef CONFIG_X86_32
-   set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
-#else
-   set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
-#endif
-   set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
-   set_intr_gate(X86_TRAP_TS, invalid_TSS);
-   set_intr_gate(X86_TRAP_NP, segment_not_present);
-   set_intr_gate(X86_TRAP_SS, stack_segment);
-   set_intr_gate(X86_TRAP_GP, general_protection);
-   set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
-   set_intr_gate(X86_TRAP_MF, coprocessor_error);
-   set_intr_gate(X86_TRAP_AC, alignment_check);
-#ifdef CONFIG_X86_MCE
-   set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
-#endif
-   set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
-
-   /* Reserve all the builtin and the syscall vector: */
-   for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
-   set_bit(i, used_vectors);
-
-#ifdef CONFIG_IA32_EMULATION
-   set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
-   set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
-
-#ifdef CONFIG_X86_32
-   set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
-   set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#endif
+   idt_setup_traps();
 
/*
 * Set the IDT descriptor to a fixed read-only location, so that the




[patch V3 27/44] x86/ldttss: Cleanup 32bit descriptors

2017-08-27 Thread Thomas Gleixner
Like the IDT descriptors, the LDT/TSS descriptors are pointlessly different
on 32 and 64 bit.

Unify them and get rid of the duplicated code.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h  |   26 +++---
 arch/x86/include/asm/desc_defs.h |   27 ---
 2 files changed, 15 insertions(+), 38 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -166,42 +166,22 @@ native_write_gdt_entry(struct desc_struc
memcpy(&gdt[entry], desc, size);
 }
 
-static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
-  unsigned long limit, unsigned char type,
-  unsigned char flags)
-{
-   desc->limit0= (u16) limit;
-   desc->base0 = (u16) base;
-   desc->base1 = (base >> 16) & 0xFF;
-   desc->type  = type & 0x0F;
-   desc->s = 0;
-   desc->dpl   = 0;
-   desc->p = 1;
-   desc->limit1= (limit >> 16) & 0xF;
-   desc->avl   = (flags >> 0) & 0x01;
-   desc->l = (flags >> 1) & 0x01;
-   desc->d = (flags >> 2) & 0x01;
-   desc->g = (flags >> 3) & 0x01;
-}
-
 static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 unsigned type, unsigned size)
 {
-#ifdef CONFIG_X86_64
-   struct ldttss_desc64 *desc = d;
+   struct ldttss_desc *desc = d;
 
memset(desc, 0, sizeof(*desc));
 
-   desc->limit0= size & 0xFFFF;
+   desc->limit0= (u16) size;
desc->base0 = (u16) addr;
desc->base1 = (addr >> 16) & 0xFF;
desc->type  = type;
desc->p = 1;
desc->limit1= (size >> 16) & 0xF;
desc->base2 = (addr >> 24) & 0xFF;
+#ifdef CONFIG_X86_64
desc->base3 = (u32) (addr >> 32);
-#else
-   pack_descriptor((struct desc_struct *)d, addr, size, type, 0);
 #endif
 }
 
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -49,24 +49,21 @@ enum {
DESCTYPE_S = 0x10,  /* !system */
 };
 
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct ldttss_desc64 {
-   u16 limit0;
-   u16 base0;
-   unsigned base1 : 8, type : 5, dpl : 2, p : 1;
-   unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
-   u32 base3;
-   u32 zero1;
-} __attribute__((packed));
-
+/* LDT or TSS descriptor in the GDT. */
+struct ldttss_desc {
+   u16 limit0;
+   u16 base0;
 
+   u16 base1 : 8, type : 5, dpl : 2, p : 1;
+   u16 limit1 : 4, zero0 : 3, g : 1, base2 : 8;
 #ifdef CONFIG_X86_64
-typedef struct ldttss_desc64 ldt_desc;
-typedef struct ldttss_desc64 tss_desc;
-#else
-typedef struct desc_struct ldt_desc;
-typedef struct desc_struct tss_desc;
+   u32 base3;
+   u32 zero1;
 #endif
+} __attribute__((packed));
+
+typedef struct ldttss_desc ldt_desc;
+typedef struct ldttss_desc tss_desc;
 
 struct idt_bits {
u16 ist : 3,




[patch V3 42/44] x86/idt: Deinline setup functions

2017-08-27 Thread Thomas Gleixner
Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |   37 ++---
 arch/x86/kernel/idt.c   |   43 ++-
 2 files changed, 36 insertions(+), 44 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -390,44 +390,11 @@ static inline void set_desc_limit(struct
desc->limit1 = (limit >> 16) & 0xf;
 }
 
-static inline void _set_gate(int gate, unsigned type, const void *addr,
-unsigned dpl, unsigned ist, unsigned seg)
-{
-   gate_desc s;
-
-   pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
-   /*
-* does not need to be atomic because it is only done once at
-* setup time
-*/
-   write_idt_entry(idt_table, gate, &s);
-}
-
-static inline void set_intr_gate(unsigned int n, const void *addr)
-{
-   BUG_ON(n > 0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
-}
+void set_intr_gate(unsigned int n, const void *addr);
+void alloc_intr_gate(unsigned int n, const void *addr);
 
 extern unsigned long used_vectors[];
 
-static inline void alloc_system_vector(int vector)
-{
-   BUG_ON(vector < FIRST_SYSTEM_VECTOR);
-   if (!test_bit(vector, used_vectors)) {
-   set_bit(vector, used_vectors);
-   } else {
-   BUG();
-   }
-}
-
-#define alloc_intr_gate(n, addr)   \
-   do {\
-   alloc_system_vector(n); \
-   set_intr_gate(n, addr); \
-   } while (0)
-
-
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u32, debug_idt_ctr);
 static inline bool is_debug_idt_enabled(void)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -212,15 +212,16 @@ static inline void idt_init_desc(gate_de
 #endif
 }
 
-static __init void
-idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size)
+static void
+idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sys)
 {
gate_desc desc;
 
for (; size > 0; t++, size--) {
idt_init_desc(&desc, t);
-   set_bit(t->vector, used_vectors);
write_idt_entry(idt, t->vector, &desc);
+   if (sys)
+   set_bit(t->vector, used_vectors);
}
 }
 
@@ -233,7 +234,8 @@ idt_setup_from_table(gate_desc *idt, con
  */
 void __init idt_setup_early_traps(void)
 {
-   idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts));
+   idt_setup_from_table(idt_table, early_idts, ARRAY_SIZE(early_idts),
+true);
load_idt(&idt_descr);
 }
 
@@ -242,7 +244,7 @@ void __init idt_setup_early_traps(void)
  */
 void __init idt_setup_traps(void)
 {
-   idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts));
+   idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true);
 }
 
 #ifdef CONFIG_X86_64
@@ -259,7 +261,7 @@ void __init idt_setup_traps(void)
 void __init idt_setup_early_pf(void)
 {
idt_setup_from_table(idt_table, early_pf_idts,
-ARRAY_SIZE(early_pf_idts));
+ARRAY_SIZE(early_pf_idts), true);
 }
 
 /**
@@ -267,7 +269,7 @@ void __init idt_setup_early_pf(void)
  */
 void __init idt_setup_ist_traps(void)
 {
-   idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts));
+   idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
 }
 
 /**
@@ -277,7 +279,7 @@ void __init idt_setup_debugidt_traps(voi
 {
memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
 
-   idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts));
+   idt_setup_from_table(debug_idt_table, dbg_idts, ARRAY_SIZE(dbg_idts), false);
 }
 #endif
 
@@ -289,7 +291,7 @@ void __init idt_setup_apic_and_irq_gates
int i = FIRST_EXTERNAL_VECTOR;
void *entry;
 
-   idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts));
+   idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
 
for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) {
entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR);
@@ -333,3 +335,26 @@ void idt_invalidate(void *addr)
 
load_idt(&idt);
 }
+
+void set_intr_gate(unsigned int n, const void *addr)
+{
+   struct idt_data data;
+
+   BUG_ON(n > 0xFF);
+
+   memset(&data, 0, sizeof(data));
+   data.vector = n;
+   data.addr   = addr;
+   data.segment= __KERNEL_CS;
+   data.bits.type  = GATE_INTERRUPT;
+   data.bits.p = 1;
+
+   idt_setup_from_table(idt_table, &data, 1, false);
+}
+
+void alloc_intr_gate(unsigned int n, const void *addr)
+{
+   BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR);
+   set_bit(n, used_vectors);
+   set_intr_gate(n, addr);
+}

[patch V3 41/44] x86/idt: Remove unused functions/inlines

2017-08-27 Thread Thomas Gleixner
The IDT related inlines are no longer used. Remove them.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/include/asm/desc.h |   36 
 1 file changed, 36 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -390,16 +390,6 @@ static inline void set_desc_limit(struct
desc->limit1 = (limit >> 16) & 0xf;
 }
 
-#ifdef CONFIG_X86_64
-static inline void set_nmi_gate(int gate, void *addr)
-{
-   gate_desc s;
-
-   pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
-   write_idt_entry(debug_idt_table, gate, &s);
-}
-#endif
-
 static inline void _set_gate(int gate, unsigned type, const void *addr,
 unsigned dpl, unsigned ist, unsigned seg)
 {
@@ -437,32 +427,6 @@ static inline void alloc_system_vector(i
set_intr_gate(n, addr); \
} while (0)
 
-/*
- * This routine sets up an interrupt gate at directory privilege level 3.
- */
-static inline void set_system_intr_gate(unsigned int n, void *addr)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
-}
-
-static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
-}
-
-static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
-}
-
-static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
-{
-   BUG_ON((unsigned)n > 0xFF);
-   _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
-}
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u32, debug_idt_ctr);




[patch V3 44/44] x86/idt: Hide set_intr_gate()

2017-08-27 Thread Thomas Gleixner
set_intr_gate() is an internal function of the IDT code. The only user left
is the KVM code, which eventually replaces the pagefault handler.

Provide an explicit update_intr_gate() function and make set_intr_gate()
static. While at it, replace the magic number 14 in the KVM code with the
proper trap define.

Signed-off-by: Thomas Gleixner 
Acked-by: Paolo Bonzini 
---
 arch/x86/include/asm/desc.h |2 +-
 arch/x86/kernel/idt.c   |   33 -
 arch/x86/kernel/kvm.c   |2 +-
 3 files changed, 22 insertions(+), 15 deletions(-)

--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -390,7 +390,7 @@ static inline void set_desc_limit(struct
desc->limit1 = (limit >> 16) & 0xf;
 }
 
-void set_intr_gate(unsigned int n, const void *addr);
+void update_intr_gate(unsigned int n, const void *addr);
 void alloc_intr_gate(unsigned int n, const void *addr);
 
 extern unsigned long used_vectors[];
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -225,6 +225,22 @@ idt_setup_from_table(gate_desc *idt, con
}
 }
 
+static void set_intr_gate(unsigned int n, const void *addr)
+{
+   struct idt_data data;
+
+   BUG_ON(n > 0xFF);
+
+   memset(&data, 0, sizeof(data));
+   data.vector = n;
+   data.addr   = addr;
+   data.segment= __KERNEL_CS;
+   data.bits.type  = GATE_INTERRUPT;
+   data.bits.p = 1;
+
+   idt_setup_from_table(idt_table, &data, 1, false);
+}
+
 /**
  * idt_setup_early_traps - Initialize the idt table with early traps
  *
@@ -336,20 +352,11 @@ void idt_invalidate(void *addr)
load_idt(&idt);
 }
 
-void set_intr_gate(unsigned int n, const void *addr)
+void __init update_intr_gate(unsigned int n, const void *addr)
 {
-   struct idt_data data;
-
-   BUG_ON(n > 0xFF);
-
-   memset(&data, 0, sizeof(data));
-   data.vector = n;
-   data.addr   = addr;
-   data.segment= __KERNEL_CS;
-   data.bits.type  = GATE_INTERRUPT;
-   data.bits.p = 1;
-
-   idt_setup_from_table(idt_table, &data, 1, false);
+   if (WARN_ON_ONCE(!test_bit(n, used_vectors)))
+   return;
+   set_intr_gate(n, addr);
 }
 
 void alloc_intr_gate(unsigned int n, const void *addr)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -455,7 +455,7 @@ static int kvm_cpu_down_prepare(unsigned
 
 static void __init kvm_apf_trap_init(void)
 {
-   set_intr_gate(14, async_page_fault);
+   update_intr_gate(X86_TRAP_PF, async_page_fault);
 }
 
 void __init kvm_guest_init(void)




[patch V3 24/44] x86/fpu: Use bitfield accessors for desc_struct

2017-08-27 Thread Thomas Gleixner
desc_struct is a union of u32 fields and bitfields. The access to the u32
fields is done with magic macros.

Convert it to use the bitfields and replace the macro magic with parseable
inline functions.
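
For callers the conversion looks roughly like this (illustration only, not
part of the patch; the actual hunks are below):

	/* before: magic macros poking at the raw u32 halves */
	if (SEG_D_SIZE(code_descriptor))
		code_base = SEG_BASE_ADDR(code_descriptor);

	/* after: named bitfields behind small inline helpers */
	if (code_descriptor.d)
		code_base = seg_get_base(&code_descriptor);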

Signed-off-by: Thomas Gleixner 
---
 arch/x86/math-emu/fpu_entry.c   |   11 -
 arch/x86/math-emu/fpu_system.h  |   48 ++--
 arch/x86/math-emu/get_address.c |   17 +++---
 3 files changed, 51 insertions(+), 25 deletions(-)

--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -147,7 +147,7 @@ void math_emulate(struct math_emu_info *
}
 
code_descriptor = FPU_get_ldt_descriptor(FPU_CS);
-   if (SEG_D_SIZE(code_descriptor)) {
+   if (code_descriptor.d) {
/* The above test may be wrong, the book is not clear */
/* Segmented 32 bit protected mode */
addr_modes.default_mode = SEG32;
@@ -155,11 +155,10 @@ void math_emulate(struct math_emu_info *
/* 16 bit protected mode */
addr_modes.default_mode = PM16;
}
-   FPU_EIP += code_base = SEG_BASE_ADDR(code_descriptor);
-   code_limit = code_base
-   + (SEG_LIMIT(code_descriptor) +
-  1) * SEG_GRANULARITY(code_descriptor)
-   - 1;
+   FPU_EIP += code_base = seg_get_base(&code_descriptor);
+   code_limit = seg_get_limit(&code_descriptor) + 1;
+   code_limit *= seg_get_granularity(&code_descriptor);
+   code_limit += code_base - 1;
if (code_limit < code_base)
code_limit = 0xffffffff;
}
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -34,17 +34,43 @@ static inline struct desc_struct FPU_get
return ret;
 }
 
-#define SEG_D_SIZE(x)  ((x).b & (3 << 21))
-#define SEG_G_BIT(x)   ((x).b & (1 << 23))
-#define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1)
-#define SEG_286_MODE(x)((x).b & ( 0xff00 | 0xf | (1 << 23)))
-#define SEG_BASE_ADDR(s)   (((s).b & 0xff00) \
-| (((s).b & 0xff) << 16) | ((s).a >> 16))
-#define SEG_LIMIT(s)   (((s).b & 0xff) | ((s).a & 0x))
-#define SEG_EXECUTE_ONLY(s)(((s).b & ((1 << 11) | (1 << 9))) == (1 << 11))
-#define SEG_WRITE_PERM(s)  (((s).b & ((1 << 11) | (1 << 9))) == (1 << 9))
-#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
-== (1 << 10))
+#define SEG_TYPE_WRITABLE  (1U << 1)
+#define SEG_TYPE_EXPANDS_DOWN  (1U << 2)
+#define SEG_TYPE_EXECUTE   (1U << 3)
+#define SEG_TYPE_EXPAND_MASK   (SEG_TYPE_EXPANDS_DOWN | SEG_TYPE_EXECUTE)
+#define SEG_TYPE_EXECUTE_MASK  (SEG_TYPE_WRITABLE | SEG_TYPE_EXECUTE)
+
+static inline unsigned long seg_get_base(struct desc_struct *d)
+{
+   unsigned long base = (unsigned long)d->base2 << 24;
+
+   return base | ((unsigned long)d->base1 << 16) | d->base0;
+}
+
+static inline unsigned long seg_get_limit(struct desc_struct *d)
+{
+   return ((unsigned long)d->limit << 16) | d->limit0;
+}
+
+static inline unsigned long seg_get_granularity(struct desc_struct *d)
+{
+   return d->g ? 4096 : 1;
+}
+
+static inline bool seg_expands_down(struct desc_struct *d)
+{
+   return (d->type & SEG_TYPE_EXPAND_MASK) == SEG_TYPE_EXPANDS_DOWN;
+}
+
+static inline bool seg_execute_only(struct desc_struct *d)
+{
+   return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_EXECUTE;
+}
+
+static inline bool seg_writable(struct desc_struct *d)
+{
+   return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
+}
 
 #define I387   (&current->thread.fpu.state)
 #define FPU_info   (I387->soft.info)
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -159,17 +159,18 @@ static long pm_address(u_char FPU_modrm,
}
 
descriptor = FPU_get_ldt_descriptor(addr->selector);
-   base_address = SEG_BASE_ADDR(descriptor);
+   base_address = seg_get_base(&descriptor);
address = base_address + offset;
-   limit = base_address
-   + (SEG_LIMIT(descriptor) + 1) * SEG_GRANULARITY(descriptor) - 1;
+   limit = seg_get_limit(&descriptor) + 1;
+   limit *= seg_get_granularity(&descriptor);
+   limit += base_address - 1;
if (limit < base_address)
limit = 0xffffffff;
 
-   if (SEG_EXPAND_DOWN(descriptor)) {
-   if (SEG_G_BIT(descriptor))
+   if (seg_expands_down(&descriptor)) {
+   if (descriptor.g) {
seg_top = 0xffffffff;
-   else {
+   } else {
seg_top = base_address + (1 << 20);
if (seg_top < base_address)
seg_top = 0xffffffff;

[patch V3 11/44] x86/apic: Remove the duplicated tracing versions of interrupts

2017-08-27 Thread Thomas Gleixner
The error and the spurious interrupt are really rare events, and they are not
so performance sensitive that two NOP5s cannot be tolerated when tracing
is disabled.

Remove the nonsense.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Steven Rostedt (VMware) 
---
 arch/x86/include/asm/hw_irq.h |4 +--
 arch/x86/kernel/apic/apic.c   |   43 +-
 2 files changed, 12 insertions(+), 35 deletions(-)

--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -48,15 +48,15 @@ extern asmlinkage void call_function_sin
 
 #ifdef CONFIG_TRACING
 /* Interrupt handlers registered during init_IRQ */
-extern void trace_error_interrupt(void);
 extern void trace_irq_work_interrupt(void);
-extern void trace_spurious_interrupt(void);
 extern void trace_thermal_interrupt(void);
 extern void trace_reschedule_interrupt(void);
 extern void trace_threshold_interrupt(void);
 extern void trace_deferred_error_interrupt(void);
 extern void trace_call_function_interrupt(void);
 extern void trace_call_function_single_interrupt(void);
+#define trace_error_interrupt error_interrupt
+#define trace_spurious_interrupt spurious_interrupt
 #define trace_x86_platform_ipi x86_platform_ipi
 #define trace_apic_timer_interrupt apic_timer_interrupt
 #define trace_irq_move_cleanup_interrupt  irq_move_cleanup_interrupt
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1899,10 +1899,14 @@ void __init register_lapic_address(unsig
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static void __smp_spurious_interrupt(u8 vector)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
+   u8 vector = ~regs->orig_ax;
u32 v;
 
+   entering_irq();
+   trace_spurious_apic_entry(vector);
+
/*
 * Check if this really is a spurious interrupt and ACK it
 * if it is a vectored one.  Just in case...
@@ -1917,22 +1921,7 @@ static void __smp_spurious_interrupt(u8
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
"should never happen.\n", vector, smp_processor_id());
-}
 
-__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
-{
-   entering_irq();
-   __smp_spurious_interrupt(~regs->orig_ax);
-   exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
-{
-   u8 vector = ~regs->orig_ax;
-
-   entering_irq();
-   trace_spurious_apic_entry(vector);
-   __smp_spurious_interrupt(vector);
trace_spurious_apic_exit(vector);
exiting_irq();
 }
@@ -1940,10 +1929,8 @@ static void __smp_spurious_interrupt(u8
 /*
  * This interrupt should never happen with our APIC/SMP architecture
  */
-static void __smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
-   u32 v;
-   u32 i = 0;
static const char * const error_interrupt_reason[] = {
"Send CS error",/* APIC Error Bit 0 */
"Receive CS error", /* APIC Error Bit 1 */
@@ -1954,6 +1941,10 @@ static void __smp_error_interrupt(struct
"Received illegal vector",  /* APIC Error Bit 6 */
"Illegal register address", /* APIC Error Bit 7 */
};
+   u32 v, i = 0;
+
+   entering_irq();
+   trace_error_apic_entry(ERROR_APIC_VECTOR);
 
/* First tickle the hardware, only then report what went on. -- REW */
if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */
@@ -1975,20 +1966,6 @@ static void __smp_error_interrupt(struct
 
apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
-}
-
-__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
-{
-   entering_irq();
-   __smp_error_interrupt(regs);
-   exiting_irq();
-}
-
-__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
-{
-   entering_irq();
-   trace_error_apic_entry(ERROR_APIC_VECTOR);
-   __smp_error_interrupt(regs);
trace_error_apic_exit(ERROR_APIC_VECTOR);
exiting_irq();
 }




[patch V3 43/44] x86/idt: Simplify alloc_intr_gate

2017-08-27 Thread Thomas Gleixner
The only users of alloc_intr_gate() are hypervisors, which both check the
used_vectors bitmap whether they have allocated the gate already. Move that
check into alloc_intr_gate() and simplify the users.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Juergen Gross 
Cc: "K. Y. Srinivasan" 
Cc: Stephen Hemminger 
Cc: Boris Ostrovsky 
Cc: Juergen Gross 
---
 arch/x86/kernel/cpu/mshyperv.c   |9 ++---
 arch/x86/kernel/idt.c|6 +++---
 drivers/xen/events/events_base.c |6 ++
 3 files changed, 7 insertions(+), 14 deletions(-)

--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -59,13 +59,8 @@ void hyperv_vector_handler(struct pt_reg
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
vmbus_handler = handler;
-   /*
-* Setup the IDT for hypervisor callback. Prevent reallocation
-* at module reload.
-*/
-   if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
-   alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
-   hyperv_callback_vector);
+   /* Setup the IDT for hypervisor callback */
+   alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -354,7 +354,7 @@ void set_intr_gate(unsigned int n, const
 
 void alloc_intr_gate(unsigned int n, const void *addr)
 {
-   BUG_ON(test_bit(n, used_vectors) || n < FIRST_SYSTEM_VECTOR);
-   set_bit(n, used_vectors);
-   set_intr_gate(n, addr);
+   BUG_ON(n < FIRST_SYSTEM_VECTOR);
+   if (!test_and_set_bit(n, used_vectors))
+   set_intr_gate(n, addr);
 }
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -1653,10 +1653,8 @@ void xen_callback_vector(void)
return;
}
pr_info("Xen HVM callback vector for event delivery is 
enabled\n");
-   /* in the restore case the vector has already been allocated */
-   if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
-   alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
-   xen_hvm_callback_vector);
+   alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
+   xen_hvm_callback_vector);
}
 }
 #else




[patch V3 23/44] x86/percpu: Use static initializer for GDT entry

2017-08-27 Thread Thomas Gleixner
The IDT cleanup is about to remove pack_descriptor(). The GDT setup for the
percpu storage can be achieved with the static initializer as well. Replace
it.

Signed-off-by: Thomas Gleixner 
---
 arch/x86/kernel/setup_percpu.c |9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -155,13 +155,10 @@ static void __init pcpup_populate_pte(un
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
-   struct desc_struct gdt;
+   struct desc_struct d = GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu),
+ 0xFFFFF);
 
-   pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
-   0x2 | DESCTYPE_S, 0x8);
-   gdt.s = 1;
-   write_gdt_entry(get_cpu_gdt_rw(cpu),
-   GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
+   write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S);
 #endif
 }
 




Re: [PATCH] zram: add zstd to the supported algorithms list

2017-08-27 Thread Minchan Kim
Hi Nick,

On Fri, Aug 25, 2017 at 07:31:14PM +, Nick Terrell wrote:
> On 8/24/17, 10:19 PM, "Minchan Kim"  wrote:
> > On Fri, Aug 25, 2017 at 01:35:35AM +, Nick Terrell wrote:
> [..]
> > > I think using dictionaries in zram could be very interesting. We could for
> > > example, take a random sample of the RAM and use that as the dictionary
> > > for compression. E.g. take 32 512B samples from RAM and build a 16 KB
> > > dictionary (sizes may vary).
> > 
> > For static option, could we create the dictionary with data in zram
> > and dump the dictionary into file. And then, rebuiling zram or kernel
> > includes the dictionary into images.
> > 
> > For it, we would need some knob like
> > 
> > cat /sys/block/zram/zstd_dict > dict.data
> > 
> > CONFIG_ZSTD_DICT_DIR=
> > CONFIG_ZSTD_DICT_FILE= 
> 
> My guess is that a static dictionary won't cut it, since different
> workloads will have drastically different RAM contents, so we won't be able
> to construct a single dictionary that works for them all. I'd love to be
> proven wrong though.

zRAM is popular for system swap in the embedded world. In mobile phones
there would be different workloads as you said, but other scenarios like
refrigerators, TVs and so on will have a very specific workload,
so it would be great to have.

> 
> > For dynamic option, could we make the dictionary with data
> > in zram dynamically? So, upcoming pages will use the newly
> > created dictionary but old compressed pages will use own dictionary.
> 
> Yeah that's totally possible on the compression side, we would just need to
> save which pages were compressed with which dictionary somewhere.

Great. We have zram->table for object-based data and zspage for page units,
so I expect it wouldn't be hard to implement.

> 
> > I'm not sure it's possible, anyway, if predefined dict can help
> > comp ratio a lot in 4K data, I really love the feature and will support
> > to have it. ;)
> > 
> > > 
> > > I'm not sure how you would pass a dictionary into the crypto compression
> > > API, but I'm sure we can make something work if dictionary compression
> > > proves to be beneficial enough.
> > 
> > Yes, it would be better to integrate the feature into crypto, but please
> > don't tie it to the crypto API. If it's hard to support with the current
> > crypto API in a short time, I really want to support it with zcomp_zstd.c.
> > 
> > Please look at old zcomp model.
> > http://elixir.free-electrons.com/linux/v4.7/source/drivers/block/zram/zcomp_lz4.c
> 
> Thanks for the link, we could definitely make zcomp work with dictionaries.
> 
> > > What data have you, or anyone, used for benchmarking compression ratio 
> > > and 
> > > speed for RAM? Since it is such a specialized application, the standard
> > > compression benchmarks aren't very applicable.
> > 
> > I have used my image dumped from desktop swap device.
> > Of course, it doesn't cover all of cases in the world but it would be better
> > to use IO benchmark buffer, IMHO. :)
> 
> Since adding dictionary support won't be quite as easy as adding zstd
> support, I think the first step is building a set of benchmarks that
> represent some common real world scenarios. We can easily test different
> dictionary construction algorithms in userspace, and determine if the work
> will pay off for some workloads. I'll collect some RAM samples from my
> device and run some preliminary tests.

Sweet. I am looking forward to seeing your result.
Thanks!
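
For reference, a minimal userspace sketch of such an experiment could look
like the one below. It assumes the zstd library's ZDICT_trainFromBuffer()
and ZSTD_compress_usingDict() APIs and leaves the actual RAM/swap sample
collection out, so it only shows the shape of the test, not a proposed
kernel interface:

#include <stdio.h>
#include <stdlib.h>
#include <zstd.h>
#include <zdict.h>

int main(void)
{
	const size_t page = 4096, dict_cap = 16 * 1024;
	const unsigned nb_samples = 1024;
	size_t *sizes = malloc(nb_samples * sizeof(*sizes));
	char *samples = calloc(nb_samples, page);	/* fill with real page dumps */
	char *dict = malloc(dict_cap);
	char *dst = malloc(ZSTD_compressBound(page));
	ZSTD_CCtx *cctx = ZSTD_createCCtx();
	size_t dict_size, plain, with_dict;
	unsigned i;

	for (i = 0; i < nb_samples; i++)
		sizes[i] = page;

	/* Build a dictionary from the samples... */
	dict_size = ZDICT_trainFromBuffer(dict, dict_cap, samples, sizes, nb_samples);
	if (ZDICT_isError(dict_size))
		return 1;

	/* ...then compare one page compressed with and without it. */
	plain = ZSTD_compress(dst, ZSTD_compressBound(page), samples, page, 3);
	with_dict = ZSTD_compress_usingDict(cctx, dst, ZSTD_compressBound(page),
					    samples, page, dict, dict_size, 3);

	printf("plain: %zu bytes, with dict: %zu bytes\n", plain, with_dict);
	ZSTD_freeCCtx(cctx);
	return 0;
}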




Re: [PATCH net-next v2 09/14] net: mvpp2: dynamic reconfiguration of the PHY mode

2017-08-27 Thread Antoine Tenart
Hi Russell,

On Fri, Aug 25, 2017 at 11:46:16PM +0100, Russell King - ARM Linux wrote:
> On Fri, Aug 25, 2017 at 04:48:16PM +0200, Antoine Tenart wrote:
> > This patch adds logic to reconfigure the comphy/gop when the link status
> > change at runtime. This is very useful on boards such as the mcbin which
> > have SFP and Ethernet ports connected to the same MAC port: depending on
> > what the user connects the driver will automatically reconfigure the
> > link mode.
> 
> This commit commentry needs updating - as I've already pointed out in
> the previous round, the need to reconfigure things has *nothing* to do
> with there being SFP and "Ethernet" ports present.  Hence, your commit
> message is entirely misleading.

That's right. I'll update the commit message.

Thanks!
Antoine

-- 
Antoine Ténart, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com




[PATCH v4] ACPI / PMIC: Add opregion driver for Intel Dollar Cove TI PMIC

2017-08-27 Thread Takashi Iwai
This patch adds the opregion driver for Dollar Cove TI PMIC on Intel
Cherry Trail devices.  The patch is based on the original work by
Intel, found at:
  https://github.com/01org/ProductionKernelQuilts
with many cleanups and rewrites.

The driver is currently provided only as built-in to follow other
PMIC opregion drivers' convention.

The re-enumeration of devices at probe is required for fixing the
issues on HP x2 210 G2.  See bug#195689.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=193891
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195689
Reviewed-by: Mika Westerberg 
Reviewed-by: Andy Shevchenko 
Signed-off-by: Takashi Iwai 
---

I'm resending only this one as v4 patch.

v3->v4:
* Rename CHTDC_* with CHT_DC_* in Kconfig/Makefile
* add cht_ prefix to the driver name string to align with others
v2->v3:
* Rename dc_ti with chtdc_ti in all places
* Driver/kconfig renames accordingly
* Constification
* Added acks by Andy and Mika
v1->v2:
* get_raw_temp cleanup in opregion driver, mention register
  endianness

 drivers/acpi/Kconfig|   6 ++
 drivers/acpi/Makefile   |   1 +
 drivers/acpi/pmic/intel_pmic_chtdc_ti.c | 137 
 3 files changed, 144 insertions(+)
 create mode 100644 drivers/acpi/pmic/intel_pmic_chtdc_ti.c

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 1ce52f84dc23..176fae699891 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -521,6 +521,12 @@ config CHT_WC_PMIC_OPREGION
help
  This config adds ACPI operation region support for CHT Whiskey Cove PMIC.
 
+config CHT_DC_TI_PMIC_OPREGION
+   bool "ACPI operation region support for Dollar Cove TI PMIC"
+   depends on INTEL_SOC_PMIC_CHTDC_TI
+   help
+ This config adds ACPI operation region support for Dollar Cove TI PMIC.
+
 endif
 
 config ACPI_CONFIGFS
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index b1aacfc62b1f..cd228822d4a3 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_CRC_PMIC_OPREGION) += pmic/intel_pmic_crc.o
 obj-$(CONFIG_XPOWER_PMIC_OPREGION) += pmic/intel_pmic_xpower.o
 obj-$(CONFIG_BXT_WC_PMIC_OPREGION) += pmic/intel_pmic_bxtwc.o
 obj-$(CONFIG_CHT_WC_PMIC_OPREGION) += pmic/intel_pmic_chtwc.o
+obj-$(CONFIG_CHT_DC_TI_PMIC_OPREGION) += pmic/intel_pmic_chtdc_ti.o
 
 obj-$(CONFIG_ACPI_CONFIGFS)+= acpi_configfs.o
 
diff --git a/drivers/acpi/pmic/intel_pmic_chtdc_ti.c b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
new file mode 100644
index ..109c1e9c9c7a
--- /dev/null
+++ b/drivers/acpi/pmic/intel_pmic_chtdc_ti.c
@@ -0,0 +1,137 @@
+/*
+ * Dollar Cove TI PMIC operation region driver
+ * Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *
+ * Rewritten and cleaned up
+ * Copyright (C) 2017 Takashi Iwai 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "intel_pmic.h"
+
+/* registers stored in 16bit BE (high:low, total 10bit) */
+#define CHTDC_TI_VBAT  0x54
+#define CHTDC_TI_DIETEMP   0x56
+#define CHTDC_TI_BPTHERM   0x58
+#define CHTDC_TI_GPADC 0x5a
+
+static struct pmic_table chtdc_ti_power_table[] = {
+   { .address = 0x00, .reg = 0x41 },
+   { .address = 0x04, .reg = 0x42 },
+   { .address = 0x08, .reg = 0x43 },
+   { .address = 0x0c, .reg = 0x45 },
+   { .address = 0x10, .reg = 0x46 },
+   { .address = 0x14, .reg = 0x47 },
+   { .address = 0x18, .reg = 0x48 },
+   { .address = 0x1c, .reg = 0x49 },
+   { .address = 0x20, .reg = 0x4a },
+   { .address = 0x24, .reg = 0x4b },
+   { .address = 0x28, .reg = 0x4c },
+   { .address = 0x2c, .reg = 0x4d },
+   { .address = 0x30, .reg = 0x4e },
+};
+
+static struct pmic_table chtdc_ti_thermal_table[] = {
+   {
+   .address = 0x00,
+   .reg = CHTDC_TI_GPADC
+   },
+   {
+   .address = 0x0c,
+   .reg = CHTDC_TI_GPADC
+   },
+   /* TMP2 -> SYSTEMP */
+   {
+   .address = 0x18,
+   .reg = CHTDC_TI_GPADC
+   },
+   /* TMP3 -> BPTHERM */
+   {
+   .address = 0x24,
+   .reg = CHTDC_TI_BPTHERM
+   },
+   {
+   .address = 0x30,
+   .reg = CHTDC_TI_GPADC
+   },
+   /* TMP5 -> DIETEMP */
+   {
+   .address = 0x3c,
+   .reg = CHTDC_TI_DIETEMP
+   },
+};
+
+static int chtdc_ti_pmic_get_power(struct regmap *regmap, int reg, int bit,
+  u64 *value)
+{
+   int data;
+
+   if (regmap_read(regmap, reg, &data))
+   return -EIO;
+
+   *value = data & 1;
+   return 0;
+}
+
+static int chtdc_ti_pmic_update_power(struct regmap *regmap, int reg, int bit,
+ bool on)
+{
+   return regmap_update_bits(regmap, reg, 1, on);
+}
+
+static int chtdc_ti_pmic_get_raw_temp(struct regmap *regmap, int reg)
+{
+
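
The rest of the function is cut off above; given the "16bit BE ... total
10bit" register comment, the helper presumably just reads the two bytes and
masks the result, along these lines (sketch only, assuming the usual
regmap_bulk_read() helper):

	u8 buf[2];

	if (regmap_bulk_read(regmap, reg, buf, 2))
		return -EIO;

	/* big-endian, 10 significant bits */
	return ((buf[0] & 0x03) << 8) | buf[1];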

Re: linux-next: manual merge of the scsi tree with the staging tree

2017-08-27 Thread Greg KH
On Mon, Aug 28, 2017 at 04:41:27PM +1000, Stephen Rothwell wrote:
> Hi James,
> 
> Today's linux-next merge of the scsi tree got a conflict in:
> 
>   drivers/staging/unisys/visorhba/visorhba_main.c
> 
> between commits:
> 
>   781facd05eb9 ("staging: unisys: visorhba: visorhba_main.c: fixed comment formatting issues")
> 
> from the staging tree and commit:
> 
>   7bc4e528d9f6 ("scsi: visorhba: sanitze private device data allocation")
> 
> from the scsi tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.

Ick, messy merge, thanks for doing this.

greg k-h


Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag

2017-08-27 Thread Mike Galbraith
On Mon, 2017-08-28 at 08:10 +0200, Mike Galbraith wrote:
> Iff deeper cstate etc for
> longer does make a big difference, I can imagine wakeup time migrate
> leftward if capacity exists as an "on battery" tactic. (though that
> thought also invokes some unpleasant bounce fest images)

(consolidate left would have to be LB global to avoid fight with self)


Re: [PATCH] DSA support for Micrel KSZ8895

2017-08-27 Thread Pavel Machek
Hi!

> >No, tag_ksz part probably is not acceptable. Do you see solution
> >better than just copying it into tag_ksz1 file?
> 
> You could have all Micrel tag implementations live under net/dsa/tag_ksz.c 
> and have e.g: DSA_TAG_PROTO_KSZ for the current (newer) switches and 
> DSA_TAG_PROTO_KSZ_LEGACY (or any other name) for the older switches and you 
> would provide two sets of function pointers depending on which protocol is 
> requested by the switch.
> 
> Considering the minor difference needed in tagging here, it might be 
> acceptable to actually keep the current functions and just have the xmit() 
> call check what get_tag_protocol returns and use word 1 or 0 based on that. 
> Even though that's a fast path it shouldn't hurt performance too much. If it 
> does, we can always copy the tagging protocol into dsa_slave_priv so you have 
> a fast access to it.
> 

Actually I believe I can do optimizer tricks to keep this zero-cost
with clean code, if needed.
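
One way to do that (rough sketch, names hypothetical, assuming the existing
net/dsa/tag_ksz.c context) is to funnel both tagging variants through one
inline helper that takes a constant flag, so the compiler folds the branch
away in each exported xmit wrapper:

static inline struct sk_buff *__ksz_xmit(struct sk_buff *skb,
					 struct net_device *dev, bool legacy)
{
	struct dsa_slave_priv *p = netdev_priv(dev);
	u8 *tag = skb_put(skb, 2);		/* padding/tail handling omitted */

	if (legacy) {				/* one-byte-tag switches (e.g. KSZ8895) */
		tag[0] = BIT(p->dp->index);	/* destination port */
		tag[1] = 0;
	} else {				/* newer two-byte-tag switches */
		tag[0] = 0;
		tag[1] = BIT(p->dp->index);	/* destination port */
	}
	return skb;
}

static struct sk_buff *ksz_legacy_xmit(struct sk_buff *skb, struct net_device *dev)
{
	return __ksz_xmit(skb, dev, true);
}

static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
{
	return __ksz_xmit(skb, dev, false);
}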

> >
> >Any more comments, etc?
> 
> The MII emulation bits are interesting, was it not sufficient if you 
> implemented phy_read and phy_write operations that perform the necessary 
> internal PHY accesses or maybe you don't get access to standard MII 
> registers? b53 does such a thing and we merely just need to do a simple shift 
> to access the MII register number, thus avoiding the translation.
> 

We don't get standard MII registers over SPI bus.

> >Help would be welcome.
> 
> I concur with Andrew, try to get a patch series, even an RFC one together so 
> we can review things individually. 
> 
> How functional is your driver so far? I'd say the basic stuff to get working: 
> counters (debugging), link management (auto-negotiation, forced, etc.) and 
> basic bridging: all ports separate by default and working port to port 
> switching when brought together in a bridge. VLAN, FDB, MDB, other ethtool 
> goodies can be added later on.
>

Which counters are essential? Link management and basic bridging
should work, not sure if I'll have time to do more than that.

Best regards,
Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html




Re: [PATCH v2 4/5] cramfs: add mmap support

2017-08-27 Thread Al Viro
On Wed, Aug 16, 2017 at 01:35:35PM -0400, Nicolas Pitre wrote:

> +static const struct vm_operations_struct cramfs_vmasplit_ops;
> +static int cramfs_vmasplit_fault(struct vm_fault *vmf)
> +{
> + struct mm_struct *mm = vmf->vma->vm_mm;
> + struct vm_area_struct *vma, *new_vma;
> + unsigned long split_val, split_addr;
> + unsigned int split_pgoff, split_page;
> + int ret;
> +
> + /* Retrieve the vma split address and validate it */
> + vma = vmf->vma;
> + split_val = (unsigned long)vma->vm_private_data;
> + split_pgoff = split_val & 0xffff;
> + split_page = split_val >> 16;
> + split_addr = vma->vm_start + split_page * PAGE_SIZE;
> + pr_debug("fault: addr=%#lx vma=%#lx-%#lx split=%#lx\n",
> +  vmf->address, vma->vm_start, vma->vm_end, split_addr);
> + if (!split_val || split_addr >= vma->vm_end || vmf->address < split_addr)
> + return VM_FAULT_SIGSEGV;
> +
> + /* We have some vma surgery to do and need the write lock. */
> + up_read(&mm->mmap_sem);
> + if (down_write_killable(&mm->mmap_sem))
> + return VM_FAULT_RETRY;
> +
> + /* Make sure the vma didn't change between the locks */
> + vma = find_vma(mm, vmf->address);
> + if (vma->vm_ops != &cramfs_vmasplit_ops) {
> + /*
> +  * Someone else raced with us and could have handled the fault.
> +  * Let it go back to user space and fault again if necessary.
> +  */
> + downgrade_write(&mm->mmap_sem);
> + return VM_FAULT_NOPAGE;
> + }
> +
> + /* Split the vma between the directly mapped area and the rest */
> + ret = split_vma(mm, vma, split_addr, 0);

Egads...  Everything else aside, who said that your split_... will have
anything to do with the vma you get from find_vma()?


Re: [PATCH v5] iio: accel: mma8452: improvements to handle multiple events

2017-08-27 Thread Martin Kepplinger

On 28.08.2017 02:23, Harinath Nampally wrote:

This driver supports multiple devices like mma8653,
mma8652, mma8452, mma8453 and fxls8471. Almost all
these devices have more than one event.

The current driver design hardcodes the event-specific
information, so only one event can be supported by this
driver at any given time.
The current design also doesn't have the flexibility to
add more events.

This patch improves this by detaching the event-related
information from the chip_info struct; based on channel
type and event direction, the corresponding event
configuration registers are picked dynamically.
Hence both transient and freefall events can be
handled in the read/write callbacks.
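
In practice this means the event callbacks pick the register block from the
event direction instead of from hard-coded chip data, roughly (sketch only,
based on the structures added below):

	const struct mma8452_event_regs *ev_regs;

	switch (dir) {
	case IIO_EV_DIR_RISING:
		ev_regs = &ev_regs_accel_rising;	/* transient registers */
		break;
	case IIO_EV_DIR_FALLING:
		ev_regs = &ev_regs_accel_falling;	/* freefall registers */
		break;
	default:
		return -EINVAL;
	}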

Changes are thoroughly tested on fxls8471 device on imx6UL
Eval board using iio_event_monitor user space program.

After this fix both Freefall and Transient events are
handled by the driver without any conflicts.

Changes since v4 -> v5
-Add supported_events and enabled_events
 in the chip_info structure so that devices (mma865x)
 which have no support for the transient event will
 fall back to the freefall event. Hence this patch
 won't break devices that can't support
 transient events

Changes since v3 -> v4
-Add 'const struct ev_regs_accel_falling'
-Add 'const struct ev_regs_accel_rising'
-Refactor mma8452_get_event_regs function to
 remove the fill in the struct and return above structs
-Condense the commit's subject message

Changes since v2 -> v3
-Fix typo in commit message
-Replace word 'Bugfix' with 'Improvements'
-Describe more accurate commit message
-Replace breaks with returns
-Initialise transient event threshold mask
-Remove unrelated change of IIO_ACCEL channel type
 check in read/write event callbacks

Changes since v1 -> v2
-Fix indentations
-Remove unused fields in mma8452_event_regs struct
-Remove redundant return statement
-Remove unrelated changes like checkpatch.pl warning fixes

Signed-off-by: Harinath Nampally 
---
 drivers/iio/accel/mma8452.c | 349 +++-

 1 file changed, 183 insertions(+), 166 deletions(-)

diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c
index eb6e3dc..0a97e61b 100644
--- a/drivers/iio/accel/mma8452.c
+++ b/drivers/iio/accel/mma8452.c
@@ -59,7 +59,9 @@
 #define MMA8452_FF_MT_THS  0x17
 #define  MMA8452_FF_MT_THS_MASK0x7f
 #define MMA8452_FF_MT_COUNT0x18
+#define MMA8452_FF_MT_CHAN_SHIFT   3
 #define MMA8452_TRANSIENT_CFG  0x1d
+#define  MMA8452_TRANSIENT_CFG_CHAN(chan)  BIT(chan + 1)
 #define  MMA8452_TRANSIENT_CFG_HPF_BYP BIT(0)
 #define  MMA8452_TRANSIENT_CFG_ELE BIT(4)
 #define MMA8452_TRANSIENT_SRC  0x1e
@@ -69,6 +71,7 @@
 #define MMA8452_TRANSIENT_THS  0x1f
 #define  MMA8452_TRANSIENT_THS_MASKGENMASK(6, 0)
 #define MMA8452_TRANSIENT_COUNT0x20
+#define MMA8452_TRANSIENT_CHAN_SHIFT 1
 #define MMA8452_CTRL_REG1  0x2a
 #define  MMA8452_CTRL_ACTIVE   BIT(0)
 #define  MMA8452_CTRL_DR_MASK  GENMASK(5, 3)
@@ -107,6 +110,42 @@ struct mma8452_data {
const struct mma_chip_info *chip_info;
 };

+ /**
+  * struct mma8452_event_regs - chip specific data related to events
+  * @ev_cfg:   event config register address
+  * @ev_src:   event source register address
+  * @ev_ths:   event threshold register address
+  * @ev_ths_mask:  mask for the threshold value
+  * @ev_count: event count (period) register address
+  *
+  * Since not all chips supported by the driver support comparing high pass
+  * filtered data for events (interrupts), different interrupt sources are
+  * used for different chips and the relevant registers are included here.
+  */
+struct mma8452_event_regs {
+   u8 ev_cfg;
+   u8 ev_src;
+   u8 ev_ths;
+   u8 ev_ths_mask;
+   u8 ev_count;
+};
+
+static const struct mma8452_event_regs ev_regs_accel_falling = {
+   .ev_cfg = MMA8452_FF_MT_CFG,
+   .ev_src = MMA8452_FF_MT_SRC,
+   .ev_ths = MMA8452_FF_MT_THS,
+   .ev_ths_mask = MMA8452_FF_MT_THS_MASK,
+   .ev_count = MMA8452_FF_MT_COUNT
+};
+
+static const struct mma8452_event_regs ev_regs_accel_rising = {
+   .ev_cfg = MMA8452_TRANSIENT_CFG,
+   .ev_src = MMA8452_TRANSIENT_SRC,
+   .ev_ths = MMA8452_TRANSIENT_THS,
+   .ev_ths_mask = MMA8452_TRANSIENT_THS_MASK,
+   .ev_count = MMA8452_TRANSIENT_COUNT,
+};
+
 /**
  * struct mma_chip_info - chip specific data
  * @chip_id:   WHO_AM_I register's value
@@ -116,40 +

Re: [PATCH] Revert "pinctrl: sunxi: Don't enforce bias disable (for now)"

2017-08-27 Thread Maxime Ripard
On Sun, Aug 27, 2017 at 03:55:23PM +0300, Priit Laes wrote:
> This reverts commit 2154d94b40ea2a5de05245521371d0461bb0d669.
> 
> The original patch was intented to avoid some issues with the sunxi
> gpio rework and was supposed to be reverted after all the required
> DT bits had been merged around v4.10.
> 
> Signed-off-by: Priit Laes 

Acked-by: Maxime Ripard 

Thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com




Re: [PATCH] mm/page_alloc: don't reserve ZONE_HIGHMEM for ZONE_MOVABLE request

2017-08-27 Thread Vlastimil Babka
+CC linux-api

On 08/28/2017 02:28 AM, Joonsoo Kim wrote:
> On Fri, Aug 25, 2017 at 09:56:10AM +0200, Vlastimil Babka wrote:
>> On 08/25/2017 02:20 AM, Joonsoo Kim wrote:
>>> On Thu, Aug 24, 2017 at 11:41:58AM +0200, Vlastimil Babka wrote:
>>>
>>> Hmm, this is already pointed by Minchan and I have answered that.
>>>
>>> lkml.kernel.org/r/<20170421013243.GA13966@js1304-desktop>
>>>
>>> If you have a better idea, please let me know.
>>
>> My idea is that size of sysctl_lowmem_reserve_ratio is ZONE_NORMAL+1 and
>> it has no entries for zones > NORMAL. The
>> setup_per_zone_lowmem_reserve() is adjusted to only set
>> lower_zone->lowmem_reserve[j] for idx <= ZONE_NORMAL.
>>
>> I can't imagine somebody would want override the ratio for HIGHMEM or
>> MOVABLE
>> (where it has no effect anyway) so the simplest thing is not to expose
>> it at all.
> 
> Seems reasonable. However, if there is a user who checks
> sysctl_lowmem_reserve_ratio entry for HIGHMEM and change it, suggested
> interface will cause a problem since it doesn't expose ratio for
> HIGHMEM. Am I missing something?

As you explained, it makes little sense to change it for HIGHMEM which
only affects MOVABLE allocations. Also I doubt there are many systems
with both HIGHMEM (implies 32bit) *and* MOVABLE (implies NUMA, memory
hotplug...) zones. So I would just remove it, and if somebody will
really miss it, we can always add it back. In any case, please CC
linux-api on the next version.

> Thanks.
> 
> 
>>
>>> Thanks.
>>>
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majord...@kvack.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: em...@kvack.org



linux-next: manual merge of the scsi tree with the staging tree

2017-08-27 Thread Stephen Rothwell
Hi James,

Today's linux-next merge of the scsi tree got a conflict in:

  drivers/staging/unisys/visorhba/visorhba_main.c

between commits:

  781facd05eb9 ("staging: unisys: visorhba: visorhba_main.c: fixed comment formatting issues")

from the staging tree and commit:

  7bc4e528d9f6 ("scsi: visorhba: sanitze private device data allocation")

from the scsi tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/staging/unisys/visorhba/visorhba_main.c
index 8567e447891e,ddce92552ff5..
--- a/drivers/staging/unisys/visorhba/visorhba_main.c
+++ b/drivers/staging/unisys/visorhba/visorhba_main.c
@@@ -44,12 -44,11 +44,11 @@@ static struct visor_channeltype_descrip
  };
  
  MODULE_DEVICE_TABLE(visorbus, visorhba_channel_types);
 -MODULE_ALIAS("visorbus:" VISOR_VHBA_CHANNEL_UUID_STR);
 +MODULE_ALIAS("visorbus:" VISOR_VHBA_CHANNEL_GUID_STR);
  
  struct visordisk_info {
+   struct scsi_device *sdev;
u32 valid;
-   /* Disk Path */
-   u32 channel, id, lun;
atomic_t ios_threshold;
atomic_t error_count;
struct visordisk_info *next;
@@@ -105,25 -101,19 +104,19 @@@ struct visorhba_devices_open 
struct visorhba_devdata *devdata;
  };
  
- #define for_each_vdisk_match(iter, list, match) \
-   for (iter = &list->head; iter->next; iter = iter->next) \
-   if ((iter->channel == match->channel) && \
-   (iter->id == match->id) && \
-   (iter->lun == match->lun))
- 
  /*
 - *visor_thread_start - starts a thread for the device
 - *@threadfn: Function the thread starts
 - *@thrcontext: Context to pass to the thread, i.e. devdata
 - *@name: string describing name of thread
 + * visor_thread_start - Starts a thread for the device
 + * @threadfn:   Function the thread starts
 + * @thrcontext: Context to pass to the thread, i.e. devdata
 + * @name: String describing name of thread
   *
 - *Starts a thread for the device.
 + * Starts a thread for the device.
   *
 - *Return the task_struct * denoting the thread on success,
 - * or NULL on failure
 + * Return: The task_struct * denoting the thread on success,
 + *   or NULL on failure
   */
 -static struct task_struct *visor_thread_start
 -(int (*threadfn)(void *), void *thrcontext, char *name)
 +static struct task_struct *visor_thread_start(int (*threadfn)(void *),
 +void *thrcontext, char *name)
  {
struct task_struct *task;
  
@@@ -302,21 -280,19 +295,20 @@@ static void cleanup_scsitaskmgmt_handle
  }
  
  /*
 - *forward_taskmgmt_command - send taskmegmt command to the Service
 - *   Partition
 - *@tasktype: Type of taskmgmt command
 - *@scsidev: Scsidev that issued command
 + * forward_taskmgmt_command - Send taskmegmt command to the Service
 + *  Partition
 + * @tasktype: Type of taskmgmt command
 + * @scsidev:  Scsidev that issued command
   *
 - *Create a cmdrsp packet and send it to the Serivce Partition
 - *that will service this request.
 - *Returns whether the command was queued successfully or not.
 + * Create a cmdrsp packet and send it to the Serivce Partition
 + * that will service this request.
 + *
 + * Return: Int representing whether command was queued successfully or not
   */
  static int forward_taskmgmt_command(enum task_mgmt_types tasktype,
-   struct scsi_cmnd *scsicmd)
+   struct scsi_device *scsidev)
  {
struct uiscmdrsp *cmdrsp;
-   struct scsi_device *scsidev = scsicmd->device;
struct visorhba_devdata *devdata =
(struct visorhba_devdata *)scsidev->host->hostdata;
int notifyresult = 0x;
@@@ -607,24 -570,19 +604,21 @@@ static int visorhba_slave_alloc(struct 
struct visorhba_devdata *devdata;
struct Scsi_Host *scsihost = (struct Scsi_Host *)scsidev->host;
  
++  /* already allocated return success */
+   if (scsidev->hostdata)
 -  return 0; /* already allocated return success */
++  return 0;
+ 
 +  /* even though we errored, treat as success */
devdata = (struct visorhba_devdata *)scsihost->hostdata;
if (!devdata)
 -  return 0; /* even though we errored, treat as success */
 +  return 0;
  
-   /* already allocated return success */
-   for_each_vdisk_match(vdisk, devdata, scsidev)
-   return 0;
- 
-   tmpvdisk = kzalloc(sizeof(*tmpvdisk), GFP_ATOMIC);
-   if (!tmpvdisk)
+   vdisk =

Re: [PATCH] DSA support for Micrel KSZ8895

2017-08-27 Thread Pavel Machek
Hi!

> > No, tag_ksz part probably is not acceptable. Do you see solution
> > better than just copying it into tag_ksz1 file?
> 
> How about something like this, which needs further work to actually
> compile, but should give you the idea.

If that's acceptable, yes, I can do something similar. I don't think
CONFIG_NET_DSA_TAG_KSZ_8K / CONFIG_NET_DSA_TAG_KSZ_9K is suitable
naming (these will probably differ according to number of ports), what
about keeping CONFIG_NET_DSA_TAG_KSZ and adding
CONFIG_NET_DSA_TAG_KSZ_1B (for one byte)?

Thanks,
Pavel

>Andrew
> 
> index 99e38af85fc5..843e77b7c270 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -49,8 +49,11 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] 
> = {
>  #ifdef CONFIG_NET_DSA_TAG_EDSA
> [DSA_TAG_PROTO_EDSA] = &edsa_netdev_ops,
>  #endif
> -#ifdef CONFIG_NET_DSA_TAG_KSZ
> -   [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
> +#ifdef CONFIG_NET_DSA_TAG_KSZ_8K
> +   [DSA_TAG_PROTO_KSZ8K] = &ksz8k_netdev_ops,
> +#endif
> +#ifdef CONFIG_NET_DSA_TAG_KSZ_9K
> +   [DSA_TAG_PROTO_KSZ9K] = &ksz9k_netdev_ops,
>  #endif
>  #ifdef CONFIG_NET_DSA_TAG_LAN9303
> [DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
> diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
> index de66ca8e6201..398b833889f1 100644
> --- a/net/dsa/tag_ksz.c
> +++ b/net/dsa/tag_ksz.c
> @@ -35,6 +35,9 @@
>  static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
>  {
> struct dsa_slave_priv *p = netdev_priv(dev);
> +   struct dsa_port *dp = p->dp;
> +   struct dsa_switch *ds = dp->ds;
> +   struct dsa_switch_tree *dst = ds->dst;
> struct sk_buff *nskb;
> int padlen;
> u8 *tag;
> @@ -69,8 +72,14 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, 
> struct net_device *dev)
> }
>  
> tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
> -   tag[0] = 0;
> -   tag[1] = 1 << p->dp->index; /* destination port */
> +   if (dst->tag_ops == ksz8k_netdev_ops) {
> +   tag[0] = 1 << p->dp->index; /* destination port */
> +   tag[1] = 0;
> +   }
> +
> +   if (dst->tag_ops == ksz9k_netdev_ops) {
> +   tag[0] = 0;
> +   tag[1] = 1 << p->dp->index; /* destination port */
>  
> return nskb;
>  }
> @@ -98,7 +107,12 @@ static struct sk_buff *ksz_rcv(struct sk_buff *skb, 
> struct net_device *dev,
> return skb;
>  }
>  
> -const struct dsa_device_ops ksz_netdev_ops = {
> +const struct dsa_device_ops ksz8k_netdev_ops = {
> +   .xmit   = ksz_xmit,
> +   .rcv= ksz_rcv,
> +};
> +
> +const struct dsa_device_ops ksz9k_netdev_ops = {
> .xmit   = ksz_xmit,
> .rcv= ksz_rcv,
>  };

-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) 
http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html




Re: [PATCH] s390/zcrypt: make CPRBX const

2017-08-27 Thread Harald Freudenberger
On 08/25/2017 03:10 PM, Bhumika Goyal wrote:
> Make this const as it is only used in a copy operation.
>
> Signed-off-by: Bhumika Goyal 
> ---
>  drivers/s390/crypto/zcrypt_msgtype6.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c 
> b/drivers/s390/crypto/zcrypt_msgtype6.c
> index 4fddb43..afd20ce 100644
> --- a/drivers/s390/crypto/zcrypt_msgtype6.c
> +++ b/drivers/s390/crypto/zcrypt_msgtype6.c
> @@ -140,7 +140,7 @@ struct function_and_rules_block {
>   *   + 0x000A 'MRP ' (MCL3 'PK' or CEX2C 'PK')
>   * - VUD block
>   */
> -static struct CPRBX static_cprbx = {
> +static const struct CPRBX static_cprbx = {
>   .cprb_len   =  0x00DC,
>   .cprb_ver_id=  0x02,
>   .func_id= {0x54, 0x32},
Applied. Will be available with the next merge.
Thanks and have a nice day.

Harald Freudenberger



[PATCH v2 2/2] ARM: dts: sun7i: Add dts file for A20-OLinuXino-MICRO-eMMC

2017-08-27 Thread Stefan Mavrodiev
A20-OLinuXino-MICRO has an option with an onboard eMMC chip. For
now it's only shipped with a 4GB chip, but in the future this
may change.

Signed-off-by: Stefan Mavrodiev 
---
 arch/arm/boot/dts/Makefile |  1 +
 .../boot/dts/sun7i-a20-olinuxino-micro-emmc.dts| 70 ++
 2 files changed, 71 insertions(+)
 create mode 100644 arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 4b17f35..e1d1e93 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -880,6 +880,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \
sun7i-a20-olinuxino-lime2.dtb \
sun7i-a20-olinuxino-lime2-emmc.dtb \
sun7i-a20-olinuxino-micro.dtb \
+   sun7i-a20-olinuxino-micro-emmc.dtb \
sun7i-a20-orangepi.dtb \
sun7i-a20-orangepi-mini.dtb \
sun7i-a20-pcduino3.dtb \
diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts 
b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts
new file mode 100644
index 000..d99e7b1
--- /dev/null
+++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts
@@ -0,0 +1,70 @@
+ /*
+ * Copyright 2017 Olimex Ltd.
+ * Stefan Mavrodiev 
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sun7i-a20-olinuxino-micro.dts"
+
+/ {
+   model = "Olimex A20-OLinuXino-MICRO-eMMC";
+   compatible = "olimex,a20-olinuxino-micro-emmc", "allwinner,sun7i-a20";
+
+   mmc2_pwrseq: pwrseq {
+   compatible = "mmc-pwrseq-emmc";
+   reset-gpios = <&pio 2 16 GPIO_ACTIVE_LOW>;
+   };
+};
+
+&mmc2 {
+   pinctrl-names = "default";
+   pinctrl-0 = <&mmc2_pins_a>;
+   vmmc-supply = <&reg_vcc3v3>;
+   bus-width = <4>;
+   non-removable;
+   mmc-pwrseq = <&mmc2_pwrseq>;
+   status = "okay";
+
+   emmc: emmc@0 {
+   reg = <0>;
+   compatible = "mmc-card";
+   broken-hpi;
+   };
+};
-- 
2.7.4



[PATCH v2 1/2] ARM: dts: sun7i: Fix A20-OLinuXino-MICRO dts for LAN8710

2017-08-27 Thread Stefan Mavrodiev
From revision J the board uses the new PHY chip LAN8710. Compared
with the RTL8201, the PA17 pin is TXERR. It has a pullup which causes the
PHY not to work. To fix this, PA17 is muxed with the GMAC function. This
makes the pin output-low.

This patch is compatible with earlier board revisions, since this
pin wasn't connected to the PHY.

Signed-off-by: Stefan Mavrodiev 
---
 arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts 
b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
index 0b7403e..cb1b081 100644
--- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
@@ -102,7 +102,7 @@
 
 &gmac {
pinctrl-names = "default";
-   pinctrl-0 = <&gmac_pins_mii_a>;
+   pinctrl-0 = <&gmac_pins_mii_a>,<&gmac_txerr>;
phy = <&phy1>;
phy-mode = "mii";
status = "okay";
@@ -229,6 +229,11 @@
 };
 
 &pio {
+   gmac_txerr: gmac_txerr@0 {
+   pins = "PA17";
+   function = "gmac";
+   };
+
mmc3_cd_pin_olinuxinom: mmc3_cd_pin@0 {
pins = "PH11";
function = "gpio_in";
-- 
2.7.4



[PATCH v2 0/2] Update board support for A20-OLinuXino-MICRO

2017-08-27 Thread Stefan Mavrodiev
From rev. J of A20-OLinuXino-MICRO, the board has a new PHY chip
(LAN8710) which replaces the RTL8201. There is also an option for a 4GB
eMMC chip.

Changes in v2:
* Remove pinctrl request for eMMC reset pin
* Dump the idea of renaming boards with emmc
* Using txerr as gmac function

Stefan Mavrodiev (2):
  ARM: dts: sun7i: Fix A20-OLinuXino-MICRO dts for LAN8710
  ARM: dts: sun7i: Add dts file for A20-OLinuXino-MICRO-eMMC

 arch/arm/boot/dts/Makefile |  1 +
 .../boot/dts/sun7i-a20-olinuxino-micro-emmc.dts| 70 ++
 arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts|  7 ++-
 3 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/boot/dts/sun7i-a20-olinuxino-micro-emmc.dts

-- 
2.7.4



Re: [PATCH 12/12] dma-mapping: turn dma_cache_sync into a dma_map_ops method

2017-08-27 Thread Geert Uytterhoeven
Hi Christoph,

On Sun, Aug 27, 2017 at 6:10 PM, Christoph Hellwig  wrote:
> After we removed all the dead wood it turns out only two architectures
> actually implement dma_cache_sync as a no-op: mips and parisc.  Add

s/no-op/real op/

> a cache_sync method to struct dma_map_ops and implement it for the
> mips default DMA ops, and the parisc pa11 ops.

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


[PATCH net-next v3 1/3] net/ncsi: Fix several packet definitions

2017-08-27 Thread Samuel Mendoza-Jonas
Signed-off-by: Samuel Mendoza-Jonas 
---
v2: Rebased on latest net-next

 net/ncsi/ncsi-cmd.c | 10 +-
 net/ncsi/ncsi-pkt.h |  2 +-
 net/ncsi/ncsi-rsp.c |  3 ++-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 5e03ed190e18..7567ca63aae2 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb,
struct ncsi_cmd_svf_pkt *cmd;
 
cmd = skb_put_zero(skb, sizeof(*cmd));
-   cmd->vlan = htons(nca->words[0]);
-   cmd->index = nca->bytes[2];
-   cmd->enable = nca->bytes[3];
+   cmd->vlan = htons(nca->words[1]);
+   cmd->index = nca->bytes[6];
+   cmd->enable = nca->bytes[7];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
 
return 0;
@@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb,
struct ncsi_cmd_ev_pkt *cmd;
 
cmd = skb_put_zero(skb, sizeof(*cmd));
-   cmd->mode = nca->bytes[0];
+   cmd->mode = nca->bytes[3];
ncsi_cmd_build_header(&cmd->cmd.common, nca);
 
return 0;
@@ -228,7 +228,7 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae  },
{ NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl  },
{ NCSI_PKT_CMD_GLS,0, ncsi_cmd_handler_default },
-   { NCSI_PKT_CMD_SVF,4, ncsi_cmd_handler_svf },
+   { NCSI_PKT_CMD_SVF,8, ncsi_cmd_handler_svf },
{ NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev  },
{ NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_SMA,8, ncsi_cmd_handler_sma },
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 3ea49ed0a935..91b4b66438df 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt {
unsigned char   index; /* VLAN table index  */
unsigned char   enable;/* Enable or disable */
__be32  checksum;  /* Checksum  */
-   unsigned char   pad[14];
+   unsigned char   pad[18];
 };
 
 /* Enable VLAN */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 087db775b3dc..c1a191d790e2 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -354,7 +354,8 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr)
 
/* Add or remove the VLAN filter */
if (!(cmd->enable & 0x1)) {
-   ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index);
+   /* HW indexes from 1 */
+   ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1);
} else {
vlan = ntohs(cmd->vlan);
ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan);
-- 
2.14.0



Re: [PATCH] staging: rtl8723bs: remove memset before memcpy

2017-08-27 Thread Dan Carpenter
On Mon, Aug 28, 2017 at 01:43:31AM +0530, Himanshu Jha wrote:
> calling memcpy immediately after memset with the same region of memory
> makes memset redundant.
> 
> Build successfully.
> 

Thanks for the patch, it looks good.  You don't need to say that it
builds successfully, because we already assume that's true.

> Signed-off-by: Himanshu Jha 
> ---

Sometimes I put a comment here under the cut off line if I want people
to know that I haven't tested a patch.

Anyway, don't resend the patch.  It's fine as-is (unless Greg
complains) but it's just for future reference.

regards,
dan carpenter



[PATCH net-next v3 3/3] ftgmac100: Support NCSI VLAN filtering when available

2017-08-27 Thread Samuel Mendoza-Jonas
Register the ndo_vlan_rx_{add,kill}_vid callbacks and set the
NETIF_F_HW_VLAN_CTAG_FILTER if NCSI is available.
This allows the VLAN core to notify the NCSI driver when changes occur
so that the remote NCSI channel can be properly configured to filter on
the set VLAN tags.

Signed-off-by: Samuel Mendoza-Jonas 
---
v2: Moved ftgmac100 change into same patch and reordered

 drivers/net/ethernet/faraday/ftgmac100.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c 
b/drivers/net/ethernet/faraday/ftgmac100.c
index 34dae51effd4..05fe7123d5ae 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1623,6 +1623,8 @@ static const struct net_device_ops ftgmac100_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller= ftgmac100_poll_controller,
 #endif
+   .ndo_vlan_rx_add_vid= ncsi_vlan_rx_add_vid,
+   .ndo_vlan_rx_kill_vid   = ncsi_vlan_rx_kill_vid,
 };
 
 static int ftgmac100_setup_mdio(struct net_device *netdev)
@@ -1837,6 +1839,9 @@ static int ftgmac100_probe(struct platform_device *pdev)
NETIF_F_GRO | NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX;
 
+   if (priv->use_ncsi)
+   netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
/* AST2400  doesn't have working HW checksum generation */
if (np && (of_device_is_compatible(np, "aspeed,ast2400-mac")))
netdev->hw_features &= ~NETIF_F_HW_CSUM;
-- 
2.14.0



[PATCH net-next v3 2/3] net/ncsi: Configure VLAN tag filter

2017-08-27 Thread Samuel Mendoza-Jonas
Make use of the ndo_vlan_rx_{add,kill}_vid callbacks to have the NCSI
stack process new VLAN tags and configure the channel VLAN filter
appropriately.
Several VLAN tags can be set and a "Set VLAN Filter" packet must be sent
for each one, meaning the ncsi_dev_state_config_svf state must be
repeated. An internal list of VLAN tags is maintained, and compared
against the current channel's ncsi_channel_filter in order to keep track
within the state. VLAN filters are removed in a similar manner, with the
introduction of the ncsi_dev_state_config_clear_vids state. The maximum
number of VLAN tag filters is determined by the "Get Capabilities"
response from the channel.

Signed-off-by: Samuel Mendoza-Jonas 
---
v3: - Add comment describing change to ncsi_find_filter()
- Catch NULL in clear_one_vid() from ncsi_get_filter()
- Simplify state changes when kicking updated channel

 include/net/ncsi.h |   2 +
 net/ncsi/internal.h|  11 ++
 net/ncsi/ncsi-manage.c | 308 -
 net/ncsi/ncsi-rsp.c|   9 +-
 4 files changed, 326 insertions(+), 4 deletions(-)

diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index 68680baac0fd..1f96af46df49 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -28,6 +28,8 @@ struct ncsi_dev {
 };
 
 #ifdef CONFIG_NET_NCSI
+int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid);
+int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid);
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
   void (*notifier)(struct ncsi_dev *nd));
 int ncsi_start_dev(struct ncsi_dev *nd);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1308a56f2591..af3d636534ef 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -180,6 +180,7 @@ struct ncsi_channel {
 #define NCSI_CHANNEL_INACTIVE  1
 #define NCSI_CHANNEL_ACTIVE2
 #define NCSI_CHANNEL_INVISIBLE 3
+   boolreconfigure_needed;
spinlock_t  lock;   /* Protect filters etc */
struct ncsi_package *package;
struct ncsi_channel_version version;
@@ -235,6 +236,9 @@ enum {
ncsi_dev_state_probe_dp,
ncsi_dev_state_config_sp= 0x0301,
ncsi_dev_state_config_cis,
+   ncsi_dev_state_config_clear_vids,
+   ncsi_dev_state_config_svf,
+   ncsi_dev_state_config_ev,
ncsi_dev_state_config_sma,
ncsi_dev_state_config_ebf,
 #if IS_ENABLED(CONFIG_IPV6)
@@ -253,6 +257,12 @@ enum {
ncsi_dev_state_suspend_done
 };
 
+struct vlan_vid {
+   struct list_head list;
+   __be16 proto;
+   u16 vid;
+};
+
 struct ncsi_dev_priv {
struct ncsi_dev ndev;/* Associated NCSI device */
unsigned intflags;   /* NCSI device flags  */
@@ -276,6 +286,7 @@ struct ncsi_dev_priv {
struct work_struct  work;/* For channel management */
struct packet_type  ptype;   /* NCSI packet Rx handler */
struct list_headnode;/* Form NCSI device list  */
+   struct list_headvlan_vids;   /* List of active VLAN IDs */
 };
 
 struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index a3bd5fa8ad09..11904b3b702d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
 }
 
+u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+{
+   struct ncsi_channel_filter *ncf;
+   int size;
+
+   ncf = nc->filters[table];
+   if (!ncf)
+   return NULL;
+
+   size = ncsi_filter_size(table);
+   if (size < 0)
+   return NULL;
+
+   return ncf->data + size * index;
+}
+
+/* Find the first active filter in a filter table that matches the given
+ * data parameter. If data is NULL, this returns the first active filter.
+ */
 int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
 {
struct ncsi_channel_filter *ncf;
@@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data)
index = -1;
while ((index = find_next_bit(bitmap, ncf->total, index + 1))
   < ncf->total) {
-   if (!memcmp(ncf->data + size * index, data, size)) {
+   if (!data || !memcmp(ncf->data + size * index, data, size)) {
spin_unlock_irqrestore(&nc->lock, flags);
return index;
}
@@ -639,6 +658,95 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_functional;
 }
 
+/* Check the VLAN filter bitmap for a set filter, and construct a
+ * "Set VLAN Filter - Disable" packet if found.
+ */
+static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
+
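
As a rough, hedged sketch of the add-vid path the changelog above describes (not the actual patch; the real code also checks for duplicate tags and the filter-table capacity from "Get Capabilities"), the ndo callback records the tag on ndp->vlan_vids and kicks the configuration state machine via the existing net/ncsi helpers:

int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
{
	struct ncsi_dev *nd = ncsi_find_dev(dev);
	struct ncsi_dev_priv *ndp;
	struct vlan_vid *vlan;

	if (!nd)
		return -ENODEV;
	if (!vid)
		return 0;

	ndp = TO_NCSI_DEV_PRIV(nd);

	/* Remember the tag so every (re)configuration re-applies it */
	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
	if (!vlan)
		return -ENOMEM;
	vlan->proto = proto;
	vlan->vid = vid;
	list_add_rcu(&vlan->list, &ndp->vlan_vids);

	/* Let the state machine push a "Set VLAN Filter" to the channel */
	return ncsi_process_next_channel(ndp);
}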

[PATCH net-next v3 0/3] NCSI VLAN Filtering Support

2017-08-27 Thread Samuel Mendoza-Jonas
This series (mainly patch 2) adds VLAN filtering to the NCSI implementation.
A fair amount of code already exists in the NCSI stack for VLAN filtering but
none of it is actually hooked up. This goes the final mile and fixes a few
bugs in the existing code found along the way (patch 1).

Patch 3 adds the appropriate flag and callbacks to the ftgmac100 driver to
enable filtering as it's a large consumer of NCSI (and what I've been
testing on).

v3: - Add comment describing change to ncsi_find_filter()
- Catch NULL in clear_one_vid() from ncsi_get_filter()
- Simplify state changes when kicking updated channel

Samuel Mendoza-Jonas (3):
  net/ncsi: Fix several packet definitions
  net/ncsi: Configure VLAN tag filter
  ftgmac100: Support NCSI VLAN filtering when available

 drivers/net/ethernet/faraday/ftgmac100.c |   5 +
 include/net/ncsi.h   |   2 +
 net/ncsi/internal.h  |  11 ++
 net/ncsi/ncsi-cmd.c  |  10 +-
 net/ncsi/ncsi-manage.c   | 308 ++-
 net/ncsi/ncsi-pkt.h  |   2 +-
 net/ncsi/ncsi-rsp.c  |  12 +-
 7 files changed, 339 insertions(+), 11 deletions(-)

-- 
2.14.0



[PATCH] [media] uvcvideo: zero seq number when disabling stream

2017-08-27 Thread Hans Yang
For bulk-based devices, when disabling the video stream,
in addition to issuing CLEAR_FEATURE(HALT), it is better to select
alternate setting 0 as well, otherwise the sequence number on the
host side will probably not be reset to zero.

Then, the next time the video stream starts, the device expects the
host to start packets from sequence number 0, but the host actually
continues the sequence number from the last transaction, and this
causes transaction errors.

This commit fixes this by adding the select-alternate-setting-0 step
back, matching what is done for isoch-based devices.

The following error message will also be eliminated for some devices:
uvcvideo: Non-zero status (-71) in video completion handler.

Signed-off-by: Hans Yang 
---
 drivers/media/usb/uvc/uvc_video.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index fb86d6af398d..ad80c2a6da6a 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1862,10 +1862,9 @@ int uvc_video_enable(struct uvc_streaming *stream, int enable)
 
if (!enable) {
uvc_uninit_video(stream, 1);
-   if (stream->intf->num_altsetting > 1) {
-   usb_set_interface(stream->dev->udev,
+   usb_set_interface(stream->dev->udev,
  stream->intfnum, 0);
-   } else {
+   if (stream->intf->num_altsetting == 1) {
/* UVC doesn't specify how to inform a bulk-based device
 * when the video stream is stopped. Windows sends a
 * CLEAR_FEATURE(HALT) request to the video streaming
-- 
2.1.4



Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag

2017-08-27 Thread Mike Galbraith
On Sun, 2017-08-27 at 22:27 -0700, Joel Fernandes wrote:
> Hi Mike,
> 
> On Sun, Aug 27, 2017 at 11:07 AM, Mike Galbraith  wrote:
> > On Sat, 2017-08-26 at 23:39 -0700, Joel Fernandes wrote:
> >>
> >> Also about real world benchmarks, in Android we have usecases that
> >> show that the graphics performance and we have risk of frame drops if
> >> we don't use the sync flag so this is a real world need.
> >
> > That likely has everything to do with cpufreq not realizing that your
> > CPUs really are quite busy when scheduling cross core at fairly high
> > frequency, and not clocking up properly.
> >
> 
> I'm glad you brought this point up. Since Android O, the userspace
> processes are much more split across procedure calls due to a feature
> called treble (which does this for security, modularity etc). Due to
> this, a lot of things that were happening within a process boundary
> happen now across process boundaries over the binder bus. Early on
> folks noticed that this caused performance issues without the sync flag
> being used as a stronger hint. This can happen when 2 threads are in
> different frequency domains on different CPUs and are communicating
> over binder; due to this, the combined load of both threads is divided
> between the individual CPUs and causes them to run at a lower
> frequency. Whereas if they are running together on the same CPUs, then
> they would run at a higher frequency and perform better as their
> combined load would run at a higher frequency. So a stronger
> sync actually helps this case if we're careful about using it when
> possible.

Sure, but isn't that really a cpufreq issue?  We schedule cross core
quite aggressively for obvious reasons.  Now on mostly idle handheld
devices, you may get better battery life by stacking tasks a bit more,
in which case a sync-me-harder flag may be what you really want/need,
but with modern CPUs, I'm kinda skeptical of that, would have to see
cold hard numbers to become a believer.  Iff deeper cstate etc for
longer does make a big difference, I can imagine wakeup time migrate
leftward if capacity exists as an "on battery" tactic. (though that
thought also invokes some unpleasant bounce fest images)

-Mike


Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce

2017-08-27 Thread Paul Mackerras
On Mon, Aug 28, 2017 at 06:28:08AM +0100, Al Viro wrote:
> On Mon, Aug 28, 2017 at 02:38:37PM +1000, Paul Mackerras wrote:
> > On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote:
> > > On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote:
> > > 
> > > > It seems to me that it would be better to do the anon_inode_getfd()
> > > > call before the kvm_get_kvm() call, and go to the fail label if it
> > > > fails.
> > > 
> > > And what happens if another thread does close() on the (guessed) fd?
> > 
> > Chaos ensues, but mostly because we don't have proper mutual exclusion
> > on the modifications to the list.  I'll add a mutex_lock/unlock to
> > kvm_spapr_tce_release() and move the anon_inode_getfd() call inside
> > the mutex.
> > 
> > It looks like the other possible uses of the fd (mmap, and passing it
> > as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM
> > device fd) are safe.
> 
> Frankly, it's a lot saner to have "no failure points past anon_inode_getfd()"
> policy...

Right.  In my latest patch, there are no failure points past
anon_inode_getfd().

Paul.
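
As an illustration of that policy (a hedged sketch only, not the actual kvm_vm_ioctl_create_spapr_tce() code), all fallible setup is done first, the fd is created last, and the table is only published -- and the kvm reference only taken -- once nothing can fail; the release path must take the same mutex:

	mutex_lock(&kvm->lock);

	/* ... allocate and initialise the TCE table (stt); may still fail ... */

	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, stt,
			       O_RDWR | O_CLOEXEC);
	if (ret < 0)
		goto fail;		/* nothing published yet, plain unwind */

	kvm_get_kvm(kvm);		/* no failure points past this */
	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);
	return ret;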


Re: [PATCH] connector: Delete an error message for a failed memory allocation in cn_queue_alloc_callback_entry()

2017-08-27 Thread Dan Carpenter
On Sun, Aug 27, 2017 at 11:16:06PM +, Waskiewicz Jr, Peter wrote:
> On 8/27/17 3:26 PM, SF Markus Elfring wrote:
> > From: Markus Elfring 
> > Date: Sun, 27 Aug 2017 21:18:37 +0200
> > 
> > Omit an extra message for a memory allocation failure in this function.
> > 
> > This issue was detected by using the Coccinelle software.
> 
> Did coccinelle trip on the message or the fact you weren't returning NULL?
> 

You've misread the patch somehow.  The existing code has a NULL return
and it's preserved in Markus's patch.  This sort of patch is to fix a
checkpatch.pl warning.  The error message from this kzalloc() isn't going
to get printed because it's a small allocation and small allocations
always succeed in current kernels.  But probably the main reason
checkpatch complains is that kmalloc() already prints a stack trace and
a bunch of other information, so the printk doesn't add anything.
Removing it saves a little memory.

I'm mostly a fan of running checkpatch on new patches or staging and not
on old code...

regards,
dan carpenter



[PATCH] powerpc/512x: clk: constify clk_div_table

2017-08-27 Thread Arvind Yadav
clk_div_table is not supposed to change at runtime. The mpc512x
clock code works with const clk_div_table, so mark the non-const
structs as const.

Signed-off-by: Arvind Yadav 
---
 arch/powerpc/platforms/512x/clock-commonclk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index add5a53..b3097fe 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -363,7 +363,7 @@ static int get_cpmf_mult_x2(void)
  */
 
 /* applies to the IPS_DIV, and PCI_DIV values */
-static struct clk_div_table divtab_2346[] = {
+static const struct clk_div_table divtab_2346[] = {
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
{ .val = 4, .div = 4, },
@@ -372,7 +372,7 @@ static int get_cpmf_mult_x2(void)
 };
 
 /* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
-static struct clk_div_table divtab_1234[] = {
+static const struct clk_div_table divtab_1234[] = {
{ .val = 1, .div = 1, },
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
-- 
1.9.1



Re: [LKP] [lkp-robot] [sched/cfs] 625ed2bf04: unixbench.score -7.4% regression

2017-08-27 Thread Huang, Ying
kernel test robot  writes:

> Greeting,
>
> FYI, we noticed a -7.4% regression of unixbench.score due to commit:
>
>
> commit: 625ed2bf049d5a352c1bcca962d6e133454eaaff ("sched/cfs: Make util/load_avg more stable")
> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
>
> in testcase: unixbench
> on test machine: 88 threads Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz with 64G memory
> with following parameters:
>
>   runtime: 300s
>   nr_task: 100%
>   test: spawn
>   cpufreq_governor: performance
>
> test-description: UnixBench is the original BYTE UNIX benchmark suite, which aims to test the performance of Unix-like systems.
>

This has been merged in v4.13-rc1, so we checked it again.  If my
understanding is correct, the patch changes the algorithm used to
calculate the CPU load, so it influences the load-balance behavior for
this test case.

  4.73 ±  8% -31.3%   3.25 ± 10%  sched_debug.cpu.nr_running.max
  0.95 ±  5% -29.0%   0.67 ±  4%  sched_debug.cpu.nr_running.stddev

As shown above, the effect is that the tasks are distributed across more
CPUs, that is, the system is more balanced.  But this triggered more
contention on tasklist_lock, which hurt the unixbench score, as shown
below.

 26.60  -10.6   16.05  perf-profile.calltrace.cycles-pp.intel_idle.cpuidle_enter_state.cpuidle_enter.call_cpuidle.do_idle
 10.10   +2.4   12.53  perf-profile.calltrace.cycles-pp._raw_write_lock_irq.do_exit.do_group_exit.sys_exit_group.entry_SYSCALL_64_fastpath
  8.03   +2.6   10.63  perf-profile.calltrace.cycles-pp._raw_write_lock_irq.release_task.wait_consider_task.do_wait.sys_wait4
 17.98   +5.2   23.14  perf-profile.calltrace.cycles-pp._raw_read_lock.do_wait.sys_wait4.entry_SYSCALL_64_fastpath
  7.47   +5.9   13.33  perf-profile.calltrace.cycles-pp._raw_write_lock_irq.copy_process._do_fork.sys_clone.do_syscall_64


The patch makes the task distribution more balanced, so I think the
scheduler does a better job here.  The problem is that the tasklist_lock
isn't scalable.  But considering this is only a micro-benchmark which
specifically exercises the fork/exit/wait syscalls, this may not be a
big problem in reality.

So, all in all, I think we can ignore this regression.

Best Regards,
Huang, Ying


[PATCH] net: stmmac: constify clk_div_table

2017-08-27 Thread Arvind Yadav
clk_div_table is not supposed to change at runtime. The
meson8b_dwmac code works with a const clk_div_table, so mark the
non-const structs as const.

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 968..4404650b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -89,7 +89,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
char clk_name[32];
const char *clk_div_parents[1];
const char *mux_parent_names[MUX_CLK_NUM_PARENTS];
-   static struct clk_div_table clk_25m_div_table[] = {
+   static const struct clk_div_table clk_25m_div_table[] = {
{ .val = 0, .div = 5 },
{ .val = 1, .div = 10 },
{ /* sentinel */ },
-- 
1.9.1



[PATCH] leds: pca955x: Don't invert requested value in pca955x_gpio_set_value()

2017-08-27 Thread Andrew Jeffery
The PCA9552 lines can be used either for driving LEDs or as GPIOs. The
manual states that for LEDs, the operation is open-drain:

 The LSn LED select registers determine the source of the LED data.

   00 = output is set LOW (LED on)
   01 = output is set high-impedance (LED off; default)
   10 = output blinks at PWM0 rate
   11 = output blinks at PWM1 rate

For GPIOs it suggests a pull-up so that the open-case drives the line
high:

 For use as output, connect external pull-up resistor to the pin
 and size it according to the DC recommended operating
 characteristics.  LED output pin is HIGH when the output is
 programmed as high-impedance, and LOW when the output is
 programmed LOW through the ‘LED selector’ register.  The output
 can be pulse-width controlled when PWM0 or PWM1 are used.

Now, I have a hardware design that uses the LED controller to control
LEDs. However, for $reasons, we're using the leds-gpio driver to drive
them. The reasons here are a tangent, but they led to the discovery
of the inversion, which manifested as the LEDs being set to full
brightness at boot when we expected them to be off.

As we're driving the LEDs through leds-gpio, this means wending our way
through the gpiochip abstractions. So with that in mind we need to
describe an active-low GPIO configuration to drive the LEDs as though
they were GPIOs.

The set() gpiochip callback in leds-pca955x does the following:

 ...
 if (val)
pca955x_led_set(&led->led_cdev, LED_FULL);
 else
pca955x_led_set(&led->led_cdev, LED_OFF);
 ...

Where LED_FULL = 255. pca955x_led_set() in turn does:

 ...
 switch (value) {
 case LED_FULL:
ls = pca955x_ledsel(ls, ls_led, PCA955X_LS_LED_ON);
break;
 ...

Where PCA955X_LS_LED_ON is defined as:

 #define PCA955X_LS_LED_ON  0x0 /* Output LOW */

So here we have some type confusion: We've crossed domains from GPIO
behaviour to LED behaviour without accounting for possible inversions
in the process.

Stepping back to leds-gpio for a moment, during probe() we call
create_gpio_led(), which eventually executes:

 if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP) {
state = gpiod_get_value_cansleep(led_dat->gpiod);
if (state < 0)
return state;
 } else {
state = (template->default_state == LEDS_GPIO_DEFSTATE_ON);
 }
 ...
 ret = gpiod_direction_output(led_dat->gpiod, state);

In the devicetree the GPIO is annotated as active-low, and
gpiod_get_value_cansleep() handles this for us:

 int gpiod_get_value_cansleep(const struct gpio_desc *desc)
 {
 int value;

 might_sleep_if(extra_checks);
 VALIDATE_DESC(desc);
 value = _gpiod_get_raw_value(desc);
 if (value < 0)
 return value;

 if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
 value = !value;

 return value;
 }

_gpiod_get_raw_value() in turn calls through the get() callback for the
gpiochip implementation, so returning to our get() implementation in
leds-pca955x we find we extract the raw value from hardware:

 static int pca955x_gpio_get_value(struct gpio_chip *gc, unsigned int offset)
 {
 struct pca955x *pca955x = gpiochip_get_data(gc);
 struct pca955x_led *led = &pca955x->leds[offset];
 u8 reg = pca955x_read_input(pca955x->client, led->led_num / 8);

 return !!(reg & (1 << (led->led_num % 8)));
 }

This behaviour is not symmetric with that of set(), where the val is
inverted by the driver.

Closing the loop on the GPIO_ACTIVE_LOW inversions,
gpiod_direction_output(), like gpiod_get_value_cansleep(), handles it
for us:

 int gpiod_direction_output(struct gpio_desc *desc, int value)
 {
  VALIDATE_DESC(desc);
  if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
   value = !value;
  else
   value = !!value;
  return _gpiod_direction_output_raw(desc, value);
 }

All in all, with a value of 'keep' for the default-state property in a
leds-gpio child node, the current state of the hardware will in fact be
inverted; precisely the opposite of what was intended.

Rework leds-pca955x so that we avoid the incorrect inversion and clarify
the semantics with respect to GPIO.

Signed-off-by: Andrew Jeffery 
---
 drivers/leds/leds-pca955x.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 09303fd1fdc6..8b8f81cf10cb 100644
--- a/drivers/leds/leds-pca955
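
As a rough, hedged sketch of the direction described (not the actual diff), the GPIO set() callback can program the LS register directly from the requested GPIO value instead of translating through LED_FULL/LED_OFF, so no extra inversion is introduced; pca955x_set_ls() is a hypothetical helper standing in for the existing register-update code, while PCA955X_LS_LED_ON/OFF are the driver's existing defines:

 static void pca955x_gpio_set_value(struct gpio_chip *gc,
				    unsigned int offset, int val)
 {
	struct pca955x *pca955x = gpiochip_get_data(gc);
	struct pca955x_led *led = &pca955x->leds[offset];

	/*
	 * val != 0: high-impedance, external pull-up drives the line HIGH;
	 * val == 0: drive the output LOW.  No LED-brightness translation.
	 */
	pca955x_set_ls(led, val ? PCA955X_LS_LED_OFF : PCA955X_LS_LED_ON);
 }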

Re: [PATCH v8 00/13] Unify the interrupt delivery mode and do its setup in advance

2017-08-27 Thread Dou Liyang

Hi,

Following Juergen's advice, +CC xen-devel and linux-acpi

In case a single patch of a series isn't standalone, it would be nice
to receive at least the cover letter of the series in order to know
what it's all about.

Thanks,
dou.

At 08/28/2017 11:20 AM, Dou Liyang wrote:

Changes V7 --> V8:

  - Change the order of the [12/13] and [11/13] patches, as suggested by Rafael J. Wysocki.
  - Fix some comments.
  - Do more tests in Thinkpad x121e   -- Thanks for Borislav Petkov's help.

[Background]

MP specification defines three different interrupt delivery modes as follows:

 1. PIC Mode
 2. Virtual Wire Mode
 3. Symmetric I/O Mode

They will be setup in the different periods of booting time:
 1. *PIC Mode*, the default interrupt delivery modes, will be set first.
 2. *Virtual Wire Mode* will be setup during ISA IRQ initialization( step 1
in the figure.1).
 3. *Symmetric I/O Mode*'s setup is related to the system
3.1 In SMP-capable system, setup during prepares CPUs(step 2)
3.2 In UP system, setup during initializes itself(step 3).


 start_kernel
+---+
|
+--> ...
|
|setup_arch
+--> +---+
|
|init_IRQ
+-> +--+-+
|  |init_ISA_irqs
|  +--> +-++
| | ++
+---> +-->  | 1.init_bsp_APIC|
| ...   ++
+--->
| rest_init
+--->---+-+
|   |   kernel_init
|   +> +-+
|  |   kernel_init_freeable
|  +->  +-+
|   | smp_prepare_cpus
|   +---> ++-+
|   |  |   +---+
|   |  +-> |2.  apic_bsp_setup |
|   |  +---+
|   |
v   | smp_init
+---> +---++
  |+---+
  +--> |3.  apic_bsp_setup |
   +---+
figure.1 The flow chart of the kernel startup process

[Problem]

1. The kernel is left in a mismatched mode at the beginning of boot time.
2. The dump-capture kernel hangs with the 'notsc' option inherited
   from the 1st kernel's options.
3. The code is hard to read and maintain.

As Ingo's and Eric's discussions[1,2] point out, it needs to be refactored.

[Solution]

1. Construct a selector to unify these switches

   ++
   |disable_apic++
   ++   true |
  |false |
  |  |
 +v--+   |
 |!boot_cpu_has(X86_FEATURE_APIC)+---+
 +---+  true |
  |false |
  |  |
  +---v-+v
  |!smp_found_config|PIC MODE
  +---+-+
   |false |true
   |  |
   v  +---v-+
SYMMETRIC IO MODE | !acpi_lapic |
  +--+--+
 |
 v
   VIRTUAL WIRE MODE
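
A minimal C sketch of the selector diagram above (the enum and function names are illustrative, not necessarily those used in the patches):

enum apic_intr_mode {
	APIC_PIC,
	APIC_VIRTUAL_WIRE,
	APIC_SYMMETRIC_IO,
};

static enum apic_intr_mode apic_intr_mode_select(void)
{
	/* APIC disabled on the command line */
	if (disable_apic)
		return APIC_PIC;

	/* No local APIC at all */
	if (!boot_cpu_has(X86_FEATURE_APIC))
		return APIC_PIC;

	/* No MP table: fall back to virtual wire unless ACPI found a LAPIC */
	if (!smp_found_config)
		return acpi_lapic ? APIC_SYMMETRIC_IO : APIC_VIRTUAL_WIRE;

	return APIC_SYMMETRIC_IO;
}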

2. Unifying these setup steps of SMP-capable and UP system

   start_kernel
---+
|
|
|
|x86_late_time_init
+>---++
||
||  ++
|+> | 4. init_interrupt_mode |
|   ++
v


3. Execute the function as soon as possible.

[Test]

1. In a theoretical code analysis, the patchset covers the original
logic.

1) The original logic of the interrupt delivery mode setup:

-Step O_1) Keep in PIC mode or virtual wire mode:

  Check (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
true:  PIC mode
false: virtual wire mode

-Step O_2) Try to switch to symmetric IO mode:
  O_2_1) In up system:

-Check disable_apic
  true: O_S_1 (original situation 1)
-Check whether there is a separate or integrated chip
  doesn't have one: O_S_2
-Check !smp_found_config
  true: O_S_3
-Others:
  O_S_4

  O_2_2) In smp-capable system:

-Check !smp_found_config && !acpi_lapic
  true: goto O_2_1
-Check if it is LAPIC
  doesn't have one: O_S_5
-Check !max_cpus
  true: O_S_6
-read_apic_id() != boot_cpu_physical_apicid
  true: O_S_7
-Others:
O_S_8

2) After that patchset, the new logic:

-Step N_1) Skip step O_1 and try to switch to the final interrupt mode
   -Check disable_apic
 true: N_S_1 (New situation 1)
   -Check whether there is a separate or integrated chip
 true: N_S_2
   -Check if (!smp_found_config)
 true: N_S_3
   -Check !setup_max_cpus
 true: N_S_4
   -Check read_apic_id() != boot_cpu_physical_apicid
 true: N_S_5
   -Others:
   N_S_6

O_S_1 is covered in N_S_1
O_S_2 is covered in N_S_2
O_S_

[PATCH] remoteproc: Introduce rproc handle accessor for children

2017-08-27 Thread Bjorn Andersson
In certain circumstances rpmsg devices need to acquire a handle to the
ancestor remoteproc instance, e.g. to invoke rproc_report_crash() when a
fatal error is detected. Introduce an interface that walks the device
tree in search of a remoteproc instance and returns it.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/remoteproc_core.c | 18 ++
 include/linux/remoteproc.h   |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 564061dcc019..5b1b19519275 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1296,6 +1296,23 @@ struct rproc *rproc_get_by_phandle(phandle phandle)
 EXPORT_SYMBOL(rproc_get_by_phandle);
 
 /**
+ * rproc_get_by_child() - acquire rproc handle of @dev's ancestor
+ * @dev:   child device to find ancestor of
+ *
+ * Returns the ancestor rproc instance, or NULL if not found.
+ */
+struct rproc *rproc_get_by_child(struct device *dev)
+{
+   for (dev = dev->parent; dev; dev = dev->parent) {
+   if (dev->type && !strcmp(dev->type->name, "remoteproc"))
+   return dev->driver_data;
+   }
+
+   return NULL;
+}
+EXPORT_SYMBOL(rproc_get_by_child);
+
+/**
  * rproc_add() - register a remote processor
  * @rproc: the remote processor handle to register
  *
@@ -1440,6 +1457,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
rproc->dev.parent = dev;
rproc->dev.type = &rproc_type;
rproc->dev.class = &rproc_class;
+   rproc->dev.driver_data = rproc;
 
/* Assign a unique device index and name */
rproc->index = ida_simple_get(&rproc_dev_index, 0, 0, GFP_KERNEL);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 81da49564ff4..44e630eb3d94 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -510,6 +510,8 @@ struct rproc_vdev {
 };
 
 struct rproc *rproc_get_by_phandle(phandle phandle);
+struct rproc *rproc_get_by_child(struct device *dev);
+
 struct rproc *rproc_alloc(struct device *dev, const char *name,
  const struct rproc_ops *ops,
  const char *firmware, int len);
-- 
2.12.0
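
A hedged usage sketch (the driver, function name and crash type are made up for illustration): an rpmsg child driver can walk up to its ancestor rproc and report a crash.

#include <linux/remoteproc.h>
#include <linux/rpmsg.h>

static void my_rpmsg_handle_fatal(struct rpmsg_device *rpdev)
{
	struct rproc *rproc = rproc_get_by_child(&rpdev->dev);

	if (!rproc) {
		dev_err(&rpdev->dev, "no ancestor remoteproc found\n");
		return;
	}

	/* The crash type here is just an example */
	rproc_report_crash(rproc, RPROC_WATCHDOG);
}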



[PATCH] remoteproc: Stop subdevices in reverse order

2017-08-27 Thread Bjorn Andersson
Subdevices might depend on earlier registered subdevices for
communication purposes, as such they should be stopped in reverse order
so that said communication channel is removed after the dependent
subdevice is stopped.

Signed-off-by: Bjorn Andersson 
---
 drivers/remoteproc/remoteproc_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index e82f60182027..5aaa4c21d14d 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -794,7 +794,7 @@ static void rproc_remove_subdevices(struct rproc *rproc)
 {
struct rproc_subdev *subdev;
 
-   list_for_each_entry(subdev, &rproc->subdevs, node)
+   list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
subdev->remove(subdev);
 }
 
-- 
2.12.0



[PATCH 2/2] clk: zte: constify clk_div_table

2017-08-27 Thread Arvind Yadav
clk_div_table is not supposed to change at runtime. All functions
working with clk_div_table provided by <linux/clk-provider.h> work
with const clk_div_table. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav 
---
 drivers/clk/zte/clk-zx296718.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
index 27f853d..354dd50 100644
--- a/drivers/clk/zte/clk-zx296718.c
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -451,7 +451,7 @@
FFACTOR(0, "emmc_mux_div2", "emmc_mux", 1, 2, CLK_SET_RATE_PARENT),
 };
 
-static struct clk_div_table noc_div_table[] = {
+static const struct clk_div_table noc_div_table[] = {
{ .val = 1, .div = 2, },
{ .val = 3, .div = 4, },
 };
@@ -644,7 +644,7 @@ static int __init top_clocks_init(struct device_node *np)
return 0;
 }
 
-static struct clk_div_table common_even_div_table[] = {
+static const struct clk_div_table common_even_div_table[] = {
{ .val = 0, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 3, .div = 4, },
@@ -656,7 +656,7 @@ static int __init top_clocks_init(struct device_node *np)
{ .val = 15, .div = 16, },
 };
 
-static struct clk_div_table common_div_table[] = {
+static const struct clk_div_table common_div_table[] = {
{ .val = 0, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 2, .div = 3, },
-- 
1.9.1



[PATCH 1/2] clk: imx: constify clk_div_table

2017-08-27 Thread Arvind Yadav
clk_div_table is not supposed to change at runtime. All functions
working with clk_div_table provided by <linux/clk-provider.h> work
with const clk_div_table. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav 
---
 drivers/clk/imx/clk-imx6sl.c | 6 +++---
 drivers/clk/imx/clk-imx6sx.c | 6 +++---
 drivers/clk/imx/clk-imx6ul.c | 6 +++---
 drivers/clk/imx/clk-imx7d.c  | 4 ++--
 drivers/clk/imx/clk-vf610.c  | 2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/clk/imx/clk-imx6sl.c b/drivers/clk/imx/clk-imx6sl.c
index 5fd4dda..9642cdf 100644
--- a/drivers/clk/imx/clk-imx6sl.c
+++ b/drivers/clk/imx/clk-imx6sl.c
@@ -71,7 +71,7 @@
 static const char *pll6_bypass_sels[]  = { "pll6", "pll6_bypass_src", };
 static const char *pll7_bypass_sels[]  = { "pll7", "pll7_bypass_src", };
 
-static struct clk_div_table clk_enet_ref_table[] = {
+static const struct clk_div_table clk_enet_ref_table[] = {
{ .val = 0, .div = 20, },
{ .val = 1, .div = 10, },
{ .val = 2, .div = 5, },
@@ -79,14 +79,14 @@
{ }
 };
 
-static struct clk_div_table post_div_table[] = {
+static const struct clk_div_table post_div_table[] = {
{ .val = 2, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 0, .div = 4, },
{ }
 };
 
-static struct clk_div_table video_div_table[] = {
+static const struct clk_div_table video_div_table[] = {
{ .val = 0, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 2, .div = 1, },
diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c
index b5c96de..e6d389e 100644
--- a/drivers/clk/imx/clk-imx6sx.c
+++ b/drivers/clk/imx/clk-imx6sx.c
@@ -105,7 +105,7 @@
IMX6SX_CLK_EPIT2,
 };
 
-static struct clk_div_table clk_enet_ref_table[] = {
+static const struct clk_div_table clk_enet_ref_table[] = {
{ .val = 0, .div = 20, },
{ .val = 1, .div = 10, },
{ .val = 2, .div = 5, },
@@ -113,14 +113,14 @@
{ }
 };
 
-static struct clk_div_table post_div_table[] = {
+static const struct clk_div_table post_div_table[] = {
{ .val = 2, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 0, .div = 4, },
{ }
 };
 
-static struct clk_div_table video_div_table[] = {
+static const struct clk_div_table video_div_table[] = {
{ .val = 0, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 2, .div = 1, },
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index b4e0dff..5e8c18a 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -78,7 +78,7 @@
IMX6UL_CLK_MMDC_P0_FAST, IMX6UL_CLK_MMDC_P0_IPG,
 };
 
-static struct clk_div_table clk_enet_ref_table[] = {
+static const struct clk_div_table clk_enet_ref_table[] = {
{ .val = 0, .div = 20, },
{ .val = 1, .div = 10, },
{ .val = 2, .div = 5, },
@@ -86,14 +86,14 @@
{ }
 };
 
-static struct clk_div_table post_div_table[] = {
+static const struct clk_div_table post_div_table[] = {
{ .val = 2, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 0, .div = 4, },
{ }
 };
 
-static struct clk_div_table video_div_table[] = {
+static const struct clk_div_table video_div_table[] = {
{ .val = 0, .div = 1, },
{ .val = 1, .div = 2, },
{ .val = 2, .div = 1, },
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index 3da1218..2305699 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -27,7 +27,7 @@
 static u32 share_count_sai3;
 static u32 share_count_nand;
 
-static struct clk_div_table test_div_table[] = {
+static const struct clk_div_table test_div_table[] = {
{ .val = 3, .div = 1, },
{ .val = 2, .div = 1, },
{ .val = 1, .div = 2, },
@@ -35,7 +35,7 @@
{ }
 };
 
-static struct clk_div_table post_div_table[] = {
+static const struct clk_div_table post_div_table[] = {
{ .val = 3, .div = 4, },
{ .val = 2, .div = 1, },
{ .val = 1, .div = 2, },
diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c
index 59b1863..6dae543 100644
--- a/drivers/clk/imx/clk-vf610.c
+++ b/drivers/clk/imx/clk-vf610.c
@@ -102,7 +102,7 @@
 static const char *ftm_fix_sels[]  = { "sxosc", "ipg_bus", };
 
 
-static struct clk_div_table pll4_audio_div_table[] = {
+static const struct clk_div_table pll4_audio_div_table[] = {
{ .val = 0, .div = 1 },
{ .val = 1, .div = 2 },
{ .val = 2, .div = 6 },
-- 
1.9.1



[PATCH 0/2] constify clk clk_div_table

2017-08-27 Thread Arvind Yadav
clk_div_table is not supposed to change at runtime. All functions
working with clk_div_table provided by <linux/clk-provider.h> work
with const clk_div_table. So mark the non-const structs as const.

Arvind Yadav (2):
  [PATCH 1/2] clk: imx: constify clk_div_table
  [PATCH 2/2] clk: zte: constify clk_div_table

 drivers/clk/imx/clk-imx6sl.c   | 6 +++---
 drivers/clk/imx/clk-imx6sx.c   | 6 +++---
 drivers/clk/imx/clk-imx6ul.c   | 6 +++---
 drivers/clk/imx/clk-imx7d.c| 4 ++--
 drivers/clk/imx/clk-vf610.c| 2 +-
 drivers/clk/zte/clk-zx296718.c | 6 +++---
 6 files changed, 15 insertions(+), 15 deletions(-)

-- 
1.9.1



Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce

2017-08-27 Thread Al Viro
On Mon, Aug 28, 2017 at 02:38:37PM +1000, Paul Mackerras wrote:
> On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote:
> > On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote:
> > 
> > > It seems to me that it would be better to do the anon_inode_getfd()
> > > call before the kvm_get_kvm() call, and go to the fail label if it
> > > fails.
> > 
> > And what happens if another thread does close() on the (guessed) fd?
> 
> Chaos ensues, but mostly because we don't have proper mutual exclusion
> on the modifications to the list.  I'll add a mutex_lock/unlock to
> kvm_spapr_tce_release() and move the anon_inode_getfd() call inside
> the mutex.
> 
> It looks like the other possible uses of the fd (mmap, and passing it
> as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM
> device fd) are safe.

Frankly, it's a lot saner to have "no failure points past anon_inode_getfd()"
policy...


Re: [PATCH RFC/RFT] sched/fair: Improve the behavior of sync flag

2017-08-27 Thread Joel Fernandes
Hi Mike,

On Sun, Aug 27, 2017 at 11:07 AM, Mike Galbraith  wrote:
> On Sat, 2017-08-26 at 23:39 -0700, Joel Fernandes wrote:
>>
>> Also about real world benchmarks, in Android we have usecases that
>> show that the graphics performance and we have risk of frame drops if
>> we don't use the sync flag so this is a real world need.
>
> That likely has everything to do with cpufreq not realizing that your
> CPUs really are quite busy when scheduling cross core at fairly high
> frequency, and not clocking up properly.
>

I'm glad you brought this point up. Since Android O, the userspace
processes are much more split across procedure calls due to a feature
called treble (which does this for security, modularity etc). Due to
this, a lot of things that were happening within a process boundary
happen now across process boundaries over the binder bus. Early on
folks noticed that this caused performance issues without the sync flag
being used as a stronger hint. This can happen when 2 threads are in
different frequency domains on different CPUs and are communicating
over binder; due to this, the combined load of both threads is divided
between the individual CPUs and causes them to run at a lower
frequency. Whereas if they are running together on the same CPUs, then
they would run at a higher frequency and perform better as their
combined load would run at a higher frequency. So a stronger
sync actually helps this case if we're careful about using it when
possible.

thanks,

-Joel



> -Mike


Re: [PATCH net-next v7 05/10] landlock: Add LSM hooks related to filesystem

2017-08-27 Thread Alexei Starovoitov
On Sun, Aug 27, 2017 at 03:31:35PM +0200, Mickaël Salaün wrote:
> 
> > How can you add 3rd argument? All FS events would have to get it,
> > but in some LSM hooks such argument will be meaningless, whereas
> > in other places it will carry useful info that rule can operate on.
> > Would that mean that we'll have FS_3 event type and only few LSM
> > hooks will be converted to it. That works, but then we'll lose
> > compatiblity with old rules written for FS event and that given hook.
> > Otherwise we'd need to have fancy logic to accept old FS event
> > into FS_3 LSM hook.
> 
> If we want to add a third argument to the FS event, then it will become
> accessible because its type will be different than NOT_INIT. This keep
> the compatibility with old rules because this new field was then denied.
> 
> If we want to add a new argument but only for a subset of the hooks used
> by the FS event, then we need to create a new event, like FS_FCNTL. For
> example, we may want to add a FS_RENAME event to be able to tie the
> source file and the destination file of a rename call.

That's exactly my point. Adding another argument to the FS event
for a subset of hooks will require either a new FS_FOO (and, to be
backwards compatible, these hooks will call _both_ FS and FS_FOO)
or some magic logic on the kernel side that will allow old FS rules
to be attached to FS_FOO hooks.
Two calls don't scale, and if we do 'magic logic' can we do it now
and avoid introducing events altogether?
For example, all landlock programs could be of a landlock type and
would need to declare which arg1, arg2, ... argN they expect. Then at
attach time the kernel only needs to verify that the hook arg types
match what the program requested.

> Anyway, I added the subtype/ABI version as a safeguard in case of
> unexpected future evolution.

I don't think that abi/version field adds anything in this context.
I still think it should simply be removed.



Re: [PATCH v2 RESEND 1/2] dt-bindings: serial: 8250: Add MediaTek BTIF controller bindings

2017-08-27 Thread Matthias Brugger



On 08/27/2017 10:39 PM, Sean Wang wrote:

On Sun, 2017-08-27 at 22:00 +0300, Matthias Brugger wrote:


On 08/19/2017 09:06 PM, sean.w...@mediatek.com wrote:

From: Sean Wang 

Document the devicetree bindings in 8250.txt for MediaTek BTIF
controller which could be found on MT7622 and MT7623 SoC.

Signed-off-by: Sean Wang 
---
   Documentation/devicetree/bindings/serial/8250.txt | 3 +++
   1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
index 419ff6c..7528d90 100644
--- a/Documentation/devicetree/bindings/serial/8250.txt
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -14,6 +14,9 @@ Required properties:
  tegra132, or tegra210.
- "nxp,lpc3220-uart"
- "ralink,rt2880-uart"
+   - For MediaTek MT7623, must contain "mediatek,mt7623-btif"
+   - For other MediaTek SoCs, must contain "mediatek,<soc>-btif",
+ "mediatek,mt7623-btif" where <soc> is mt7622.


Hm, to me that's confusing. What about:
"mediatek,mt7623-btif": for MediaTek MT7623
"mediatek,mt7622-btif", "mediatek,mt7623-btif": for MediaTek MT7622

If in the future we have more SoCs that support the BTIF, we should add them
like the mt7622 case.



I had a v3, but it has similar logic and also got an ack from Rob.

I know your logic of adding binding documentation for all MediaTek
devices, and I have always added MediaTek devices in dt-bindings the
way you mention here, but I felt this way is fine for this kind of
dedicated document.

The reason I don't add it as usual is the following: 8250.txt is common
and shared among all uart-like devices, so I don't want the btif device
to occupy too large a section and bloat the document every time a new
MediaTek SoC is introduced.

So instead I referred to the existing Nvidia devices added in 8250.txt,
whose approach I thought was simple and elegant, and whose pattern I
can use to add btif devices.



Working on my email backlog after vacation, I didn't see that this was
accepted by Rob. Sorry for the noise.


Matthias


Re: [PATCH v7 12/12] powerpc/vas: Define copy/paste interfaces

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Hi Suka,
> 
> A few more things ...
> 
> Sukadev Bhattiprolu  writes:
> 
> > diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
> > new file mode 100644
> > index 000..7783bb8
> > --- /dev/null
> > +++ b/arch/powerpc/platforms/powernv/copy-paste.h
> > @@ -0,0 +1,74 @@
> > +/*
> > + * Copyright 2016 IBM Corp.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version
> > + * 2 of the License, or (at your option) any later version.
> > + */
> > +
> > +/*
> > + * Macros taken from tools/testing/selftests/powerpc/context_switch/cp_abort.c
> > + */
> 
> These are both out of date, they're changed in v3.0B.
> 
> > +#define PASTE(RA, RB, L, RC) \
> > +   .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) \
> > + | (L) << (31-10) | (RC) << (31-31))
> 
> You should define PPC_PASTE() in ppc-opcode.h
> 
> We already have PPC_INST_PASTE, so use that.
> 
> L and RC are gone.

Ok. I thought they would come back later, but of course we can update
these kernel-only calls then.

> 
> > +
> > +#define COPY(RA, RB, L) \
> > +   .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) \
> > + | (L) << (31-10))
> 
> Use PPC_COPY().
> 

Ok

> > +
> > +#define CR0_FXM"0x80"
> 
> I don't think a #define for this helps readability.
> 
> > +#define CR0_SHIFT  28
> > +#define CR0_MASK   0xF
> 
> Not used.

Will need them now to return value in cr0?
> 
> > +/*
> > + * Copy/paste instructions:
> > + *
> > + * copy RA,RB,L
> > + * Copy contents of address (RA) + effective_address(RB)
> > + * to internal copy-buffer.
> > + *
> > + * L == 1 indicates this is the first copy.
> > + *
> > + * L == 0 indicates its a continuation of a prior first copy.
> > + *
> > + * paste RA,RB,L
> > + * Paste contents of internal copy-buffer to the address
> > + * (RA) + effective_address(RB)
> > + *
> > + * L == 0 indicates its a continuation of a prior paste. i.e.
> > + * don't wait for the completion or update status.
> > + *
> > + * L == 1 indicates this is the last paste in the group (i.e.
> > + * wait for the group to complete and update status in CR0).
> > + *
> > + * For Power9, the L bit must be 'true' in both copy and paste.
> > + */
> > +
> > +static inline int vas_copy(void *crb, int offset, int first)
> > +{
> > +   WARN_ON_ONCE(!first);
> 
> Please change the API to not require unused parameters.
> 
> Same for offset.

Ok, Haren's NX patches will need to drop those parameters as well.

> 
> > +
> > +   __asm__ __volatile(stringify_in_c(COPY(%0, %1, %2))";"
> 
> I've never seen __volatile before.
> 
> Just use: asm volatile

ok
> 
> 
> > +   :
> > +   : "b" (offset), "b" (crb), "i" (1)
> > +   : "memory");
> > +
> > +   return 0;
> > +}
> > +
> > +static inline int vas_paste(void *paste_address, int offset, int last)
> > +{
> > +   unsigned long long cr;
> 
> cr is 32-bits actually.

ok
> 
> > +   WARN_ON_ONCE(!last);
> > +
> > +   cr = 0;
> > +   __asm__ __volatile(stringify_in_c(PASTE(%1, %2, 1, 1))";"
> > +   "mfocrf %0," CR0_FXM ";"
> > +   : "=r" (cr)
> > +   : "b" (paste_address), "b" (offset)
> > +   : "memory");
> 
> You need cr0 in the clobbers.

ok
> 
> > +
> > +   return cr;
> 
> I think it would be more natural if you just returned CR0, so if you did
> shift and mask with the CR0 constants you have above.
> 
ok

> 
> > diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
> > index 70762c3..73081b4 100644
> > --- a/arch/powerpc/platforms/powernv/vas-window.c
> > +++ b/arch/powerpc/platforms/powernv/vas-window.c
> > @@ -1040,6 +1041,57 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
> >  }
> >  EXPORT_SYMBOL_GPL(vas_tx_win_open);
> >  
> > +int vas_copy_crb(void *crb, int offset, bool first)
> > +{
> > +   if (!vas_initialized())
> > +   return -1;
> > +
> > +   return vas_copy(crb, offset, first);
> > +}
> > +EXPORT_SYMBOL_GPL(vas_copy_crb);
> > +
> > +#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
> > +int vas_paste_crb(struct vas_window *txwin, int offset, bool last, bool re)
> > +{
> > +   int rc;
> > +   uint64_t val;
> > +   void *addr;
> > +
> > +   if (!vas_initialized())
> > +   return -1;
> 
> This is in the fast path, or at least the runtime path. So I don't think
> these checks are wanted, how would we have got this far if vas wasn't
> initialised?

Yes, I have dropped vas_initialized() now.
> 
> 
> 
> cheers
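
Putting the review comments together, a hedged sketch of what the reworked paste helper might look like, assuming a PPC_PASTE() macro is added to ppc-opcode.h as suggested (this is not the final patch):

static inline int vas_paste(void *paste_address, int offset)
{
	u32 cr = 0;

	asm volatile(PPC_PASTE(%1, %2) ";"
		     "mfocrf %0, 0x80;"		/* read CR0 field only */
		     : "=r" (cr)
		     : "b" (paste_address), "b" (offset)
		     : "memory", "cr0");

	/* Return just the CR0 bits, as requested in the review */
	return (cr >> CR0_SHIFT) & CR0_MASK;
}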



linux-next: manual merge of the xen-tip tree with the tip tree

2017-08-27 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the xen-tip tree got conflicts in:

  arch/x86/xen/xen-asm.S
  arch/x86/xen/xen-asm_64.S

between commit:

  edcb5cf84f05 ("x86/paravirt/xen: Remove xen_patch()")

from the tip tree and commits:

  ad5b8c4ba323 ("xen: get rid of paravirt op adjust_exception_frame")
  bd830917233b ("paravirt,xen: remove xen_patch()")

from the xen-tip tree.

I fixed it up (edcb5cf84f05 and bd830917233b are more or less the same
patch, so I just used the latter version's files) and can carry the fix
as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell


Re: [PATCH 3/3] IPI: Avoid to use 2 cache lines for one call_single_data

2017-08-27 Thread Huang, Ying
"Huang, Ying"  writes:

> Hi, Peter,
>
> "Huang, Ying"  writes:
>
>> Peter Zijlstra  writes:
>>
>>> On Sat, Aug 05, 2017 at 08:47:02AM +0800, Huang, Ying wrote:
 Yes.  That looks good.  So you will prepare the final patch?  Or you
 hope me to do that?
>>>
>>> I was hoping you'd do it ;-)
>>
>> Thanks!  Here is the updated patch
>>
>> Best Regards,
>> Huang, Ying
>>
>> -->8--
>> From 957735e9ff3922368286540dab852986fc7b23b5 Mon Sep 17 00:00:00 2001
>> From: Huang Ying 
>> Date: Mon, 7 Aug 2017 16:55:33 +0800
>> Subject: [PATCH -v3] IPI: Avoid to use 2 cache lines for one
>>  call_single_data
>>
>> struct call_single_data is used in IPI to transfer information between
>> CPUs.  Its size is bigger than sizeof(unsigned long) and less than
>> cache line size.  Now, it is allocated with no explicit alignment
>> requirement.  This makes it possible for an allocated call_single_data
>> to cross 2 cache lines, which doubles the number of cache lines that
>> need to be transferred among CPUs.
>>
>> This is resolved by requiring call_single_data to be aligned to the
>> size of call_single_data.  Now the size of call_single_data is a
>> power of 2.  If we add new fields to call_single_data, we may need to
>> add padding to make sure the size of the new definition is a power of 2.
>> Fortunately, this is enforced by gcc, which will report an error for a
>> non-power-of-2 alignment requirement.
>>
>> To set alignment requirement of call_single_data to the size of
>> call_single_data, a struct definition and a typedef is used.
>>
>> To test the effect of the patch, we use the vm-scalability multiple
>> thread swap test case (swap-w-seq-mt).  The test will create multiple
>> threads and each thread will eat memory until all RAM and part of swap
>> is used, so that a huge number of IPIs will be triggered when unmapping
>> memory.  In the test, the throughput of memory writing improves by ~5%
>> compared with misaligned call_single_data because of faster IPIs.
>
> What do you think about this version?
>

Ping.

Best Regards,
Huang, Ying
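
For reference, a minimal sketch of the alignment trick described in the quoted changelog (the field layout matches the current struct call_single_data; the exact naming in the final patch may differ): wrap the struct in a typedef whose required alignment equals its own size, so a single instance can never straddle two cache lines.

struct __call_single_data {
	struct llist_node llist;
	smp_call_func_t func;
	void *info;
	unsigned int flags;
};

/* Use call_single_data_t for declarations so the alignment is never lost */
typedef struct __call_single_data call_single_data_t
	__aligned(sizeof(struct __call_single_data));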


Re: [PATCH 2/2 v2] sched/wait: Introduce lock breaker in wake_up_page_bit

2017-08-27 Thread Linus Torvalds
On Sun, Aug 27, 2017 at 6:29 PM, Nicholas Piggin  wrote:
>
> BTW. since you are looking at this stuff, one other small problem I remember
> with exclusive waiters is that losing to a concurrent locker puts them to
> the back of the queue. I think that could be fixed with some small change to
> the wait loops (first add to tail, then retries add to head). Thoughts?

No, not that way.

First off, it's oddly complicated, but more importantly, the real
unfairness you lose to is not other things on the wait queue, but to
other lockers that aren't on the wait-queue at all, but instead just
come in and do a "test-and-set" without ever even going through the
slow path.

So instead of playing queuing games, you'd need to just change the
unlock sequence. Right now we basically do:

 - clear lock bit and atomically test if contended (and we play games
with bit numbering to do that atomic test efficiently)

 - if contended, wake things up

and you'd change the logic to be

 - if contended, don't clear the lock bit at all, just transfer the
lock ownership directly to the waiters by walking the wait list

 - clear the lock bit only once there are no more wait entries (either
because there were no waiters at all, or because all the entries were
just waiting for the lock to be released)

which is certainly doable with a couple of small extensions to the
page wait key data structure.

But most of my clever schemes the last few days were abject failures,
and honestly, it's late in the rc.

In fact, this late in the game I probably wouldn't even have committed
the small cleanups I did if it wasn't for the fact that thinking of
the whole WQ_FLAG_EXCLUSIVE bit made me find the bug.

So the cleanups were actually what got me to look at the problem in
the first place, and then I went "I'm going to commit the cleanup, and
then I can think about the bug I just found".

I'm just happy that the fix seems to be trivial. I was afraid I'd have
to do something nastier (like have the EINTR case send another
explicit wakeup to make up for the lost one, or some ugly hack like
that).

It was only when I started looking at the history of that code, and I
saw the old bit_lock code, and I went "Hmm. That has the _same_ bug -
oh wait, no it doesn't!" that I realized that there was that simple
fix.

You weren't cc'd on the earlier part of the discussion, you only got
added when I realized what the history and simple fix was.

   Linus


Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system

2017-08-27 Thread Dou Liyang

Hi Juergen,

At 08/28/2017 12:32 PM, Juergen Gross wrote:

On 28/08/17 06:25, Juergen Gross wrote:

On 28/08/17 05:20, Dou Liyang wrote:

XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus()
initializes interrupts in the XEN PV specific way and does not invoke
native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is
not invoked either.

The invocation of x86_init.intr_mode_init() will be moved from
native_smp_prepare_cpus() in a follow up patch to solve .


Can you be a little bit more precise here, please? :-)


That move would cause the invocation of x86_init.intr_mode_init() for XEN
PV platforms. To prevent that, override the default x86_init.
intr_mode_init() callback with a noop().

[Rewritten by Thomas Gleixner ]

Signed-off-by: Dou Liyang 
Cc: xen-de...@lists.xenproject.org
Cc: boris.ostrov...@oracle.com


On which tree does this apply? Would be nice to get a hint against which
source this can be reviewed.


Aah, just found the rest of the series. In case a single patch of a
series isn't standalone, it would be nice to receive at least the cover
letter of the series in order to know what it's all about.


Sorry to confuse you, it's my fault.

Thank you for your reply. I understood, and I will CC the cover letter to
linux-xen and linux-acpi.

Thanks,
dou.



Juergen








Re: [PATCH v7 10/12] powerpc/vas: Define vas_win_close() interface

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Hi Suka,
> 
> More comments :)

Thanks!

> 
> Sukadev Bhattiprolu  writes:
> 
> > diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
> > index 2dd4b63..24288dd 100644
> > --- a/arch/powerpc/platforms/powernv/vas-window.c
> > +++ b/arch/powerpc/platforms/powernv/vas-window.c
> > @@ -879,11 +887,92 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
> >  }
> >  EXPORT_SYMBOL_GPL(vas_rx_win_open);
> >  
> > -/* stub for now */
> > +static void poll_window_busy_state(struct vas_window *window)
> > +{
> > +   int busy;
> > +   uint64_t val;
> > +
> > +retry:
> > +   /*
> > +* Poll Window Busy flag
> > +*/
> > +   val = read_hvwc_reg(window, VREG(WIN_STATUS));
> > +   busy = GET_FIELD(VAS_WIN_BUSY, val);
> > +   if (busy) {
> > +   val = 0;
> > +   schedule_timeout(2000);
> 
> What's 2000?
> 
> That's in jiffies, so it's not a fixed amount of time.
> 
> But on a typical config that will be 20 _seconds_ ?!

Ok. Should I change that to just HZ and

> 
> But you haven't set the task state, so AFAIK it will just return
> instantly.

call set_current_state(TASK_UNINTERRUPTIBLE) before the schedule_timeout()?

> 
> And if there's a software/hardware bug and it never stops being busy,
> then we have a softlockup. The other option would be print a big fat
> warning and just not free the window. But maybe that doesn't work for
> other reasons.
> 
> > +   goto retry;
> > +   }
> > +}
> > +
> > +static void poll_window_castout(struct vas_window *window)
> > +{
> > +   int cached;
> > +   uint64_t val;
> > +
> > +   /* Cast window context out of the cache */
> > +retry:
> > +   val = read_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL));
> > +   cached = GET_FIELD(VAS_WIN_CACHE_STATUS, val);
> > +   if (cached) {
> > +   val = 0ULL;
> > +   val = SET_FIELD(VAS_CASTOUT_REQ, val, 1);
> > +   val = SET_FIELD(VAS_PUSH_TO_MEM, val, 0);
> > +   write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
> 
> Sigh, I still don't like that macro :)

:-) For one thing, I have used it a lot now, and secondly isn't it easier
to know that the VAS_CASTOUT_REQ bit is set to 1 without worrying about
its bit position? When debugging, yes, we have to ensure VAS_CASTOUT_REQ
is properly defined and we have to work out the value in "val".

> 
> or:
>   write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 1ull << 63);
> 
> > +
> > +   schedule_timeout(2000);
> > +   goto retry;
> > +   }
> > +}
> > +
> > +/*
> > + * Close a window.
> > + *
> > + * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
> > + * - Disable new paste operations (unmap paste address)
> > + * - Poll for the "Window Busy" bit to be cleared
> > + * - Clear the Open/Enable bit for the Window.
> > + * - Poll for return of window Credits (implies FIFO empty for Rx win?)
> > + * - Unpin and cast window context out of cache
> > + *
> > + * Besides the hardware, kernel has some bookkeeping of course.
> > + */
> >  int vas_win_close(struct vas_window *window)
> >  {
> > -   return -1;
> > +   uint64_t val;
> > +
> > +   if (!window)
> > +   return 0;
> > +
> > +   if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
> > +   pr_devel("VAS: Attempting to close an active Rx window!\n");
> > +   WARN_ON_ONCE(1);
> > +   return -EAGAIN;
> 
> EAGAIN means "if you do the same thing again it might work".
> 
> I don't think that's right here. The window is not in a state where it
> can be freed, the caller needs to do something to fix that.
> 
> EBUSY would probably be more appropriate.

Ok. It should not happen now (or even with the fast thread-wakeup code)
since only the kernel should be closing the windows - so it's really a
bug.  Will change to EBUSY though.
> 
> 
> cheers
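
A hedged sketch of the busy-poll loop with the points raised above addressed (task state set before sleeping, an HZ-based timeout instead of a raw jiffies count); the register helpers are the ones from the quoted patch, and this is not the final code:

static void poll_window_busy_state(struct vas_window *window)
{
	u64 val;

	for (;;) {
		val = read_hvwc_reg(window, VREG(WIN_STATUS));
		if (!GET_FIELD(VAS_WIN_BUSY, val))
			break;

		/* Actually sleep instead of returning immediately */
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(HZ);
	}
}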



Re: [PATCH V1] thermal: qcom-spmi-temp-alarm: add support for GEN2 PMIC peripherals

2017-08-27 Thread kgunda

On 2017-08-26 04:49, Stephen Boyd wrote:

On 08/25, Zhang Rui wrote:

On Thu, 2017-08-17 at 13:12 +0530, kgu...@codeaurora.org wrote:
> On 2017-08-16 17:53, kgu...@codeaurora.org wrote:
> >
> > On 2017-08-08 13:42, Zhang Rui wrote:
> > >
> > > On Thu, 2017-07-13 at 17:39 +0530, Kiran Gunda wrote:
> > > >
> > > > From: David Collins 
> > > >
> > > > Add support for the TEMP_ALARM GEN2 PMIC peripheral
> > > > subtype.  The
> > > > GEN2 subtype defines an over temperature state with hysteresis
> > > > instead of stage in the status register.  There are two GEN2
> > > > states corresponding to stages 1 and 2.
> > > >
> > > > Signed-off-by: David Collins 
> > > > Signed-off-by: Kiran Gunda 
> > > Ivan,
> > >
> > > can you please review this patch and let me know your opinion?
> > >
> > > thanks,
> > > rui
> > Ivan,
> > Could you please review this patch ?
> >
> > Thanks,
> > Kiran
> Looks like Ivan is no longer reviewing the patches for qcom.
> Adding Bjorn and Stephen Boyd for the review.
>
Given this is a platform specific change, I will queue it for next
merge window, and let's see if there is any problem reported.



Thanks for that !


FWIW,

Reviewed-by: Stephen Boyd 


[PATCH] tracing: make dynamic types can use __TRACE_LAST_TYPE

2017-08-27 Thread Zhou Chengming
Obviously, trace_events that are defined statically in trace.h won't use
__TRACE_LAST_TYPE, so let dynamic types use it. Also make some
minor changes to trace_search_list() to make the code clearer.

Signed-off-by: Zhou Chengming 
---
 kernel/trace/trace_output.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bac629a..dcb146f 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -19,7 +19,7 @@
 
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
-static int next_event_type = __TRACE_LAST_TYPE + 1;
+static int next_event_type = __TRACE_LAST_TYPE;
 
 enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
 {
@@ -696,7 +696,7 @@ static int trace_search_list(struct list_head **list)
 
if (list_empty(&ftrace_event_list)) {
*list = &ftrace_event_list;
-   return last + 1;
+   return last;
}
 
/*
@@ -704,17 +704,17 @@ static int trace_search_list(struct list_head **list)
 * lets see if somebody freed one.
 */
list_for_each_entry(e, &ftrace_event_list, list) {
-   if (e->type != last + 1)
+   if (e->type != last)
break;
last++;
}
 
/* Did we used up all 65 thousand events??? */
-   if ((last + 1) > TRACE_EVENT_TYPE_MAX)
+   if (last > TRACE_EVENT_TYPE_MAX)
return 0;
 
*list = &e->list;
-   return last + 1;
+   return last;
 }
 
 void trace_event_read_lock(void)
@@ -777,7 +777,7 @@ int register_trace_event(struct trace_event *event)
 
list_add_tail(&event->list, list);
 
-   } else if (event->type > __TRACE_LAST_TYPE) {
+   } else if (event->type >= __TRACE_LAST_TYPE) {
printk(KERN_WARNING "Need to add type to trace.h\n");
WARN_ON(1);
goto out;
-- 
1.8.3.1



Re: [PATCH v7 08/12] powerpc/vas: Define vas_win_id()

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Sukadev Bhattiprolu  writes:
> 
> > Define an interface to return a system-wide unique id for a given VAS
> > window.
> >
> > The vas_win_id() will be used in a follow-on patch to generate an unique
> > handle for a user space receive window. Applications can use this handle
> > to pair send and receive windows for fast thread-wakeup.
> >
> > The hardware refers to this system-wide unique id as a Partition Send
> > Window ID which is expected to be used during fault handling. Hence the
> > "pswid" in the function names.
> 
> Same comment as previous patch.

Ok will drop them for now.

> 
> cheers



Re: [PATCH v7 06/12] powerpc/vas: Define helpers to alloc/free windows

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Sukadev Bhattiprolu  writes:
> > diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
> > b/arch/powerpc/platforms/powernv/vas-window.c

> > +   rc = ida_pre_get(ida, GFP_KERNEL);
> > +   if (!rc)
> > +   return -EAGAIN;
> > +
> > +   spin_lock(&vas_ida_lock);
> > +   rc = ida_get_new_above(ida, 0, &winid);
> 
> If you're passing 0 you can just use ida_get_new().

Ok.

> 
> Or did you actually want to exclude 0? In which case you should pass 1.
> 
> > +   spin_unlock(&vas_ida_lock);
> > +
> > +   if (rc)
> > +   return rc;
> 
> You're supposed to handle EAGAIN I thought.

Yes, I will retry the pre_get()
> 
> > +
> > +   if (winid > VAS_WINDOWS_PER_CHIP) {
> > +   pr_err("VAS: Too many (%d) open windows\n", winid);
> > +   vas_release_window_id(ida, winid);
> > +   return -EAGAIN;
> > +   }
> > +
> > +   return winid;
> > +}
> > +
> > +void vas_window_free(struct vas_window *window)
> 
> static.

Ok

> 
> > +{
> > +   int winid = window->winid;
> > +   struct vas_instance *vinst = window->vinst;
> > +
> > +   unmap_winctx_mmio_bars(window);
> > +   kfree(window);
> > +
> > +   vas_release_window_id(&vinst->ida, winid);
> > +}
> > +
> > +struct vas_window *vas_window_alloc(struct vas_instance *vinst)
> > +{
> > +   int winid;
> > +   struct vas_window *window;
> > +
> > +   winid = vas_assign_window_id(&vinst->ida);
> > +   if (winid < 0)
> > +   return ERR_PTR(winid);
> > +
> > +   window = kzalloc(sizeof(*window), GFP_KERNEL);
> > +   if (!window)
> > +   return ERR_PTR(-ENOMEM);
> 
> You leak an id here.

Argh. Yes.

> 
> The error handling would be easier in here if the caller did the alloc,
> or if you split alloc and init, and alloc just did the kzalloc().

I was trying to simplify error handling in the callers, so they only
have to deal with one failure now.
> 
> One of the callers even prints "unable to allocate memory" if this
> function fails, but that's not accurate, there's several failure modes.

Yes, will fix that message and the leaks.
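
Roughly along these lines (untested sketch, reusing the names from this
patch) - retry the pre_get() and drop the id again if the later
kzalloc() fails:

static int vas_assign_window_id(struct ida *ida)
{
	int rc, winid;

	do {
		rc = ida_pre_get(ida, GFP_KERNEL);
		if (!rc)
			return -ENOMEM;

		spin_lock(&vas_ida_lock);
		rc = ida_get_new(ida, &winid);
		spin_unlock(&vas_ida_lock);
	} while (rc == -EAGAIN);

	if (rc)
		return rc;

	if (winid > VAS_WINDOWS_PER_CHIP) {
		pr_err("VAS: Too many (%d) open windows\n", winid);
		vas_release_window_id(ida, winid);
		return -EAGAIN;
	}

	return winid;
}

struct vas_window *vas_window_alloc(struct vas_instance *vinst)
{
	struct vas_window *window;
	int winid;

	winid = vas_assign_window_id(&vinst->ida);
	if (winid < 0)
		return ERR_PTR(winid);

	window = kzalloc(sizeof(*window), GFP_KERNEL);
	if (!window) {
		/* don't leak the id we just assigned */
		vas_release_window_id(&vinst->ida, winid);
		return ERR_PTR(-ENOMEM);
	}

	window->vinst = vinst;
	window->winid = winid;

	return window;
}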

Thanks,

Suka



linux-next: manual merge of the kvm tree with the tip tree

2017-08-27 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the kvm tree got a conflict in:

  arch/x86/kvm/mmu.c

between commit:

  ea2800ddb20d ("kvm/x86: Avoid clearing the C-bit in rsvd_bits()")

from the tip tree and commit:

  d6321d493319 ("KVM: x86: generalize guest_cpuid_has_ helpers")

from the kvm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc arch/x86/kvm/mmu.c
index 04d750813c9d,2a8a6e3e2a31..
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@@ -4116,21 -4157,11 +4162,21 @@@ reset_shadow_zero_bits_mask(struct kvm_
 * Passing "true" to the last argument is okay; it adds a check
 * on bit 8 of the SPTEs which KVM doesn't use anyway.
 */
 -  __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check,
 +  shadow_zero_check = &context->shadow_zero_check;
 +  __reset_rsvds_bits_mask(vcpu, shadow_zero_check,
boot_cpu_data.x86_phys_bits,
context->shadow_root_level, uses_nx,
-   guest_cpuid_has_gbpages(vcpu), is_pse(vcpu),
-   true);
+   guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
+   is_pse(vcpu), true);
 +
 +  if (!shadow_me_mask)
 +  return;
 +
 +  for (i = context->shadow_root_level; --i >= 0;) {
 +  shadow_zero_check->rsvd_bits_mask[0][i] &= ~shadow_me_mask;
 +  shadow_zero_check->rsvd_bits_mask[1][i] &= ~shadow_me_mask;
 +  }
 +
  }
  EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
  


Re: [PATCH v7 05/12] powerpc/vas: Define helpers to init window context

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Sukadev Bhattiprolu  writes:
> > diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
> > b/arch/powerpc/platforms/powernv/vas-window.c
> > index a3a705a..3a50d6a 100644
> > --- a/arch/powerpc/platforms/powernv/vas-window.c
> > +++ b/arch/powerpc/platforms/powernv/vas-window.c
> > @@ -11,6 +11,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  
> >  #include "vas.h"
> >  
> > @@ -185,6 +186,310 @@ int map_winctx_mmio_bars(struct vas_window *window)
> > return 0;
> >  }
> >  
> > +/*
> > + * Reset all valid registers in the HV and OS/User Window Contexts for
> > + * the window identified by @window.
> > + *
> > + * NOTE: We cannot really use a for loop to reset window context. Not all
> > + *  offsets in a window context are valid registers and the valid
> > + *  registers are not sequential. And, we can only write to offsets
> > + *  with valid registers (or is that only in Simics?).
> 
> I assume there's no "reset everything" register we can write to do this
> for us?

Checked with the hardware team and they said there is no "reset everything"
register. While there are some tricky ways to clear the context, writing
zeroes is the easiest.

> 
> Also if you can clean up the comment to not mention Simics, I would
> assume that applies on real hardware too.
> 
> > + */
> > +void reset_window_regs(struct vas_window *window)
> > +{
> > +   write_hvwc_reg(window, VREG(LPID), 0ULL);
> > +   write_hvwc_reg(window, VREG(PID), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(AMR), 0ULL);
> > +   write_hvwc_reg(window, VREG(SEIDR), 0ULL);
> > +   write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
> > +   write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
> > +   write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
> > +   write_hvwc_reg(window, VREG(PSWID), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE1), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE2), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE3), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE4), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE5), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE6), 0ULL);
> 
> Should we be writing to spare registers? Presumably in a future hardware
> revision they might have some unknown purpose.

Sure, will skip those.

> 
> > +   write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
> > +   write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
> > +   write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
> > +   write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
> > +   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
> > +   write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
> > +   write_hvwc_reg(window, VREG(WINCTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
> > +   write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
> > +   write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
> > +
> > +   /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
> > +
> > +   /*
> > +* The send and receive window credit adder registers are also
> > +* accessible from HVWC and have been initialized above. We don't
> > +* need to initialize from the OS/User Window Context, so skip
> > +* following calls:
> > +*
> > +*  write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
> > +*  write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
> > +*/
> > +}
> > +
> > +/*
> > + * Initialize window context registers related to Address Translation.
> > + * These registers are common to send/receive windows although they
> > + * differ for user/kernel windows. As we resolve the TODOs we may
> > + * want to add fields to vas_winctx and move the initialization to
> > + * init_vas_winctx_regs().
> > + */
> > +static void init_xlate_regs(struct vas_window *window, bool user_win)
> > +{
> > +   uint64_t lpcr, val;
> > +
> > +   /*
> > +* MSR_TA, MSR_US are false for both kernel and user.
> > +* MSR_DR and MSR_PR are false for kernel.
> > +*/
> > +   val = 0ULL;
> > +   val = SET_FIELD(VAS_XLATE_MSR_HV, val, true);
> 
> Using a bool here presumably works, but if you actua

Re: Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce

2017-08-27 Thread Paul Mackerras
On Sun, Aug 27, 2017 at 10:02:20PM +0100, Al Viro wrote:
> On Wed, Aug 23, 2017 at 04:06:24PM +1000, Paul Mackerras wrote:
> 
> > It seems to me that it would be better to do the anon_inode_getfd()
> > call before the kvm_get_kvm() call, and go to the fail label if it
> > fails.
> 
> And what happens if another thread does close() on the (guessed) fd?

Chaos ensues, but mostly because we don't have proper mutual exclusion
on the modifications to the list.  I'll add a mutex_lock/unlock to
kvm_spapr_tce_release() and move the anon_inode_getfd() call inside
the mutex.

It looks like the other possible uses of the fd (mmap, and passing it
as a parameter to the KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM
device fd) are safe.
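
Roughly what I have in mind (sketch only, not the final patch; names as
in the existing kvm_vm_ioctl_create_spapr_tce() code) is to create the
fd under kvm->lock and only publish the table and take the kvm
reference when that succeeds, with kvm_spapr_tce_release() taking the
same mutex around the list removal:

	mutex_lock(&kvm->lock);

	ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
			       stt, O_RDWR | O_CLOEXEC);
	if (ret >= 0) {
		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
		kvm_get_kvm(kvm);
	}

	mutex_unlock(&kvm->lock);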

Thanks,
Paul.


Re: [PATCH] Fix compat_sys_sigpending breakage introduced by v4.13-rc1~6^2~12

2017-08-27 Thread Al Viro
On Sun, Aug 06, 2017 at 07:22:03PM +0100, Al Viro wrote:

> I would pick it through my tree, but the local network is half-disasembled
> for move (containers arrive tomorrow, flight to Boston on 9th, stuff should
> arrive there by the weekend, so I hope to be back to normal by the 14th
> or so, assuming I'll have any sanity left by that time).

... and that hope had turned out to be far too optimistic.  Getting the things
back into working shape took two weeks longer than that; by now most of the
damage has been dealt with.  Dmitry's followups applied to for-next queue, with
apologies for delay.


Re: [PATCH v7 04/12] powerpc/vas: Define helpers to access MMIO regions

2017-08-27 Thread Sukadev Bhattiprolu
Michael Ellerman [m...@ellerman.id.au] wrote:
> Hi Suka,
> 
> Comments inline.
> 
> Sukadev Bhattiprolu  writes:
> > diff --git a/arch/powerpc/platforms/powernv/vas-window.c 
> > b/arch/powerpc/platforms/powernv/vas-window.c
> > index 6156fbe..a3a705a 100644
> > --- a/arch/powerpc/platforms/powernv/vas-window.c
> > +++ b/arch/powerpc/platforms/powernv/vas-window.c
> > @@ -9,9 +9,182 @@
> >  
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >  
> >  #include "vas.h"
> >  
> > +/*
> > + * Compute the paste address region for the window @window using the
> > + * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
> > + */
> > +void compute_paste_address(struct vas_window *window, uint64_t *addr, int 
> > *len)
> > +{
> > +   uint64_t base, shift;
> 
> Please use the kernel types, so u64 here.

Ok.

> 
> > +   int winid;
> > +
> > +   base = window->vinst->paste_base_addr;
> > +   shift = window->vinst->paste_win_id_shift;
> > +   winid = window->winid;
> > +
> > +   *addr  = base + (winid << shift);
> > +   if (len)
> > +   *len = PAGE_SIZE;
> 
> Having multiple output parameters makes for a pretty awkward API. Is it
> really necesssary given len is a constant PAGE_SIZE anyway.
> 
> If you didn't return len, then you could just make the function return
> the addr, and you wouldn't need any output parameters.

I agree, I went back and forth on it. I was trying to avoid callers
making assumptions on the size. But since there are just a couple
of places, I guess we could have them assume PAGE_SIZE.

> 
> One of the callers that passes len is unmap_paste_region(), but that
> is a bit odd. It would be more natural I think if once a window is
> mapped it knows its size. Or if the mapping will always just be one page
> then we can just know that.

Agree; since the len values are constant, I was trying to avoid saving
them in each of the 64K windows - hence computing it during unmap. Will
change to assume PAGE_SIZE.
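
i.e. something like this (sketch; callers that need the size just use
PAGE_SIZE):

	u64 compute_paste_address(struct vas_window *window)
	{
		u64 base = window->vinst->paste_base_addr;
		u64 shift = window->vinst->paste_win_id_shift;

		return base + ((u64)window->winid << shift);
	}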

Also agree with other comments here.



Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system

2017-08-27 Thread Juergen Gross
On 28/08/17 06:25, Juergen Gross wrote:
> On 28/08/17 05:20, Dou Liyang wrote:
>> XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus()
>> initializes interrupts in the XEN PV specific way and does not invoke
>> native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is
>> not invoked either.
>>
>> The invocation of x86_init.intr_mode_init() will be moved from
>> native_smp_prepare_cpus() in a follow up patch to solve <REASON/PROBLEM>.
> 
> Can you be a little bit more precise here, please? :-)
> 
>> That move would cause the invocation of x86_init.intr_mode_init() for XEN
>> PV platforms. To prevent that, override the default x86_init.
>> intr_mode_init() callback with a noop().
>>
>> [Rewritten by Thomas Gleixner ]
>>
>> Signed-off-by: Dou Liyang 
>> Cc: xen-de...@lists.xenproject.org
>> Cc: boris.ostrov...@oracle.com
> 
> On which tree does this apply? Would be nice to get a hint against which
> source this can be reviewed.

Aah, just found the rest of the series. In case a single patch of a
series isn't standalone, it would be nice to receive at least the cover
letter of the series in order to know what it's all about.


Juergen


[PATCH] IB/rxe: constify vm_operations_struct

2017-08-27 Thread Arvind Yadav
vm_operations_struct is not supposed to change at runtime. The
vm_area_struct structure works with a const vm_operations_struct. So
mark the non-const vm_operations_struct structs as const.

Signed-off-by: Arvind Yadav 
---
 drivers/infiniband/sw/rxe/rxe_mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c 
b/drivers/infiniband/sw/rxe/rxe_mmap.c
index bd812e0..d22431e 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, rxe_mmap_release);
 }
 
-static struct vm_operations_struct rxe_vm_ops = {
+static const struct vm_operations_struct rxe_vm_ops = {
.open = rxe_vma_open,
.close = rxe_vma_close,
 };
-- 
1.9.1



[PATCH] IB/hfi1: constify vm_operations_struct

2017-08-27 Thread Arvind Yadav
vm_operations_struct is not supposed to change at runtime. The
vm_area_struct structure works with a const vm_operations_struct. So
mark the non-const vm_operations_struct structs as const.

Signed-off-by: Arvind Yadav 
---
 drivers/infiniband/hw/hfi1/file_ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c 
b/drivers/infiniband/hw/hfi1/file_ops.c
index 3158128..46db68f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -116,7 +116,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int 
cmd,
.llseek = noop_llseek,
 };
 
-static struct vm_operations_struct vm_ops = {
+static const struct vm_operations_struct vm_ops = {
.fault = vma_fault,
 };
 
-- 
1.9.1



Re: [Xen-devel] [PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system

2017-08-27 Thread Juergen Gross
On 28/08/17 05:20, Dou Liyang wrote:
> XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus()
> initializes interrupts in the XEN PV specific way and does not invoke
> native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is
> not invoked either.
> 
> The invocation of x86_init.intr_mode_init() will be moved from
> native_smp_prepare_cpus() in a follow up patch to solve <REASON/PROBLEM>.

Can you be a little bit more precise here, please? :-)

> That move would cause the invocation of x86_init.intr_mode_init() for XEN
> PV platforms. To prevent that, override the default x86_init.
> intr_mode_init() callback with a noop().
> 
> [Rewritten by Thomas Gleixner ]
> 
> Signed-off-by: Dou Liyang 
> Cc: xen-de...@lists.xenproject.org
> Cc: boris.ostrov...@oracle.com

On which tree does this apply? Would be nice to get a hint against which
source this can be reviewed.


Juergen

> ---
>  arch/x86/xen/enlighten_pv.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 811e4dd..07147dd 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -1250,6 +1250,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
>   x86_platform.get_nmi_reason = xen_get_nmi_reason;
>  
>   x86_init.resources.memory_setup = xen_memory_setup;
> + x86_init.irqs.intr_mode_init= x86_init_noop;
>   x86_init.oem.arch_setup = xen_arch_setup;
>   x86_init.oem.banner = xen_banner;
>  
> 



linux-next: build failure after merge of the rcu tree

2017-08-27 Thread Stephen Rothwell
Hi Paul,

After merging the rcu tree, today's linux-next build (arm
multi_v7_defconfig) failed like this:

In file included from arch/arm/kernel/asm-offsets.c:14:0:
include/linux/sched.h: In function 'membarrier_sched_out':
include/linux/sched.h:1680:3: error: implicit declaration of function 
'sync_core' [-Werror=implicit-function-declaration]
   sync_core();
   ^

Caused by commit

  0d6eb99818da ("membarrier: Provide register sync core cmd")

I have used the rcu tree from next-20170825 for today.

-- 
Cheers,
Stephen Rothwell


Re: [PATCH 3.18 00/10] 3.18.67-stable review

2017-08-27 Thread Greg Kroah-Hartman
On Sun, Aug 27, 2017 at 09:49:48AM -0700, Guenter Roeck wrote:
> On Tue, Aug 22, 2017 at 12:09:32PM -0700, Greg Kroah-Hartman wrote:
> > This is the start of the stable review cycle for the 3.18.67 release.
> > There are 10 patches in this series, all will be posted as a response
> > to this one.  If anyone has any issues with these being applied, please
> > let me know.
> > 
> > Responses should be made by Thu Aug 24 19:08:42 UTC 2017.
> > Anything received after that time might be too late.
> > 
> 
> Better late than never. Sorry, I was out of town.

Not a problem, so was I :)

> 
> Build results:
>   total: 136 pass: 136 fail: 0
> Qemu test results:
>   total: 111 pass: 111 fail: 0

Thanks for the report for all of these.

greg k-h


Re: [PATCH net-next v7 04/10] bpf: Define handle_fs and add a new helper bpf_handle_fs_get_mode()

2017-08-27 Thread James Morris
On Mon, 21 Aug 2017, Mickaël Salaün wrote:

> @@ -85,6 +90,8 @@ enum bpf_arg_type {
>  
>   ARG_PTR_TO_CTX, /* pointer to context */
>   ARG_ANYTHING,   /* any (initialized) argument is ok */
> +
> + ARG_CONST_PTR_TO_HANDLE_FS, /* pointer to an abstract FS struct */
>  };

Looks like a spurious empty line.

-- 
James Morris



linux-next: manual merge of the tip tree with the spi tree

2017-08-27 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the tip tree got a conflict in:

  tools/Makefile

between commit:

  e9d4650dcc59 ("spi: tools: add install section")

from the spi tree and commit:

  ecda85e70277 ("x86/lguest: Remove lguest support")

from the tip tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc tools/Makefile
index 616e7722b327,a19b176b914b..
--- a/tools/Makefile
+++ b/tools/Makefile
@@@ -90,8 -89,8 +89,8 @@@ freefall: FORC
  kvm_stat: FORCE
$(call descend,kvm/$@)
  
- all: acpi cgroup cpupower gpio hv firewire lguest liblockdep \
+ all: acpi cgroup cpupower gpio hv firewire liblockdep \
 -  perf selftests turbostat usb \
 +  perf selftests spi turbostat usb \
virtio vm net x86_energy_perf_policy \
tmon freefall objtool kvm_stat
  
@@@ -101,7 -100,7 +100,7 @@@ acpi_install
  cpupower_install:
$(call descend,power/$(@:_install=),install)
  
- cgroup_install firewire_install gpio_install hv_install lguest_install 
perf_install spi_install usb_install virtio_install vm_install net_install 
objtool_install:
 -cgroup_install firewire_install gpio_install hv_install perf_install 
usb_install virtio_install vm_install net_install objtool_install:
++cgroup_install firewire_install gpio_install hv_install perf_install 
spi_install usb_install virtio_install vm_install net_install objtool_install:
$(call descend,$(@:_install=),install)
  
  liblockdep_install:


Re: [kernel-hardening] Re: [PATCH net-next v7 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier

2017-08-27 Thread James Morris
On Wed, 23 Aug 2017, Mickaël Salaün wrote:

> >> +  struct {
> >> +  __u32   abi; /* minimal ABI version, cf. user doc */
> > 
> > the concept of abi (version) sounds a bit weird to me.
> > Why bother with it at all?
> > Once the first set of patches lands the kernel as whole will have landlock 
> > feature
> > with a set of helpers, actions, event types.
> > Some future patches will extend the landlock feature step by step.
> > This abi concept assumes that anyone who adds new helper would need
> > to keep incrementing this 'abi'. What value does it give to user or to 
> > kernel?
> > The users will already know that landlock is present in kernel 4.14 or 
> > whatever
> > and the kernel 4.18 has more landlock features. Why bother with extra abi 
> > number?
> 
> That's right for helpers and context fields, but we can't check the use
> of one field's content. The status field is intended to be a bitfield
> extendable in the future. For example, one use case is to set a flag to
> inform the eBPF program that it was already called with the same context
> and can skip most of its check (if not related to maps). Same goes for
> the FS action bitfield, one may want to add more of them. Another
> example may be the check for abilities. We may want to relax/remove the
> capability required to set one of them. With an ABI version, the user can
> easily check if the current kernel support that.

Don't call it an ABI, perhaps minimum policy version (similar to 
what SELinux does).  Changes need to be made so that any existing 
userspace still works.



-- 
James Morris



Re: [PATCH net-next v7 02/10] bpf: Add eBPF program subtype and is_valid_subtype() verifier

2017-08-27 Thread James Morris
On Tue, 22 Aug 2017, Alexei Starovoitov wrote:

> more general question: what is the status of security/ bits?
> I'm assuming they still need to be reviewed and explicitly acked by James, 
> right?

Yep, along with other core security developers where possible.


-- 
James Morris




Re: [kernel-hardening] [PATCH net-next v7 00/10] Landlock LSM: Toward unprivileged sandboxing

2017-08-27 Thread James Morris
On Mon, 21 Aug 2017, Mickaël Salaün wrote:

> ## Why a new LSM? Are SELinux, AppArmor, Smack and Tomoyo not good enough?
> 
> The current access control LSMs are fine for their purpose which is to give 
> the
> *root* the ability to enforce a security policy for the *system*. What is
> missing is a way to enforce a security policy for any application by its
> developer and *unprivileged user* as seccomp can do for raw syscall filtering.
> 

You could mention here that the first case is Mandatory Access Control, 
in general terms.



-- 
James Morris



[PATCH v8 01/13] x86/apic: Construct a selector for the interrupt delivery mode

2017-08-27 Thread Dou Liyang
Currently, there are many switches in the kernel which are used to
determine the final interrupt delivery mode, as shown below:

1) kconfig:
   CONFIG_X86_64; CONFIG_X86_LOCAL_APIC; CONFIG_X86_IO_APIC
2) kernel options: disable_apic; skip_ioapic_setup
3) CPU capability: boot_cpu_has(X86_FEATURE_APIC)
4) MP table: smp_found_config
5) ACPI: acpi_lapic; acpi_ioapic; nr_ioapic

These switches are scattered and disordered and have dependencies on
each other, which makes the code difficult to maintain and read.

Construct a selector which unifies them in a single function, then use
this selector to determine the interrupt delivery mode directly.

Signed-off-by: Dou Liyang 
---
 arch/x86/kernel/apic/apic.c | 59 +
 1 file changed, 59 insertions(+)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 98b3dd8..01bde03 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1235,6 +1235,65 @@ void __init sync_Arb_IDs(void)
APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
+enum apic_intr_mode {
+   APIC_PIC,
+   APIC_VIRTUAL_WIRE,
+   APIC_SYMMETRIC_IO,
+};
+
+static int __init apic_intr_mode_select(void)
+{
+   /* Check kernel option */
+   if (disable_apic) {
+   pr_info("APIC disabled via kernel command line\n");
+   return APIC_PIC;
+   }
+
+   /* Check BIOS */
+#ifdef CONFIG_X86_64
+   /* On 64-bit, the APIC must be integrated, Check local APIC only */
+   if (!boot_cpu_has(X86_FEATURE_APIC)) {
+   disable_apic = 1;
+   pr_info("APIC disabled by BIOS\n");
+   return APIC_PIC;
+   }
+#else
+   /*
+* On 32-bit, check whether there is a separate chip or integrated
+* APIC
+*/
+
+   /* Has a separate chip ? */
+   if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
+   disable_apic = 1;
+
+   return APIC_PIC;
+   }
+
+   /* Has a local APIC ? */
+   if (!boot_cpu_has(X86_FEATURE_APIC) &&
+   APIC_INTEGRATED(boot_cpu_apic_version)) {
+   disable_apic = 1;
+   pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
+  boot_cpu_physical_apicid);
+
+   return APIC_PIC;
+   }
+#endif
+
+   /* Check MP table or ACPI MADT configuration */
+   if (!smp_found_config) {
+   disable_ioapic_support();
+
+   if (!acpi_lapic)
+   pr_info("APIC: ACPI MADT or MP tables are not 
detected\n");
+
+   return APIC_VIRTUAL_WIRE;
+   }
+
+   return APIC_SYMMETRIC_IO;
+}
+
 /*
  * An initial setup of the virtual wire mode.
  */
-- 
2.5.5





Re: [RESEND PATCH v4 2/2] i2c: Add Spreadtrum I2C controller driver

2017-08-27 Thread Baolin Wang
Hi Wolfram,

On 27 August 2017 at 23:30, Wolfram Sang  wrote:
> Hi,
>
> thanks for your submission.
>
>> +static void sprd_i2c_dump_reg(struct sprd_i2c *i2c_dev)
>> +{
>> + dev_err(&i2c_dev->adap.dev, ": ==dump i2c-%d reg===\n",
>> + i2c_dev->adap.nr);
>> + dev_err(&i2c_dev->adap.dev, ": I2C_CTRL:0x%x\n",
>> + readl(i2c_dev->base + I2C_CTL));
>> + dev_err(&i2c_dev->adap.dev, ": I2C_ADDR_CFG:0x%x\n",
>> + readl(i2c_dev->base + I2C_ADDR_CFG));
>> + dev_err(&i2c_dev->adap.dev, ": I2C_COUNT:0x%x\n",
>> + readl(i2c_dev->base + I2C_COUNT));
>> + dev_err(&i2c_dev->adap.dev, ": I2C_RX:0x%x\n",
>> + readl(i2c_dev->base + I2C_RX));
>> + dev_err(&i2c_dev->adap.dev, ": I2C_STATUS:0x%x\n",
>> + readl(i2c_dev->base + I2C_STATUS));
>> + dev_err(&i2c_dev->adap.dev, ": ADDR_DVD0:0x%x\n",
>> + readl(i2c_dev->base + ADDR_DVD0));
>> + dev_err(&i2c_dev->adap.dev, ": ADDR_DVD1:0x%x\n",
>> + readl(i2c_dev->base + ADDR_DVD1));
>> + dev_err(&i2c_dev->adap.dev, ": ADDR_STA0_DVD:0x%x\n",
>> + readl(i2c_dev->base + ADDR_STA0_DVD));
>> + dev_err(&i2c_dev->adap.dev, ": ADDR_RST:0x%x\n",
>> + readl(i2c_dev->base + ADDR_RST));
>
> I really thing register dumps should be dev_dbg().

OK. Will fix in next version.

>
>> +}
>> +
>> +static void sprd_i2c_set_count(struct sprd_i2c *i2c_dev, u32 count)
>> +{
>> + writel(count, i2c_dev->base + I2C_COUNT);
>> +}
>> +
>> +static void sprd_i2c_send_stop(struct sprd_i2c *i2c_dev, int stop)
>> +{
>> + unsigned int tmp = readl(i2c_dev->base + I2C_CTL);
>
> u32? Here and in many other places?

OK.

>
> ...
>
>> +static irqreturn_t sprd_i2c_isr_thread(int irq, void *dev_id)
>> +{
>> + struct sprd_i2c *i2c_dev = dev_id;
>> + struct i2c_msg *msg = i2c_dev->msg;
>> + int ack = readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK;
>> + u32 i2c_count = readl(i2c_dev->base + I2C_COUNT);
>> + u32 i2c_tran;
>> +
>> + if (msg->flags & I2C_M_RD)
>> + i2c_tran = i2c_dev->count >= I2C_FIFO_FULL_THLD;
>> + else
>> + i2c_tran = i2c_count;
>> +
>> + /*
>> +  * If we got one ACK from slave when writing data, and we did not
>
> Here you say: "If we get ack..."
>
>> +  * finish this transmission (i2c_tran is not zero), then we should
>> +  * continue to write data.
>> +  *
>> +  * For reading data, ack is always 0, if i2c_tran is not 0 which
>> +  * means we still need to contine to read data from slave.
>> +  */
>> + if (i2c_tran && !ack) {
>
> ... but the code gives the assumption you did NOT get an ack. So, either
> rename the variable to 'ack_err' or keep it 'ack' and invert the logic
> when initializing the variable.

If ack == 0, it means we got an ACK. I will invert the logic as you suggested.
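
i.e. roughly (sketch):

	/* sketch: ack is now true when the slave ACKed the last byte */
	bool ack = !(readl(i2c_dev->base + I2C_STATUS) & I2C_RX_ACK);
	...
	if (i2c_tran && ack) {
		sprd_i2c_data_transfer(i2c_dev);
		return IRQ_HANDLED;
	}
	...
	/* no ACK from the slave: flag the error and dump the registers */
	if (!ack) {
		i2c_dev->err = -EIO;
		sprd_i2c_dump_reg(i2c_dev);
	}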

>
>> + sprd_i2c_data_transfer(i2c_dev);
>> + return IRQ_HANDLED;
>> + }
>> +
>> + i2c_dev->err = 0;
>> +
>> + /*
>> +  * If we did not get one ACK from slave when writing data, we should
>> +  * dump all registers to check I2C status.
>
> Why? I would say no. NACK from a slave can always happen, e.g. when an
> EEPROM is busy erasing a page.

According to our I2C controller databook, if the master does not get an
ACK from the slave when writing data, it should send one STOP signal to
abort this data transfer or generate one repeated START signal to start
a new data transfer cycle. Considering our I2C usage scenarios, we
should dump the registers to analyze the I2C status and notify the user
to restart the data transfer.

>
>> +  */
>> + if (ack) {
>> + i2c_dev->err = -EIO;
>> + sprd_i2c_dump_reg(i2c_dev);
>> + } else if (msg->flags & I2C_M_RD && i2c_dev->count) {
>> + sprd_i2c_read_bytes(i2c_dev, i2c_dev->buf, i2c_dev->count);
>> + }
>> +
>> + /* Transmission is done and clear ack and start operation */
>> + sprd_i2c_clear_ack(i2c_dev);
>> + sprd_i2c_clear_start(i2c_dev);
>> + complete(&i2c_dev->complete);
>> +
>> + return IRQ_HANDLED;
>> +}
>
> ...
>
>> +
>> + pm_runtime_set_autosuspend_delay(i2c_dev->dev, SPRD_I2C_PM_TIMEOUT);
>> + pm_runtime_use_autosuspend(i2c_dev->dev);
>> + pm_runtime_set_active(i2c_dev->dev);
>> + pm_runtime_enable(i2c_dev->dev);
>> +
>> + ret = pm_runtime_get_sync(i2c_dev->dev);
>> + if (ret < 0) {
>> + dev_err(&pdev->dev, "i2c%d pm runtime resume failed!\n",
>> + pdev->id);
>
> Error message has wrong text.

Will fix it.

>
>> + goto err_rpm_put;
>> + }
>> +
>> +static int sprd_i2c_init(void)
>> +{
>> + return platform_driver_register(&sprd_i2c_driver);
>> +}
>> +arch_initcall_sync(sprd_i2c_init);
>
> arch_initcall? and no exit() function? Why is it that way and/or why
> can't you use platform_module_driver()?

As I explained before, in our Spreadtrum platform, our regul

[PATCH v8 04/13] x86/apic: Move logical APIC ID away from apic_bsp_setup()

2017-08-27 Thread Dou Liyang
apic_bsp_setup() sets and returns the logical APIC ID for initializing
cpu0_logical_apicid in an SMP-capable system.

The id has nothing to do with the initialization of the local APIC and
the I/O APIC, and apic_bsp_setup() should be called for interrupt mode
setup only.

Move the id setup into a separate helper function for cleanup and make
apic_bsp_setup() return void.

Signed-off-by: Dou Liyang 
---
 arch/x86/include/asm/apic.h |  2 +-
 arch/x86/kernel/apic/apic.c | 10 +-
 arch/x86/kernel/smpboot.c   | 12 +++-
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 1a970f5..4e550c7 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -146,7 +146,7 @@ static inline int apic_force_enable(unsigned long addr)
 extern int apic_force_enable(unsigned long addr);
 #endif
 
-extern int apic_bsp_setup(bool upmode);
+extern void apic_bsp_setup(bool upmode);
 extern void apic_ap_setup(void);
 
 /*
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 80a273d..0fcbcf3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2426,25 +2426,17 @@ static void __init apic_bsp_up_setup(void)
  * Returns:
  * apic_id of BSP APIC
  */
-int __init apic_bsp_setup(bool upmode)
+void __init apic_bsp_setup(bool upmode)
 {
-   int id;
-
connect_bsp_APIC();
if (upmode)
apic_bsp_up_setup();
setup_local_APIC();
 
-   if (x2apic_mode)
-   id = apic_read(APIC_LDR);
-   else
-   id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-
enable_IO_APIC();
end_local_APIC_setup();
irq_remap_enable_fault_handling();
setup_IO_APIC();
-   return id;
 }
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 394cd81..4ace4d0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1291,6 +1291,14 @@ static void __init smp_cpu_index_default(void)
}
 }
 
+static void __init smp_get_logical_apicid(void)
+{
+   if (x2apic_mode)
+   cpu0_logical_apicid = apic_read(APIC_LDR);
+   else
+   cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+}
+
 /*
  * Prepare for SMP bootup.  The MP table or ACPI has been read
  * earlier.  Just do some sanity checking here and enable APIC mode.
@@ -1351,11 +1359,13 @@ void __init native_smp_prepare_cpus(unsigned int 
max_cpus)
}
 
default_setup_apic_routing();
-   cpu0_logical_apicid = apic_bsp_setup(false);
+   apic_bsp_setup(false);
 
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
 
+   smp_get_logical_apicid();
+
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
 
-- 
2.5.5





[PATCH v8 08/13] x86/ioapic: Refactor the delay logic in timer_irq_works()

2017-08-27 Thread Dou Liyang
The kernel uses timer_irq_works() to detect whether the timer IRQ
works. It calls mdelay(10) to delay ten ticks and checks whether the
timer IRQ works or not. mdelay() depends on loops_per_jiffy, which is
set up in calibrate_delay(), and the current kernel assumes that IRQ 0
is available when it calibrates the delay.

But that is wrong in a dump-capture kernel which inherited the 'notsc'
option from the first kernel's command line: the dump-capture kernel
cannot be sure that the timer IRQ works at all.

The correct design is to set up the interrupt mode and check that the
timer IRQ works before calibrate_delay(). That results in mdelay()
being unusable in timer_irq_works().

This is a preparatory patch for moving that setup earlier. Refactor the
delay logic to wait for a number of cycles: on systems with the
X86_FEATURE_TSC feature use rdtsc(), otherwise call __delay() directly.

Note: 4 GHz is taken as the maximum CPU frequency of a current single
CPU.

Signed-off-by: Dou Liyang 
---
 arch/x86/kernel/apic/io_apic.c | 45 --
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 237e9c2..348ea7e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1585,6 +1585,43 @@ static int __init notimercheck(char *s)
 }
 __setup("no_timer_check", notimercheck);
 
+static void __init delay_with_tsc(void)
+{
+   unsigned long long start, now;
+   unsigned long end = jiffies + 4;
+
+   start = rdtsc();
+
+   /*
+* We don't know the TSC frequency yet, but waiting for
+* 400/HZ TSC cycles is safe:
+* 4 GHz == 10 jiffies
+* 1 GHz == 40 jiffies
+*/
+   do {
+   rep_nop();
+   now = rdtsc();
+   } while ((now - start) < 400UL / HZ &&
+   time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+   unsigned long end = jiffies + 4;
+   int band = 1;
+
+   /*
+* We don't know any frequency yet, but waiting for
+* 4094000/HZ cycles is safe:
+* 4 GHz == 10 jiffies
+* 1 GHz == 40 jiffies
+* 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+*/
+   do {
+   __delay(((1U << band++) * 1000UL) / HZ);
+   } while (band < 12 && time_before_eq(jiffies, end));
+}
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1603,8 +1640,12 @@ static int __init timer_irq_works(void)
 
local_save_flags(flags);
local_irq_enable();
-   /* Let ten ticks pass... */
-   mdelay((10 * 1000) / HZ);
+
+   if (boot_cpu_has(X86_FEATURE_TSC))
+   delay_with_tsc();
+   else
+   delay_without_tsc();
+
local_irq_restore(flags);
 
/*
-- 
2.5.5





[PATCH v8 11/13] ACPI / init: Invoke early ACPI initialization earlier

2017-08-27 Thread Dou Liyang
Linux uses acpi_early_init() to switch the ACPI table management from
the early stage to the late stage. The two stages are different: in the
early stage, mapped ACPI tables are temporary and should be unmapped,
but in the late stage they are permanent and do not need to be
unmapped.

Originally, mapping and parsing the DMAR table happens in the late
stage. However, initializing the interrupt delivery mode earlier moves
it into the early stage. This causes an ACPI error warning when Linux
reallocates the ACPI root tables, because Linux does not unmap the DMAR
table after using it in the early stage.

Invoke acpi_early_init() earlier, before late_time_init(), to keep the
DMAR table mapped and parsed in the late stage as before.

Reported-by: Xiaolong Ye 
Signed-off-by: Dou Liyang 
Cc: linux-a...@vger.kernel.org
Cc: Rafael J. Wysocki 
Cc: Zheng, Lv 
---
 init/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/main.c b/init/main.c
index 052481f..52dee20 100644
--- a/init/main.c
+++ b/init/main.c
@@ -655,12 +655,12 @@ asmlinkage __visible void __init start_kernel(void)
kmemleak_init();
setup_per_cpu_pageset();
numa_policy_init();
+   acpi_early_init();
if (late_time_init)
late_time_init();
calibrate_delay();
pidmap_init();
anon_vma_init();
-   acpi_early_init();
 #ifdef CONFIG_X86
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi_enter_virtual_mode();
-- 
2.5.5





[PATCH v8 05/13] x86/apic: Unify interrupt mode setup for SMP-capable system

2017-08-27 Thread Dou Liyang
In an SMP-capable system, the interrupt delivery mode is enabled and
set up in native_smp_prepare_cpus().

This design mixes the APIC and SMP setup together and couples them
tightly.

Make the initialization of the interrupt mode independent: unify and
refine it into apic_intr_mode_init() for SMP-capable systems.

Signed-off-by: Dou Liyang 
---
 arch/x86/kernel/apic/apic.c | 39 ---
 arch/x86/kernel/smpboot.c   | 14 ++
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0fcbcf3..9038c5f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1238,7 +1238,9 @@ void __init sync_Arb_IDs(void)
 enum apic_intr_mode {
APIC_PIC,
APIC_VIRTUAL_WIRE,
+   APIC_VIRTUAL_WIRE_NO_CONFIG,
APIC_SYMMETRIC_IO,
+   APIC_SYMMETRIC_IO_NO_ROUTING,
 };
 
 static int __init apic_intr_mode_select(void)
@@ -1285,12 +1287,29 @@ static int __init apic_intr_mode_select(void)
if (!smp_found_config) {
disable_ioapic_support();
 
-   if (!acpi_lapic)
+   if (!acpi_lapic) {
pr_info("APIC: ACPI MADT or MP tables are not 
detected\n");
 
+   return APIC_VIRTUAL_WIRE_NO_CONFIG;
+   }
+
return APIC_VIRTUAL_WIRE;
}
 
+#ifdef CONFIG_SMP
+   /* If SMP should be disabled, then really disable it! */
+   if (!setup_max_cpus) {
+   pr_info("APIC: SMP mode deactivated\n");
+   return APIC_SYMMETRIC_IO_NO_ROUTING;
+   }
+
+   if (read_apic_id() != boot_cpu_physical_apicid) {
+   panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+read_apic_id(), boot_cpu_physical_apicid);
+   /* Or can we switch back to PIC here? */
+   }
+#endif
+
return APIC_SYMMETRIC_IO;
 }
 
@@ -1346,17 +1365,31 @@ void __init init_bsp_APIC(void)
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
+   bool upmode = false;
+
switch (apic_intr_mode_select()) {
case APIC_PIC:
pr_info("APIC: Keep in PIC mode(8259)\n");
return;
case APIC_VIRTUAL_WIRE:
pr_info("APIC: Switch to virtual wire mode setup\n");
-   return;
+   default_setup_apic_routing();
+   break;
+   case APIC_VIRTUAL_WIRE_NO_CONFIG:
+   pr_info("APIC: Switch to virtual wire mode setup with no 
configuration\n");
+   upmode = true;
+   default_setup_apic_routing();
+   break;
case APIC_SYMMETRIC_IO:
pr_info("APIC: Switch to symmectic I/O mode setup\n");
-   return;
+   default_setup_apic_routing();
+   break;
+   case APIC_SYMMETRIC_IO_NO_ROUTING:
+   pr_info("APIC: Switch to symmectic I/O mode setup in no SMP 
routine\n");
+   break;
}
+
+   apic_bsp_setup(upmode);
 }
 
 static void lapic_setup_esr(void)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4ace4d0..8301b75 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1333,18 +1333,17 @@ void __init native_smp_prepare_cpus(unsigned int 
max_cpus)
 
set_cpu_sibling_map(0);
 
+   apic_intr_mode_init();
+
switch (smp_sanity_check(max_cpus)) {
case SMP_NO_CONFIG:
disable_smp();
-   if (APIC_init_uniprocessor())
-   pr_notice("Local APIC not detected. Using dummy APIC 
emulation.\n");
return;
case SMP_NO_APIC:
disable_smp();
return;
case SMP_FORCE_UP:
disable_smp();
-   apic_bsp_setup(false);
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
return;
@@ -1352,15 +1351,6 @@ void __init native_smp_prepare_cpus(unsigned int 
max_cpus)
break;
}
 
-   if (read_apic_id() != boot_cpu_physical_apicid) {
-   panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-read_apic_id(), boot_cpu_physical_apicid);
-   /* Or can we switch back to PIC here? */
-   }
-
-   default_setup_apic_routing();
-   apic_bsp_setup(false);
-
/* Setup local timer */
x86_init.timers.setup_percpu_clockev();
 
-- 
2.5.5





[PATCH v8 10/13] x86/xen: Bypass intr mode setup in enlighten_pv system

2017-08-27 Thread Dou Liyang
XEN PV overrides smp_prepare_cpus(). xen_pv_smp_prepare_cpus()
initializes interrupts in the XEN PV specific way and does not invoke
native_smp_prepare_cpus(). As a consequence, x86_init.intr_mode_init() is
not invoked either.

The invocation of x86_init.intr_mode_init() will be moved from
native_smp_prepare_cpus() in a follow up patch to solve <REASON/PROBLEM>.

That move would cause the invocation of x86_init.intr_mode_init() for XEN
PV platforms. To prevent that, override the default x86_init.
intr_mode_init() callback with a noop().

[Rewritten by Thomas Gleixner ]

Signed-off-by: Dou Liyang 
Cc: xen-de...@lists.xenproject.org
Cc: boris.ostrov...@oracle.com
---
 arch/x86/xen/enlighten_pv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4dd..07147dd 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1250,6 +1250,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
x86_init.resources.memory_setup = xen_memory_setup;
+   x86_init.irqs.intr_mode_init= x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
 
-- 
2.5.5





[PATCH v8 12/13] x86/time: Initialize interrupt mode behind timer init

2017-08-27 Thread Dou Liyang
In start_kernel(), the kernel first runs in the default interrupt mode
and then switches to the final mode. Normally, booting after a BIOS
reset is fine.

But a dump-capture kernel boots up without a BIOS reset, so the default
mode may not be compatible with the actual register state, which causes
interrupt delivery to fail.

Try to set up the final mode as soon as possible, according to the
parts into which that initialization is split:

1) Set up the APIC/IOAPIC (including testing whether the timer
   interrupt works)

2) Calibrate TSC

3) Set up the local APIC timer

-- From Thomas Gleixner

Initializing the mode should happen as early as possible, before the
TSC is calibrated, and needs to test whether the timer interrupt works
at the same time.

Call it after the timer init, which meets the above conditions.

Signed-off-by: Dou Liyang 
---
 arch/x86/kernel/apic/apic.c | 2 --
 arch/x86/kernel/smpboot.c   | 2 --
 arch/x86/kernel/time.c  | 5 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 47b67f9..7fb5cde 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2471,8 +2471,6 @@ void __init apic_bsp_setup(bool upmode)
 #ifdef CONFIG_UP_LATE_INIT
 void __init up_late_init(void)
 {
-   x86_init.irqs.intr_mode_init();
-
if (apic_intr_mode == APIC_PIC)
return;
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2e0eaf2..4f63afc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1293,8 +1293,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
set_cpu_sibling_map(0);
 
-   x86_init.irqs.intr_mode_init();
-
smp_sanity_check();
 
switch (apic_intr_mode) {
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e0754cd..3ceb834 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -84,6 +84,11 @@ void __init hpet_time_init(void)
 static __init void x86_late_time_init(void)
 {
x86_init.timers.timer_init();
+   /*
+* After PIT/HPET timers init, select and setup
+* the final interrupt mode for delivering IRQs.
+*/
+   x86_init.irqs.intr_mode_init();
tsc_init();
 }
 
-- 
2.5.5





  1   2   3   4   >