Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Russell King - ARM Linux
On Fri, Oct 05, 2018 at 10:15:57AM +0530, Manjeet Pawar wrote:
> From: Rohit Thapliyal  
> 
> During user undefined instruction exception, the arm exception
> handler currently results in application crash through SIGILL.
> The bad instruction can be due to ddr/hardware issue.
> For such cases, exception trap handler could try to recover the corrupted
> text by clearing pagetable entry of undefined instruction pc and trying to 
> fetch
> the instruction opcode by generating major page fault.
> Resulting in loading the page with correct instruction from mapped file.
> If there is no error in root filesystem i.e. the opcode is intact
> in file, then filemap fault shall be able to recover
> the instruction and continue execution normally instead of crashing.

For the reasons others have pointed out in previous replies, I will not
merge this patch or anything similar to this for mainline.  The only
sane solution to the above problem is to fix the hardware or the
hardware timings so that DDR access is reliable.

Sorry.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up


Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Russell King - ARM Linux
On Fri, Oct 05, 2018 at 10:15:57AM +0530, Manjeet Pawar wrote:
> From: Rohit Thapliyal  
> 
> During user undefined instruction exception, the arm exception
> handler currently results in application crash through SIGILL.
> The bad instruction can be due to ddr/hardware issue.
> For such cases, exception trap handler could try to recover the corrupted
> text by clearing pagetable entry of undefined instruction pc and trying to 
> fetch
> the instruction opcode by generating major page fault.
> Resulting in loading the page with correct instruction from mapped file.
> If there is no error in root filesystem i.e. the opcode is intact
> in file, then filemap fault shall be able to recover
> the instruction and continue execution normally instead of crashing.

For the reasons others have pointed out in previous replies, I will not
merge this patch or anything similar to this for mainline.  The only
sane solution to the above problem is to fix the hardware or the
hardware timings so that DDR access is reliable.

Sorry.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up


Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Mark Rutland
On Fri, Oct 05, 2018 at 10:15:57AM +0530, Manjeet Pawar wrote:
> From: Rohit Thapliyal  
> 
> During user undefined instruction exception, the arm exception
> handler currently results in application crash through SIGILL.
> The bad instruction can be due to ddr/hardware issue.

If the DDR controller can return invalid data, then there is simply no
software workaround.

What if the invalid data happens to be valid opcodes? Those would be
executed by the CPU resulting in invalid behaviour, no matter what this
patch does.

What if this occurs for kernel text? The kernel will die.

What about data, in kernel or userspace? Erroneous behaviour would
result.

What about the page tables? Very bad things would happen if those were
corrupted.

> For such cases, exception trap handler could try to recover the corrupted
> text by clearing pagetable entry of undefined instruction pc and trying to 
> fetch
> the instruction opcode by generating major page fault.
> Resulting in loading the page with correct instruction from mapped file.
> If there is no error in root filesystem i.e. the opcode is intact
> in file, then filemap fault shall be able to recover
> the instruction and continue execution normally instead of crashing.
> 
> Signed-off-by: Rohit Thapliyal 
> Signed-off-by: Manjeet Pawar 

I appreciate it's frustrating to have hardware which is very close to
working, but unfortunately this patch cannot solve the problem it sets
out to, and I don't think that it makes sense to have this in mainline.

Thanks,
Mark.

> ---
>  arch/arm/kernel/traps.c | 100 
> +++-
>  1 file changed, 90 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index badf02c..d971834 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -51,6 +52,9 @@
>  };
>  
>  void *vectors_page;
> +#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
> +static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
> +static int undef_recovery_attempt;
>  
>  #ifdef CONFIG_DEBUG_USER
>  unsigned int user_debug;
> @@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
> instr)
>   return fn ? fn(regs, instr) : 1;
>  }
>  
> -asmlinkage void do_undefinstr(struct pt_regs *regs)
> +static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
>  {
> - unsigned int instr;
> - siginfo_t info;
> - void __user *pc;
> -
> - clear_siginfo();
> - pc = (void __user *)instruction_pointer(regs);
> + unsigned int instr = 0;
>  
>   if (processor_mode(regs) == SVC_MODE) {
>  #ifdef CONFIG_THUMB2_KERNEL
> @@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
>   goto die_sig;
>   instr = __mem_to_opcode_arm(instr);
>   }
> +die_sig:
> + return instr;
> +}
> +
> +asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
> +{
> + unsigned int instr, instr2;
> + siginfo_t info;
> + void __user *pc;
> + unsigned long addr;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pte_t *pte;
> + spinlock_t *ptl;
> + clear_siginfo();
>  
> - if (call_undef_hook(regs, instr) == 0)
> + pc = (void __user *)instruction_pointer(regs);
> +
> + instr = getInstr(pc, regs);
> +
> + if (instr && call_undef_hook(regs, instr) == 0)
>   return;
>  
> -die_sig:
>  #ifdef CONFIG_DEBUG_USER
>   if (user_debug & UDBG_UNDEFINED) {
>   pr_info("%s (%d): undefined instruction: pc=%p\n",
> @@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
>   dump_instr(KERN_INFO, regs);
>   }
>  #endif
> -
> + /* Trying to recover an invalid userspace instruction from here */
> + if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
> __ratelimit(_recov_rs)) {
> + addr = (unsigned long) pc;
> + if (processor_mode(regs) == USR_MODE) {
> + struct page *page = NULL;
> +
> + mm = current->mm;
> + vma = find_vma(mm, (unsigned long)pc);
> + if (!vma)
> + goto fail_recovery;
> +
> + if (!vma->vm_file)
> + goto fail_recovery;
> +
> + dump_instr(KERN_ALERT, regs);
> + down_write(>mmap_sem);
> + /* Check first, just in case, recovery already done by 
> some other thread simultaneously */
> + flush_cache_range(vma, vma->vm_start, vma->vm_end);
> + instr2 = getInstr(pc, regs);
> + if (instr != instr2) {
> + up_write(>mmap_sem);
> + return;
> + 

Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Mark Rutland
On Fri, Oct 05, 2018 at 10:15:57AM +0530, Manjeet Pawar wrote:
> From: Rohit Thapliyal  
> 
> During user undefined instruction exception, the arm exception
> handler currently results in application crash through SIGILL.
> The bad instruction can be due to ddr/hardware issue.

If the DDR controller can return invalid data, then there is simply no
software workaround.

What if the invalid data happens to be valid opcodes? Those would be
executed by the CPU resulting in invalid behaviour, no matter what this
patch does.

What if this occurs for kernel text? The kernel will die.

What about data, in kernel or userspace? Erroneous behaviour would
result.

What about the page tables? Very bad things would happen if those were
corrupted.

> For such cases, exception trap handler could try to recover the corrupted
> text by clearing pagetable entry of undefined instruction pc and trying to 
> fetch
> the instruction opcode by generating major page fault.
> Resulting in loading the page with correct instruction from mapped file.
> If there is no error in root filesystem i.e. the opcode is intact
> in file, then filemap fault shall be able to recover
> the instruction and continue execution normally instead of crashing.
> 
> Signed-off-by: Rohit Thapliyal 
> Signed-off-by: Manjeet Pawar 

I appreciate it's frustrating to have hardware which is very close to
working, but unfortunately this patch cannot solve the problem it sets
out to, and I don't think that it makes sense to have this in mainline.

Thanks,
Mark.

> ---
>  arch/arm/kernel/traps.c | 100 
> +++-
>  1 file changed, 90 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
> index badf02c..d971834 100644
> --- a/arch/arm/kernel/traps.c
> +++ b/arch/arm/kernel/traps.c
> @@ -29,6 +29,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  #include 
> @@ -51,6 +52,9 @@
>  };
>  
>  void *vectors_page;
> +#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
> +static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
> +static int undef_recovery_attempt;
>  
>  #ifdef CONFIG_DEBUG_USER
>  unsigned int user_debug;
> @@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
> instr)
>   return fn ? fn(regs, instr) : 1;
>  }
>  
> -asmlinkage void do_undefinstr(struct pt_regs *regs)
> +static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
>  {
> - unsigned int instr;
> - siginfo_t info;
> - void __user *pc;
> -
> - clear_siginfo();
> - pc = (void __user *)instruction_pointer(regs);
> + unsigned int instr = 0;
>  
>   if (processor_mode(regs) == SVC_MODE) {
>  #ifdef CONFIG_THUMB2_KERNEL
> @@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
>   goto die_sig;
>   instr = __mem_to_opcode_arm(instr);
>   }
> +die_sig:
> + return instr;
> +}
> +
> +asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
> +{
> + unsigned int instr, instr2;
> + siginfo_t info;
> + void __user *pc;
> + unsigned long addr;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pte_t *pte;
> + spinlock_t *ptl;
> + clear_siginfo();
>  
> - if (call_undef_hook(regs, instr) == 0)
> + pc = (void __user *)instruction_pointer(regs);
> +
> + instr = getInstr(pc, regs);
> +
> + if (instr && call_undef_hook(regs, instr) == 0)
>   return;
>  
> -die_sig:
>  #ifdef CONFIG_DEBUG_USER
>   if (user_debug & UDBG_UNDEFINED) {
>   pr_info("%s (%d): undefined instruction: pc=%p\n",
> @@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
>   dump_instr(KERN_INFO, regs);
>   }
>  #endif
> -
> + /* Trying to recover an invalid userspace instruction from here */
> + if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
> __ratelimit(_recov_rs)) {
> + addr = (unsigned long) pc;
> + if (processor_mode(regs) == USR_MODE) {
> + struct page *page = NULL;
> +
> + mm = current->mm;
> + vma = find_vma(mm, (unsigned long)pc);
> + if (!vma)
> + goto fail_recovery;
> +
> + if (!vma->vm_file)
> + goto fail_recovery;
> +
> + dump_instr(KERN_ALERT, regs);
> + down_write(>mmap_sem);
> + /* Check first, just in case, recovery already done by 
> some other thread simultaneously */
> + flush_cache_range(vma, vma->vm_start, vma->vm_end);
> + instr2 = getInstr(pc, regs);
> + if (instr != instr2) {
> + up_write(>mmap_sem);
> + return;
> + 

Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Robin Murphy

On 05/10/18 05:45, Manjeet Pawar wrote:

From: Rohit Thapliyal 

During user undefined instruction exception, the arm exception
handler currently results in application crash through SIGILL.
The bad instruction can be due to ddr/hardware issue.
For such cases, exception trap handler could try to recover the corrupted
text by clearing pagetable entry of undefined instruction pc and trying to fetch
the instruction opcode by generating major page fault.
Resulting in loading the page with correct instruction from mapped file.
If there is no error in root filesystem i.e. the opcode is intact
in file, then filemap fault shall be able to recover
the instruction and continue execution normally instead of crashing.


And what if that random memory corruption hits data, or kernel 
instructions? This seems like a lot of effort to go to to fail to solve 
an unsolvable problem...


Robin.


Signed-off-by: Rohit Thapliyal 
Signed-off-by: Manjeet Pawar 
---
  arch/arm/kernel/traps.c | 100 +++-
  1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index badf02c..d971834 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -29,6 +29,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -51,6 +52,9 @@
  };
  
  void *vectors_page;

+#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
+static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
+static int undef_recovery_attempt;
  
  #ifdef CONFIG_DEBUG_USER

  unsigned int user_debug;
@@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
instr)
return fn ? fn(regs, instr) : 1;
  }
  
-asmlinkage void do_undefinstr(struct pt_regs *regs)

+static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
  {
-   unsigned int instr;
-   siginfo_t info;
-   void __user *pc;
-
-   clear_siginfo();
-   pc = (void __user *)instruction_pointer(regs);
+   unsigned int instr = 0;
  
  	if (processor_mode(regs) == SVC_MODE) {

  #ifdef CONFIG_THUMB2_KERNEL
@@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
goto die_sig;
instr = __mem_to_opcode_arm(instr);
}
+die_sig:
+   return instr;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+   unsigned int instr, instr2;
+   siginfo_t info;
+   void __user *pc;
+   unsigned long addr;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   spinlock_t *ptl;
+   clear_siginfo();
  
-	if (call_undef_hook(regs, instr) == 0)

+   pc = (void __user *)instruction_pointer(regs);
+
+   instr = getInstr(pc, regs);
+
+   if (instr && call_undef_hook(regs, instr) == 0)
return;
  
-die_sig:

  #ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
pr_info("%s (%d): undefined instruction: pc=%p\n",
@@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
dump_instr(KERN_INFO, regs);
}
  #endif
-
+   /* Trying to recover an invalid userspace instruction from here */
+   if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
__ratelimit(_recov_rs)) {
+   addr = (unsigned long) pc;
+   if (processor_mode(regs) == USR_MODE) {
+   struct page *page = NULL;
+
+   mm = current->mm;
+   vma = find_vma(mm, (unsigned long)pc);
+   if (!vma)
+   goto fail_recovery;
+
+   if (!vma->vm_file)
+   goto fail_recovery;
+
+   dump_instr(KERN_ALERT, regs);
+   down_write(>mmap_sem);
+   /* Check first, just in case, recovery already done by 
some other thread simultaneously */
+   flush_cache_range(vma, vma->vm_start, vma->vm_end);
+   instr2 = getInstr(pc, regs);
+   if (instr != instr2) {
+   up_write(>mmap_sem);
+   return;
+   }
+   pgd = pgd_offset(vma->vm_mm, addr);
+   pud = pud_offset(pgd, addr);
+   if (!pud_present(*pud)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pmd = pmd_offset(pud, addr);
+   if (!pmd_present(*pmd)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pte = pte_offset_map_lock(mm, pmd, addr, );
+   if (!pte_present(*pte)) {
+

Re: [PATCH] traps:Recover undefined user instruction on ARM

2018-10-05 Thread Robin Murphy

On 05/10/18 05:45, Manjeet Pawar wrote:

From: Rohit Thapliyal 

During user undefined instruction exception, the arm exception
handler currently results in application crash through SIGILL.
The bad instruction can be due to ddr/hardware issue.
For such cases, exception trap handler could try to recover the corrupted
text by clearing pagetable entry of undefined instruction pc and trying to fetch
the instruction opcode by generating major page fault.
Resulting in loading the page with correct instruction from mapped file.
If there is no error in root filesystem i.e. the opcode is intact
in file, then filemap fault shall be able to recover
the instruction and continue execution normally instead of crashing.


And what if that random memory corruption hits data, or kernel 
instructions? This seems like a lot of effort to go to to fail to solve 
an unsolvable problem...


Robin.


Signed-off-by: Rohit Thapliyal 
Signed-off-by: Manjeet Pawar 
---
  arch/arm/kernel/traps.c | 100 +++-
  1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index badf02c..d971834 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -29,6 +29,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include 

  #include 
@@ -51,6 +52,9 @@
  };
  
  void *vectors_page;

+#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
+static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
+static int undef_recovery_attempt;
  
  #ifdef CONFIG_DEBUG_USER

  unsigned int user_debug;
@@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
instr)
return fn ? fn(regs, instr) : 1;
  }
  
-asmlinkage void do_undefinstr(struct pt_regs *regs)

+static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
  {
-   unsigned int instr;
-   siginfo_t info;
-   void __user *pc;
-
-   clear_siginfo();
-   pc = (void __user *)instruction_pointer(regs);
+   unsigned int instr = 0;
  
  	if (processor_mode(regs) == SVC_MODE) {

  #ifdef CONFIG_THUMB2_KERNEL
@@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
goto die_sig;
instr = __mem_to_opcode_arm(instr);
}
+die_sig:
+   return instr;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+   unsigned int instr, instr2;
+   siginfo_t info;
+   void __user *pc;
+   unsigned long addr;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   spinlock_t *ptl;
+   clear_siginfo();
  
-	if (call_undef_hook(regs, instr) == 0)

+   pc = (void __user *)instruction_pointer(regs);
+
+   instr = getInstr(pc, regs);
+
+   if (instr && call_undef_hook(regs, instr) == 0)
return;
  
-die_sig:

  #ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
pr_info("%s (%d): undefined instruction: pc=%p\n",
@@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
dump_instr(KERN_INFO, regs);
}
  #endif
-
+   /* Trying to recover an invalid userspace instruction from here */
+   if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
__ratelimit(_recov_rs)) {
+   addr = (unsigned long) pc;
+   if (processor_mode(regs) == USR_MODE) {
+   struct page *page = NULL;
+
+   mm = current->mm;
+   vma = find_vma(mm, (unsigned long)pc);
+   if (!vma)
+   goto fail_recovery;
+
+   if (!vma->vm_file)
+   goto fail_recovery;
+
+   dump_instr(KERN_ALERT, regs);
+   down_write(>mmap_sem);
+   /* Check first, just in case, recovery already done by 
some other thread simultaneously */
+   flush_cache_range(vma, vma->vm_start, vma->vm_end);
+   instr2 = getInstr(pc, regs);
+   if (instr != instr2) {
+   up_write(>mmap_sem);
+   return;
+   }
+   pgd = pgd_offset(vma->vm_mm, addr);
+   pud = pud_offset(pgd, addr);
+   if (!pud_present(*pud)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pmd = pmd_offset(pud, addr);
+   if (!pmd_present(*pmd)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pte = pte_offset_map_lock(mm, pmd, addr, );
+   if (!pte_present(*pte)) {
+

[PATCH] traps:Recover undefined user instruction on ARM

2018-10-04 Thread Manjeet Pawar
From: Rohit Thapliyal  

During user undefined instruction exception, the arm exception
handler currently results in application crash through SIGILL.
The bad instruction can be due to ddr/hardware issue.
For such cases, exception trap handler could try to recover the corrupted
text by clearing pagetable entry of undefined instruction pc and trying to fetch
the instruction opcode by generating major page fault.
Resulting in loading the page with correct instruction from mapped file.
If there is no error in root filesystem i.e. the opcode is intact
in file, then filemap fault shall be able to recover
the instruction and continue execution normally instead of crashing.

Signed-off-by: Rohit Thapliyal 
Signed-off-by: Manjeet Pawar 
---
 arch/arm/kernel/traps.c | 100 +++-
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index badf02c..d971834 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -51,6 +52,9 @@
 };
 
 void *vectors_page;
+#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
+static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
+static int undef_recovery_attempt;
 
 #ifdef CONFIG_DEBUG_USER
 unsigned int user_debug;
@@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
instr)
return fn ? fn(regs, instr) : 1;
 }
 
-asmlinkage void do_undefinstr(struct pt_regs *regs)
+static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
 {
-   unsigned int instr;
-   siginfo_t info;
-   void __user *pc;
-
-   clear_siginfo();
-   pc = (void __user *)instruction_pointer(regs);
+   unsigned int instr = 0;
 
if (processor_mode(regs) == SVC_MODE) {
 #ifdef CONFIG_THUMB2_KERNEL
@@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
goto die_sig;
instr = __mem_to_opcode_arm(instr);
}
+die_sig:
+   return instr;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+   unsigned int instr, instr2;
+   siginfo_t info;
+   void __user *pc;
+   unsigned long addr;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   spinlock_t *ptl;
+   clear_siginfo();
 
-   if (call_undef_hook(regs, instr) == 0)
+   pc = (void __user *)instruction_pointer(regs);
+
+   instr = getInstr(pc, regs);
+
+   if (instr && call_undef_hook(regs, instr) == 0)
return;
 
-die_sig:
 #ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
pr_info("%s (%d): undefined instruction: pc=%p\n",
@@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
dump_instr(KERN_INFO, regs);
}
 #endif
-
+   /* Trying to recover an invalid userspace instruction from here */
+   if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
__ratelimit(_recov_rs)) {
+   addr = (unsigned long) pc;
+   if (processor_mode(regs) == USR_MODE) {
+   struct page *page = NULL;
+
+   mm = current->mm;
+   vma = find_vma(mm, (unsigned long)pc);
+   if (!vma)
+   goto fail_recovery;
+
+   if (!vma->vm_file)
+   goto fail_recovery;
+
+   dump_instr(KERN_ALERT, regs);
+   down_write(>mmap_sem);
+   /* Check first, just in case, recovery already done by 
some other thread simultaneously */
+   flush_cache_range(vma, vma->vm_start, vma->vm_end);
+   instr2 = getInstr(pc, regs);
+   if (instr != instr2) {
+   up_write(>mmap_sem);
+   return;
+   }
+   pgd = pgd_offset(vma->vm_mm, addr);
+   pud = pud_offset(pgd, addr);
+   if (!pud_present(*pud)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pmd = pmd_offset(pud, addr);
+   if (!pmd_present(*pmd)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pte = pte_offset_map_lock(mm, pmd, addr, );
+   if (!pte_present(*pte)) {
+   pte_unmap_unlock(pte, ptl);
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   page = pte_page(*pte);
+
+  

[PATCH] traps:Recover undefined user instruction on ARM

2018-10-04 Thread Manjeet Pawar
From: Rohit Thapliyal  

During user undefined instruction exception, the arm exception
handler currently results in application crash through SIGILL.
The bad instruction can be due to ddr/hardware issue.
For such cases, exception trap handler could try to recover the corrupted
text by clearing pagetable entry of undefined instruction pc and trying to fetch
the instruction opcode by generating major page fault.
Resulting in loading the page with correct instruction from mapped file.
If there is no error in root filesystem i.e. the opcode is intact
in file, then filemap fault shall be able to recover
the instruction and continue execution normally instead of crashing.

Signed-off-by: Rohit Thapliyal 
Signed-off-by: Manjeet Pawar 
---
 arch/arm/kernel/traps.c | 100 +++-
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index badf02c..d971834 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -51,6 +52,9 @@
 };
 
 void *vectors_page;
+#define MAX_UNDEF_RECOVERY_ATTEMPT NR_CPUS
+static DEFINE_RATELIMIT_STATE(undef_recov_rs, 10 * HZ, NR_CPUS);
+static int undef_recovery_attempt;
 
 #ifdef CONFIG_DEBUG_USER
 unsigned int user_debug;
@@ -435,14 +439,9 @@ int call_undef_hook(struct pt_regs *regs, unsigned int 
instr)
return fn ? fn(regs, instr) : 1;
 }
 
-asmlinkage void do_undefinstr(struct pt_regs *regs)
+static unsigned int getInstr(void __user *pc, struct pt_regs *regs)
 {
-   unsigned int instr;
-   siginfo_t info;
-   void __user *pc;
-
-   clear_siginfo();
-   pc = (void __user *)instruction_pointer(regs);
+   unsigned int instr = 0;
 
if (processor_mode(regs) == SVC_MODE) {
 #ifdef CONFIG_THUMB2_KERNEL
@@ -472,11 +471,32 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
goto die_sig;
instr = __mem_to_opcode_arm(instr);
}
+die_sig:
+   return instr;
+}
+
+asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+{
+   unsigned int instr, instr2;
+   siginfo_t info;
+   void __user *pc;
+   unsigned long addr;
+   struct mm_struct *mm;
+   struct vm_area_struct *vma;
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   spinlock_t *ptl;
+   clear_siginfo();
 
-   if (call_undef_hook(regs, instr) == 0)
+   pc = (void __user *)instruction_pointer(regs);
+
+   instr = getInstr(pc, regs);
+
+   if (instr && call_undef_hook(regs, instr) == 0)
return;
 
-die_sig:
 #ifdef CONFIG_DEBUG_USER
if (user_debug & UDBG_UNDEFINED) {
pr_info("%s (%d): undefined instruction: pc=%p\n",
@@ -485,7 +505,67 @@ asmlinkage void do_undefinstr(struct pt_regs *regs)
dump_instr(KERN_INFO, regs);
}
 #endif
-
+   /* Trying to recover an invalid userspace instruction from here */
+   if ((undef_recovery_attempt < MAX_UNDEF_RECOVERY_ATTEMPT) && 
__ratelimit(_recov_rs)) {
+   addr = (unsigned long) pc;
+   if (processor_mode(regs) == USR_MODE) {
+   struct page *page = NULL;
+
+   mm = current->mm;
+   vma = find_vma(mm, (unsigned long)pc);
+   if (!vma)
+   goto fail_recovery;
+
+   if (!vma->vm_file)
+   goto fail_recovery;
+
+   dump_instr(KERN_ALERT, regs);
+   down_write(>mmap_sem);
+   /* Check first, just in case, recovery already done by 
some other thread simultaneously */
+   flush_cache_range(vma, vma->vm_start, vma->vm_end);
+   instr2 = getInstr(pc, regs);
+   if (instr != instr2) {
+   up_write(>mmap_sem);
+   return;
+   }
+   pgd = pgd_offset(vma->vm_mm, addr);
+   pud = pud_offset(pgd, addr);
+   if (!pud_present(*pud)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pmd = pmd_offset(pud, addr);
+   if (!pmd_present(*pmd)) {
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   pte = pte_offset_map_lock(mm, pmd, addr, );
+   if (!pte_present(*pte)) {
+   pte_unmap_unlock(pte, ptl);
+   up_write(>mmap_sem);
+   goto fail_recovery;
+   }
+   page = pte_page(*pte);
+
+