On Tue, 2017-06-27 at 10:32 +0200, Christophe LEROY wrote:
>
> Le 27/06/2017 à 09:48, Balbir Singh a écrit :
> > This patch creates the window using text_poke_area, allocated
> > via get_vm_area(). text_poke_area is per CPU to avoid locking.
> > text_poke_area for each cpu is setup using late_initcall, prior
> > to setup of these alternate mapping areas, we continue to use
> > direct write to change/modify kernel text. With the ability
> > to use alternate mappings to write to kernel text, it provides
> > us the freedom to then turn text read-only and implement
> > CONFIG_STRICT_KERNEL_RWX.
> >
> > This code is CPU hotplug aware to ensure that we have mappings
> > for any new cpus as they come online and tear down mappings for
> > any cpus that are offline.
> >
> > Other arches do similar things, but use fixmaps. The reason
> > for not using fixmaps is to make use of any randomization in
> > the future.
> >
> > Signed-off-by: Balbir Singh
> > ---
> > arch/powerpc/lib/code-patching.c | 160
> > ++-
> > 1 file changed, 156 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/lib/code-patching.c
> > b/arch/powerpc/lib/code-patching.c
> > index 500b0f6..19b8368 100644
> > --- a/arch/powerpc/lib/code-patching.c
> > +++ b/arch/powerpc/lib/code-patching.c
> > @@ -12,23 +12,172 @@
> > #include
> > #include
> > #include
> > -#include
> > -#include
> > +#include
> > +#include
> > #include
> > #include
> >
> > +#include
> > +#include
> > +#include
> > +#include
> >
> > -int patch_instruction(unsigned int *addr, unsigned int instr)
> > +static int __patch_instruction(unsigned int *addr, unsigned int instr)
> > {
> > int err;
> >
> > __put_user_size(instr, addr, 4, err);
> > if (err)
> > return err;
> > - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
> > + asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
> > + return 0;
> > +}
> > +
> > +#ifdef CONFIG_STRICT_KERNEL_RWX
> > +static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
> > +
> > +static int text_area_cpu_up(unsigned int cpu)
> > +{
> > + struct vm_struct *area;
> > +
> > + area = get_vm_area(PAGE_SIZE, VM_ALLOC);
> > + if (!area) {
> > + WARN_ONCE(1, "Failed to create text area for cpu %d\n",
> > + cpu);
> > + return -1;
> > + }
> > + this_cpu_write(text_poke_area, area);
> > + return 0;
> > +}
> > +
> > +static int text_area_cpu_down(unsigned int cpu)
> > +{
> > + free_vm_area(this_cpu_read(text_poke_area));
> > + return 0;
> > +}
> > +
> > +/*
> > + * This is an early_initcall and early_initcalls happen at the right time
> > + * for us, after slab is enabled and before we mark ro pages R/O. In the
> > + * future if get_vm_area is randomized, this will be more flexible than
> > + * fixmap
> > + */
> > +static int __init setup_text_poke_area(void)
> > +{
> > + BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> > + "powerpc/text_poke:online", text_area_cpu_up,
> > + text_area_cpu_down));
> > +
> > + pr_info("text_poke area ready...\n");
> > + return 0;
> > +}
> > +
> > +/*
> > + * This can be called for kernel text or a module.
> > + */
> > +static int map_patch_area(void *addr, unsigned long text_poke_addr)
> > +{
> > + unsigned long pfn;
> > + int err;
> > +
> > + if (is_vmalloc_addr(addr))
> > + pfn = vmalloc_to_pfn(addr);
> > + else
> > + pfn = __pa_symbol(addr) >> PAGE_SHIFT;
> > +
> > + err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
> > + pgprot_val(PAGE_KERNEL));
> > + pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
> > + if (err)
> > + return -1;
> > + return 0;
> > +}
> > +
> > +static inline int unmap_patch_area(unsigned long addr)
> > +{
> > + pte_t *ptep;
> > + pmd_t *pmdp;
> > + pud_t *pudp;
> > + pgd_t *pgdp;
> > +
> > + pgdp = pgd_offset_k(addr);
> > + if (unlikely(!pgdp))
> > + return -EINVAL;
> > + pudp = pud_offset(pgdp, addr);
> > + if (unlikely(!pudp))
> > + return -EINVAL;
> > + pmdp = pmd_offset(pudp, addr);
> > + if (unlikely(!pmdp))
> > + return -EINVAL;
> > + ptep = pte_offset_kernel(pmdp, addr);
> > + if (unlikely(!ptep))
> > + return -EINVAL;
> > +
> > + pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
> > + /*
> > + * In hash, pte_clear flushes the tlb, in radix, we have to
> > + */
> > + pte_clear(&init_mm, addr, ptep);
> > + flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> > return 0;
> > }
> >
> > +int patch_instruction(unsigned int *addr, unsigned int instr)
> > +{
> > + int err;
> > + unsigned int *dest = NULL;
> > + unsigned long flags;
> > + unsigned long text_poke_addr;
> > + unsigned long kaddr = (unsigned long)addr;
> > +
> > + /*
> > +* During early earl