David Gibson <d...@au1.ibm.com> writes:

> On Fri, Feb 15, 2013 at 09:09:38PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.ku...@linux.vnet.ibm.com>
>> 

.... snip....

>> - *  - We allow for USER_ESID_BITS significant bits of ESID and
>> - * CONTEXT_BITS  bits of context for user addresses.
>> - *  i.e. 64T (46 bits) of address space for up to half a million contexts.
>> - *
>> - *  - The scramble function gives robust scattering in the hash
>> - * table (at least based on some initial results).  The previous
>> - * method was more susceptible to pathological cases giving excessive
>> - * hash collisions.
>> + * We also need to avoid the last segment of the last context, because that
>> + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
>> + * because of the modulo operation in vsid scramble. But the vmemmap
>> + * (which is what uses region 0xf) will never be close to 64TB in size
>> + * (it's 56 bytes per page of system memory).
>>   */
>>  
>>  #define CONTEXT_BITS                19
>> @@ -386,15 +382,25 @@ extern void slb_set_size(u16 size);
>>  #define USER_ESID_BITS_1T   6
>
> USER_ESID_BITS should probably be renamed just ESID_BITS, since it's
> now relevant to kernel addresses too.

Done. I added this as a patch on top of the series. 
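
As an aside, here is a small userspace sketch (mine, not the kernel's
vsid_scramble(); it uses a plain modulo, which should give the same
residue) of why that last proto-VSID has to be skipped, using the 256M
constants from this series:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CONTEXT_BITS		19
#define ESID_BITS		18	/* renamed from USER_ESID_BITS */
#define VSID_BITS_256M		(CONTEXT_BITS + ESID_BITS)
#define VSID_MULTIPLIER_256M	12538073ULL	/* 24-bit prime */
#define VSID_MODULUS_256M	((1ULL << VSID_BITS_256M) - 1)

int main(void)
{
	/* last segment of the last context: context 0x7ffff, esid 0x3ffff */
	uint64_t protovsid = (0x7ffffULL << ESID_BITS) | 0x3ffff;

	assert(protovsid == 0x1fffffffffULL);	/* == VSID_MODULUS_256M */

	/* protovsid equals the modulus, so the scramble collapses to 0 */
	uint64_t vsid = (protovsid * VSID_MULTIPLIER_256M) % VSID_MODULUS_256M;

	printf("vsid = %#llx\n", (unsigned long long)vsid);	/* prints 0 */
	return 0;
}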

>
>>  /*
>> + * 256MB segment
>> + * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments
>> + * available for user + kernel mapping. The top 4 contexts are used for
>> + * kernel mapping. Each segment contains 2^28 bytes. Each
>> + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
>> + * (19 == 37 + 28 - 46).
>> + */
>> +#define MAX_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 1)
>
> Hrm.  I think it would be clearer to have MAX_CONTEXT (still) be the
> maximum usable *user* context (i.e. 0x80000 - 5) and put the kernel
> ones above that still.
>

Done as

-#define MAX_CONTEXT    ((ASM_CONST(1) << CONTEXT_BITS) - 1)
+#define MAX_USER_CONTEXT       ((ASM_CONST(1) << CONTEXT_BITS) - 5)

Also updated the references to MAX_CONTEXT in the code to MAX_USER_CONTEXT.
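
For reference, the resulting carve-up of the proto-VSID space looks
roughly like the following (my sketch only; proto_vsid_256M() is
illustrative, for 256MB segments, and uses the renamed ESID_BITS):

#include <stdint.h>

#define CONTEXT_BITS		19
#define ESID_BITS		18		/* renamed from USER_ESID_BITS */
#define SID_SHIFT		28		/* 256MB segments */
#define REGION_MASK		(0xfULL << 60)
#define MAX_USER_CONTEXT	((1ULL << CONTEXT_BITS) - 5)	/* 0x7fffb */

/*
 * User mappings use contexts 0..MAX_USER_CONTEXT; the four ids above
 * that (0x7fffc..0x7ffff) back the kernel regions 0xc..0xf.  A
 * proto-VSID is just the context stacked on top of the segment's ESID,
 * which is what the rldimi in slb_low.S builds.
 */
static uint64_t proto_vsid_256M(uint64_t context, uint64_t ea)
{
	uint64_t esid = (ea & ~REGION_MASK) >> SID_SHIFT;

	return (context << ESID_BITS) | esid;
}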
 
>> +
>> +/*
>>   * This should be computed such that protovosid * vsid_mulitplier
>>   * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
>>   */
>> #define VSID_MULTIPLIER_256M        ASM_CONST(12538073)     /* 24-bit prime */
>> -#define VSID_BITS_256M              (CONTEXT_BITS + USER_ESID_BITS + 1)
>> +#define VSID_BITS_256M              (CONTEXT_BITS + USER_ESID_BITS)
>>  #define VSID_MODULUS_256M   ((1UL<<VSID_BITS_256M)-1)
>>  
>>  #define VSID_MULTIPLIER_1T  ASM_CONST(12538073)     /* 24-bit prime */
>> -#define VSID_BITS_1T                (CONTEXT_BITS + USER_ESID_BITS_1T + 1)
>> +#define VSID_BITS_1T                (CONTEXT_BITS + USER_ESID_BITS_1T)
>>  #define VSID_MODULUS_1T             ((1UL<<VSID_BITS_1T)-1)
>>  
>>  

.... snip......

>>  #endif /* _ASM_POWERPC_MMU_HASH64_H_ */
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>> index 4665e82..0e9c48c 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -1268,20 +1268,39 @@ do_ste_alloc:
>>  _GLOBAL(do_stab_bolted)
>
> The stab path certainly hasn't been tested, since we've been broken on
> stab machines for a long time.
>
>>      stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
>>      std     r11,PACA_EXSLB+EX_SRR0(r13)     /* save SRR0 in exc. frame */
>> +    mfspr   r11,SPRN_DAR                    /* ea */
>>  
>> +    /*
>> +     * check for bad kernel/user address
>> +     * (ea & ~REGION_MASK) >= PGTABLE_RANGE
>> +     */
>> +    clrldi  r9,r11,4
>> +    li      r10,-1
>> +    clrldi  r10,r10,(64 - 46)
>> +    cmpld   cr7,r9,r10
>
> You can replace the above 4 instructions with just:
>       rldicr. r9,r11,4,(64-46-4)
>

Nice, updated as below:

-       clrldi  r9,r11,4
-       li      r10,-1
-       clrldi  r10,r10,(64 - 46)
-       cmpld   cr7,r9,r10
+       rldicr. r9,r11,4,(64 - 46 - 4)
        li      r9,0    /* VSID = 0 for bad address */
-       bgt     cr7,0f
+       bne-    0f
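
To spell out what both forms check, a C equivalent (my sketch;
bad_address() is illustrative, with PGTABLE_RANGE taken as the 2^46/64TB
limit used in the comment above):

#include <stdbool.h>
#include <stdint.h>

#define REGION_MASK	(0xfULL << 60)	/* top nibble = region id */
#define PGTABLE_RANGE	(1ULL << 46)	/* 64TB covered per region */

/* such addresses get VSID 0, which the hash/SLB paths treat as bad */
static bool bad_address(uint64_t ea)
{
	return (ea & ~REGION_MASK) >= PGTABLE_RANGE;
}

The rldicr. form does the same in one shot: rotate the region nibble
down to the bottom, mask off everything except the offset bits beyond
the page table range, and let the record form provide the compare for
free.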


>> +    li      r9,0    /* VSID = 0 for bad address */
>> +    bgt     cr7,0f
>> +
>> +    /*


.... snip....

>> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
>> index 3a292be..bfeab83 100644
>> --- a/arch/powerpc/mm/hash_utils_64.c
>> +++ b/arch/powerpc/mm/hash_utils_64.c
>> @@ -194,6 +194,11 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
>>              unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
>>              unsigned long tprot = prot;
>>  
>> +            /*
>> +             * If we hit a bad address return error.
>> +             */
>> +            if (!vsid)
>> +                    return -1;
>>              /* Make kernel text executable */
>>              if (overlaps_kernel_text(vaddr, vaddr + step))
>>                      tprot &= ~HPTE_R_N;
>> @@ -921,11 +926,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
>>      DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
>>              ea, access, trap);
>>  
>> -    if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
>> -            DBG_LOW(" out of pgtable range !\n");
>> -            return 1;
>> -    }
>> -
>
> Hrm.  This test is conceptually different, even if the logic is the
> same as the vsid availablility test you may have performed earlier.
> Perhaps add BUILD_BUG_ON()s to ensure that they really are the same.
>

Can you elaborate on that? What BUILD_BUG_ON() test are you suggesting?



>
>>      /* Get region & vsid */
>>      switch (REGION_ID(ea)) {
>>      case USER_REGION_ID:
>> @@ -956,6 +956,11 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
>>      }
>>      DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
>>  
>> +    /* Bad address. */
>> +    if (!vsid) {
>> +            DBG_LOW("Bad address!\n");
>> +            return 1;
>> +    }
>>      /* Get pgdir */
>>      pgdir = mm->pgd;
>>      if (pgdir == NULL)
>> @@ -1125,6 +1130,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
>>      /* Get VSID */
>>      ssize = user_segment_size(ea);
>>      vsid = get_vsid(mm->context.id, ea, ssize);
>> +    if (!vsid)
>> +            return;
>>  
>>      /* Hash doesn't like irqs */
>>      local_irq_save(flags);
>> @@ -1217,6 +1224,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
>>      hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
>>      hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
>>  
>> +    /* Don't create HPTE entries for bad address */
>> +    if (!vsid)
>> +            return;
>>      ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr),
>>                               mode, HPTE_V_BOLTED,
>>                               mmu_linear_psize, mmu_kernel_ssize);
>> diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
>> index 40bc5b0..59cd773 100644
>> --- a/arch/powerpc/mm/mmu_context_hash64.c
>> +++ b/arch/powerpc/mm/mmu_context_hash64.c
>> @@ -29,15 +29,6 @@
>>  static DEFINE_SPINLOCK(mmu_context_lock);
>>  static DEFINE_IDA(mmu_context_ida);
>>  
>> -/*
>> - * 256MB segment
>> - * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments
>> - * available for user mappings. Each segment contains 2^28 bytes. Each
>> - * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
>> - * (19 == 37 + 28 - 46).
>> - */
>> -#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1)
>> -
>>  int __init_new_context(void)
>>  {
>>      int index;
>> @@ -56,7 +47,8 @@ again:
>>      else if (err)
>>              return err;
>>  
>> -    if (index > MAX_CONTEXT) {
>> +    if (index > (MAX_CONTEXT - 4)) {
>> +            /* Top 4 context id values are used for kernel */
>
> This change would not be necessary if you changed MAX_CONTEXT as
> suggested above.
>

Done. It now reads:

-       if (index > (MAX_CONTEXT - 4)) {
-               /* Top 4 context id values are used for kernel */
+       if (index > MAX_USER_CONTEXT) {


>>              spin_lock(&mmu_context_lock);
>>              ida_remove(&mmu_context_ida, index);
>>              spin_unlock(&mmu_context_lock);
>> diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
>> index 1a16ca2..c066d00 100644
>> --- a/arch/powerpc/mm/slb_low.S
>> +++ b/arch/powerpc/mm/slb_low.S
>> @@ -31,13 +31,20 @@
>>   * No other registers are examined or changed.
>>   */
>>  _GLOBAL(slb_allocate_realmode)
>> -    /* r3 = faulting address */
>> +    /*
>> +     * check for bad kernel/user address
>> +     * (ea & ~REGION_MASK) >= PGTABLE_RANGE
>> +     */
>> +    clrldi  r9,r3,4
>> +    li      r10,-1
>> +    clrldi  r10,r10,(64 - 46)
>> +    cmpld   cr7,r9,r10
>> +    bgt     cr7,8f
>
> As in the stab path, you can accomplish this with a single rldicr.
>
>>  
>>      srdi    r9,r3,60                /* get region */
>> -    srdi    r10,r3,28               /* get esid */
>>      cmpldi  cr7,r9,0xc              /* cmp PAGE_OFFSET for later use */
>>  
>> -    /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
>> +    /* r3 = address, cr7 = <> PAGE_OFFSET */
>>      blt     cr7,0f                  /* user or kernel? */
>>  
>>      /* kernel address: proto-VSID = ESID */
>> @@ -56,18 +63,26 @@ _GLOBAL(slb_allocate_realmode)
>>       */
>>  _GLOBAL(slb_miss_kernel_load_linear)
>>      li      r11,0
>> -    li      r9,0x1
>> +    /*
>> +     * context = (MAX_CONTEXT - 3) + ((ea >> 60) - 0xc)
>> +     */
>> +    srdi    r9,r3,60
>> +    subi    r9,r9,(0xc + 3 + 1)
>> +    lis     r10, 8
>> +    add     r9,r9,r10
>
> Hrm.  You can avoid clobbering r10, which I assume is why you removed
> the computation of esid from the common path by doing this instead:
>
>       rldicl  r9,r3,4,62
>       addis   r9,r9,8
>       subi    r9,r9,4

Nice. Updated. I am inlining the final patch below.

>
>> +    srdi    r10,r3,SID_SHIFT        /* get esid */
>>      /*
>>       * for 1T we shift 12 bits more.  slb_finish_load_1T will do
>>       * the necessary adjustment
>>       */
>> -    rldimi  r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
>> +    rldimi  r10,r9,USER_ESID_BITS,0
>>  BEGIN_FTR_SECTION
>>      b       slb_finish_load
>>  END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
>>      b       slb_finish_load_1T
>>  
>>  1:
>> +    srdi    r10,r3,SID_SHIFT        /* get esid */
>>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
>>      /* Check virtual memmap region. To be patches at kernel boot */
>>      cmpldi  cr0,r9,0xf
>> @@ -91,23 +106,26 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
>>      _GLOBAL(slb_miss_kernel_load_io)
>>      li      r11,0
>>  6:
>> -    li      r9,0x1
>> +    /*
>> +     * context = (MAX_CONTEXT - 3) + ((ea >> 60) - 0xc)
>> +     */
>> +    srdi    r9,r3,60
>> +    subi    r9,r9,(0xc + 3 + 1)
>> +    lis     r10,8
>> +    add     r9,r9,r10
>> +    srdi    r10,r3,SID_SHIFT
>
> Same here.  Can you put the kernel context calculation into a common
> path?  In fact, now that kernel vsids, like user vsids are made of a
> context and vsid component, can you put the rldimi which combines them
> into a common path for both kernel and user addresses?


Will do.

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ffa0c48..f0351b5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1274,12 +1274,9 @@ _GLOBAL(do_stab_bolted)
         * check for bad kernel/user address
         * (ea & ~REGION_MASK) >= PGTABLE_RANGE
         */
-       clrldi  r9,r11,4
-       li      r10,-1
-       clrldi  r10,r10,(64 - 46)
-       cmpld   cr7,r9,r10
+       rldicr. r9,r11,4,(64 - 46 - 4)
        li      r9,0    /* VSID = 0 for bad address */
-       bgt     cr7,0f
+       bne-    0f
 
        /*
         * Calculate VSID:
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 80fd644..349411e 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -35,16 +35,14 @@ _GLOBAL(slb_allocate_realmode)
         * check for bad kernel/user address
         * (ea & ~REGION_MASK) >= PGTABLE_RANGE
         */
-       clrldi  r9,r3,4
-       li      r10,-1
-       clrldi  r10,r10,(64 - 46)
-       cmpld   cr7,r9,r10
-       bgt     cr7,8f
+       rldicr. r9,r3,4,(64 - 46 - 4)
+       bne-    8f
 
        srdi    r9,r3,60                /* get region */
+       srdi    r10,r3,SID_SHIFT        /* get esid */
        cmpldi  cr7,r9,0xc              /* cmp PAGE_OFFSET for later use */
 
-       /* r3 = address, cr7 = <> PAGE_OFFSET */
+       /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
        blt     cr7,0f                  /* user or kernel? */
 
        /* kernel address: proto-VSID = ESID */
@@ -66,11 +64,10 @@ _GLOBAL(slb_miss_kernel_load_linear)
        /*
         * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
         */
-       srdi    r9,r3,60
-       subi    r9,r9,(0xc + 4)
-       lis     r10, 8
-       add     r9,r9,r10
-       srdi    r10,r3,SID_SHIFT        /* get esid */
+       rldicl  r9,r3,4,62
+       addis   r9,r9,8
+       subi    r9,r9,4
+
        /*
         * for 1T we shift 12 bits more.  slb_finish_load_1T will do
         * the necessary adjustment
@@ -82,7 +79,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
        b       slb_finish_load_1T
 
 1:
-       srdi    r10,r3,SID_SHIFT        /* get esid */
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* Check virtual memmap region. To be patches at kernel boot */
        cmpldi  cr0,r9,0xf
@@ -109,11 +105,9 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
        /*
         * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
         */
-       srdi    r9,r3,60
-       subi    r9,r9,(0xc + 4)
-       lis     r10,8
-       add     r9,r9,r10
-       srdi    r10,r3,SID_SHIFT
+       rldicl  r9,r3,4,62
+       addis   r9,r9,8
+       subi    r9,r9,4
        /*
         * for 1T we shift 12 bits more.  slb_finish_load_1T will do
         * the necessary adjustment
@@ -125,8 +119,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
        b       slb_finish_load_1T
 
 0:
-       srdi    r10,r3,SID_SHIFT        /* get esid */
-
        /* when using slices, we extract the psize off the slice bitmaps
         * and then we need to get the sllp encoding off the mmu_psize_defs
         * array.
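
For anyone double-checking the arithmetic, a quick userspace emulation
(mine; rotl64() and the loop are illustrative only) of that
rldicl/addis/subi sequence against the formula in the comment:

#include <assert.h>
#include <stdint.h>

#define CONTEXT_BITS		19
#define MAX_USER_CONTEXT	((1ULL << CONTEXT_BITS) - 5)	/* 0x7fffb */

static uint64_t rotl64(uint64_t x, unsigned int n)
{
	return (x << n) | (x >> (64 - n));
}

int main(void)
{
	for (uint64_t region = 0xc; region <= 0xf; region++) {
		uint64_t ea = (region << 60) | 0x123456789aULL;	/* arbitrary offset */

		uint64_t r9 = rotl64(ea, 4) & 0x3;	/* rldicl r9,r3,4,62 */
		r9 += 0x8ULL << 16;			/* addis  r9,r9,8    */
		r9 -= 4;				/* subi   r9,r9,4    */

		/* context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 */
		assert(r9 == MAX_USER_CONTEXT + (region - 0xc) + 1);
	}
	return 0;
}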

