Re: [PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB

2012-07-23 Thread Aneesh Kumar K.V
Paul Mackerras  writes:

> On Mon, Jul 09, 2012 at 06:43:37PM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" 
>> 
>> This patch makes the high psizes mask an unsigned char array
>> so that we can have more than 16TB. Currently we support up to
>> 64TB.
>
> Some comments inline...
>
>> @@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
>>  #ifdef CONFIG_PPC_MM_SLICES
>>  unsigned int get_paca_psize(unsigned long addr)
>>  {
>> -unsigned long index, slices;
>> +u64 lpsizes;
>> +unsigned char *hpsizes;
>> +unsigned long index, mask_index;
>>  
>>  if (addr < SLICE_LOW_TOP) {
>> -slices = get_paca()->context.low_slices_psize;
>> +lpsizes = get_paca()->context.low_slices_psize;
>>  index = GET_LOW_SLICE_INDEX(addr);
>> -} else {
>> -slices = get_paca()->context.high_slices_psize;
>> -index = GET_HIGH_SLICE_INDEX(addr);
>> +return (lpsizes >> (index * 4)) & 0xF;
>>  }
>> -return (slices >> (index * 4)) & 0xF;
>> +hpsizes = get_paca()->context.high_slices_psize;
>> +index = GET_HIGH_SLICE_INDEX(addr) >> 1;
>> +mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1);
>> +return (hpsizes[index] >> (mask_index * 4)) & 0xF;
>
> The last 3 lines here feel awkward.  How about:
>
index = GET_HIGH_SLICE_INDEX(addr);
mask_index = index & 1;
return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;


That is much simpler. I updated the patch, changing to the above format in
all the locations.

>
>>  static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
>>  {
>> +unsigned char *hpsizes;
>> +int index, mask_index;
>>  struct slice_mask ret = { 0, 0 };
>>  unsigned long i;
>> -u64 psizes;
>> +u64 lpsizes;
>>  
>> -psizes = mm->context.low_slices_psize;
>> +lpsizes = mm->context.low_slices_psize;
>>  for (i = 0; i < SLICE_NUM_LOW; i++)
>> -if (((psizes >> (i * 4)) & 0xf) == psize)
>> +if (((lpsizes >> (i * 4)) & 0xf) == psize)
>>  ret.low_slices |= 1u << i;
>>  
>> -psizes = mm->context.high_slices_psize;
>> -for (i = 0; i < SLICE_NUM_HIGH; i++)
>> -if (((psizes >> (i * 4)) & 0xf) == psize)
>> +hpsizes = mm->context.high_slices_psize;
>> +for (i = 0; i < SLICE_NUM_HIGH; i++) {
>> +index = i >> 1;
>> +mask_index = i - (index << 1);
>
> Again, seems like a complicated way to do mask_index = i & 1 (or
> even i % 2, if you prefer, but then make i an unsigned type).
>
> Paul.


-aneesh



Re: [PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB

2012-07-22 Thread Paul Mackerras
On Mon, Jul 09, 2012 at 06:43:37PM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" 
> 
> This patch makes the high psizes mask an unsigned char array
> so that we can have more than 16TB. Currently we support up to
> 64TB.

Some comments inline...

> @@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
>  #ifdef CONFIG_PPC_MM_SLICES
>  unsigned int get_paca_psize(unsigned long addr)
>  {
> - unsigned long index, slices;
> + u64 lpsizes;
> + unsigned char *hpsizes;
> + unsigned long index, mask_index;
>  
>   if (addr < SLICE_LOW_TOP) {
> - slices = get_paca()->context.low_slices_psize;
> + lpsizes = get_paca()->context.low_slices_psize;
>   index = GET_LOW_SLICE_INDEX(addr);
> - } else {
> - slices = get_paca()->context.high_slices_psize;
> - index = GET_HIGH_SLICE_INDEX(addr);
> + return (lpsizes >> (index * 4)) & 0xF;
>   }
> - return (slices >> (index * 4)) & 0xF;
> + hpsizes = get_paca()->context.high_slices_psize;
> + index = GET_HIGH_SLICE_INDEX(addr) >> 1;
> + mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1);
> + return (hpsizes[index] >> (mask_index * 4)) & 0xF;

The last 3 lines here feel awkward.  How about:

> + index = GET_HIGH_SLICE_INDEX(addr);
> + mask_index = index & 1;
> + return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;

>  static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
>  {
> + unsigned char *hpsizes;
> + int index, mask_index;
>   struct slice_mask ret = { 0, 0 };
>   unsigned long i;
> - u64 psizes;
> + u64 lpsizes;
>  
> - psizes = mm->context.low_slices_psize;
> + lpsizes = mm->context.low_slices_psize;
>   for (i = 0; i < SLICE_NUM_LOW; i++)
> - if (((psizes >> (i * 4)) & 0xf) == psize)
> + if (((lpsizes >> (i * 4)) & 0xf) == psize)
>   ret.low_slices |= 1u << i;
>  
> - psizes = mm->context.high_slices_psize;
> - for (i = 0; i < SLICE_NUM_HIGH; i++)
> - if (((psizes >> (i * 4)) & 0xf) == psize)
> + hpsizes = mm->context.high_slices_psize;
> + for (i = 0; i < SLICE_NUM_HIGH; i++) {
> + index = i >> 1;
> + mask_index = i - (index << 1);

Again, seems like a complicated way to do mask_index = i & 1 (or
even i % 2, if you prefer, but then make i an unsigned type).

Paul.
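
As an aside, a small user-space sketch of the high-slice scan discussed
above, with mask_index computed as i & 1; SLICE_NUM_HIGH and the function
name here are assumptions for the example, not the kernel's definitions:

#include <stdint.h>
#include <stdio.h>

/*
 * Walk the 4-bit psize encodings packed two per byte and build a 64-bit
 * mask of the 1TB slices that use the given psize.
 */
#define SLICE_NUM_HIGH  64

static uint64_t high_slices_for_psize(const unsigned char *hpsizes,
                                      unsigned int psize)
{
        uint64_t mask = 0;
        unsigned long i;

        for (i = 0; i < SLICE_NUM_HIGH; i++) {
                unsigned long mask_index = i & 1;       /* which nibble */

                if (((hpsizes[i >> 1] >> (mask_index * 4)) & 0xF) == psize)
                        mask |= 1ULL << i;
        }
        return mask;
}

int main(void)
{
        unsigned char hpsizes[SLICE_NUM_HIGH / 2] = { 0 };

        hpsizes[0] = 0x40;      /* slice 1 -> psize 4, slice 0 -> psize 0 */
        printf("slices with psize 4: 0x%016llx\n",
               (unsigned long long)high_slices_for_psize(hpsizes, 4));
        return 0;
}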


[PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB

2012-07-09 Thread Aneesh Kumar K.V
From: "Aneesh Kumar K.V" 

This patch makes the high psizes mask an unsigned char array
so that we can have more than 16TB. Currently we support up to
64TB.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/mmu-hash64.h |6 ++-
 arch/powerpc/include/asm/page_64.h|6 ++-
 arch/powerpc/mm/hash_utils_64.c   |   15 +++---
 arch/powerpc/mm/slb_low.S |   35 
 arch/powerpc/mm/slice.c   |   95 +
 5 files changed, 107 insertions(+), 50 deletions(-)
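
As a quick, stand-alone sanity check on the sizes used below (the macro
names here are illustrative, not the kernel's): 64TB of address space in
1TB slices gives 64 high slices, so 4 bits of psize per slice needs 32
bytes, and a 64-bit mask covers one bit per slice.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TB                      (1ULL << 40)
#define SUPPORTED_RANGE         (64 * TB)       /* 64TB */
#define HIGH_SLICE_SIZE         TB              /* each high slice is 1TB */
#define NUM_HIGH_SLICES         (SUPPORTED_RANGE / HIGH_SLICE_SIZE)

int main(void)
{
        /* 4 bits of psize per slice -> two slices per byte. */
        unsigned long psize_bytes = NUM_HIGH_SLICES * 4 / 8;

        assert(NUM_HIGH_SLICES == 64);
        assert(psize_bytes == 32);                       /* high_slices_psize[32] */
        assert(NUM_HIGH_SLICES <= 8 * sizeof(uint64_t)); /* u64 high_slices fits */

        printf("%llu high slices, %lu bytes of psize encodings\n",
               (unsigned long long)NUM_HIGH_SLICES, psize_bytes);
        return 0;
}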

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index a085de2..35b74e8 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -463,7 +463,11 @@ typedef struct {
 
 #ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize;   /* SLB page size encodings */
-   u64 high_slices_psize;  /* 4 bits per slice for now */
+   /*
+    * Right now we support up to 64TB; with 4 bits for each
+    * 1TB slice, we need 32 bytes for 64TB.
+    */
+   unsigned char high_slices_psize[32];  /* 4 bits per slice for now */
 #else
u16 sllp;   /* SLB page size encoding */
 #endif
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index fed85e6..6c9bef4 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -82,7 +82,11 @@ extern u64 ppc64_pft_size;
 
 struct slice_mask {
u16 low_slices;
-   u16 high_slices;
+   /*
+    * This should be derived from PGTABLE_RANGE. For the current
+    * max of 64TB, u64 should be OK.
+    */
+   u64 high_slices;
 };
 
 struct mm_struct;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0d8c4b8..eb4a3ac 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_MM_SLICES
 unsigned int get_paca_psize(unsigned long addr)
 {
-   unsigned long index, slices;
+   u64 lpsizes;
+   unsigned char *hpsizes;
+   unsigned long index, mask_index;
 
if (addr < SLICE_LOW_TOP) {
-   slices = get_paca()->context.low_slices_psize;
+   lpsizes = get_paca()->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
-   } else {
-   slices = get_paca()->context.high_slices_psize;
-   index = GET_HIGH_SLICE_INDEX(addr);
+   return (lpsizes >> (index * 4)) & 0xF;
}
-   return (slices >> (index * 4)) & 0xF;
+   hpsizes = get_paca()->context.high_slices_psize;
+   index = GET_HIGH_SLICE_INDEX(addr) >> 1;
+   mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1);
+   return (hpsizes[index] >> (mask_index * 4)) & 0xF;
 }
 
 #else
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index b9ee79ce..c355af6 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 * between 4k and 64k standard page size
 */
 #ifdef CONFIG_PPC_MM_SLICES
+   /* r10 has the esid */
    cmpldi  r10,16
-
-   /* Get the slice index * 4 in r11 and matching slice size mask in r9 */
-   ld  r9,PACALOWSLICESPSIZE(r13)
-   sldi    r11,r10,2
+   /* below SLICE_LOW_TOP */
    blt 5f
-   ld  r9,PACAHIGHSLICEPSIZE(r13)
-   srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2)
-   andi.   r11,r11,0x3c
-
-5: /* Extract the psize and multiply to get an array offset */
+   /*
+    * Handle hpsizes:
+    * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index.
+    * We use r10 here; later we restore it to the esid.
+    * Can we use another register instead of r10?
+    */
+   srdi    r10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */
+   srdi    r11,r10,1   /* r11 is array index */
+   addi    r9,r11,PACAHIGHSLICEPSIZE
+   lbzx    r9,r9,r13   /* r9 is hpsizes[r11] */
+   sldi    r11,r11,1
+   subf    r11,r11,r10 /* mask_index = index - (array_index << 1) */
+   srdi    r10,r3,28   /* restore r10 with esid */
+   b   6f
+5:
+   /*
+    * Handle lpsizes:
+    * r9 is get_paca()->context.low_slices_psize, r11 is index.
+    */
+   ld  r9,PACALOWSLICESPSIZE(r13)
+   mr  r11,r10
+6:
+   sldi    r11,r11,2  /* index * 4 */
+   /* Extract the psize and multiply to get an array offset */
srd r9,r9,r11
andi.   r9,r9,0xf
mulli   r9,r9,MMUPSIZEDEFSIZE
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 73709f7..302a481 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -42,7 +42,7 @@ int _slice_debug = 1;
 
 static void slice_