Re: [PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB
Paul Mackerras writes: > On Mon, Jul 09, 2012 at 06:43:37PM +0530, Aneesh Kumar K.V wrote: >> From: "Aneesh Kumar K.V" >> >> This patch makes the high psizes mask as an unsigned char array >> so that we can have more than 16TB. Currently we support upto >> 64TB > > Some comments inline... > >> @@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, >> pte_t pte, int trap) >> #ifdef CONFIG_PPC_MM_SLICES >> unsigned int get_paca_psize(unsigned long addr) >> { >> -unsigned long index, slices; >> +u64 lpsizes; >> +unsigned char *hpsizes; >> +unsigned long index, mask_index; >> >> if (addr < SLICE_LOW_TOP) { >> -slices = get_paca()->context.low_slices_psize; >> +lpsizes = get_paca()->context.low_slices_psize; >> index = GET_LOW_SLICE_INDEX(addr); >> -} else { >> -slices = get_paca()->context.high_slices_psize; >> -index = GET_HIGH_SLICE_INDEX(addr); >> +return (lpsizes >> (index * 4)) & 0xF; >> } >> -return (slices >> (index * 4)) & 0xF; >> +hpsizes = get_paca()->context.high_slices_psize; >> +index = GET_HIGH_SLICE_INDEX(addr) >> 1; >> +mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1); >> +return (hpsizes[index] >> (mask_index * 4)) & 0xF; > > The last 3 lines here feel awkward. How about: > index = GET_HIGH_SLICE_INDEX(addr); mask_index = index & 1; return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; That is much simpler. I updated the patch, changing to the above format in all the locations. 
> >> static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int >> psize) >> { >> +unsigned char *hpsizes; >> +int index, mask_index; >> struct slice_mask ret = { 0, 0 }; >> unsigned long i; >> -u64 psizes; >> +u64 lpsizes; >> >> -psizes = mm->context.low_slices_psize; >> +lpsizes = mm->context.low_slices_psize; >> for (i = 0; i < SLICE_NUM_LOW; i++) >> -if (((psizes >> (i * 4)) & 0xf) == psize) >> +if (((lpsizes >> (i * 4)) & 0xf) == psize) >> ret.low_slices |= 1u << i; >> >> -psizes = mm->context.high_slices_psize; >> -for (i = 0; i < SLICE_NUM_HIGH; i++) >> -if (((psizes >> (i * 4)) & 0xf) == psize) >> +hpsizes = mm->context.high_slices_psize; >> +for (i = 0; i < SLICE_NUM_HIGH; i++) { >> +index = i >> 1; >> +mask_index = i - (index << 1); > > Again, seems like a complicated way to do mask_index = i & 1 (or > even i % 2, if you prefer, but then make i an unsigned type). > > Paul. -aneesh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB
On Mon, Jul 09, 2012 at 06:43:37PM +0530, Aneesh Kumar K.V wrote: > From: "Aneesh Kumar K.V" > > This patch makes the high psizes mask as an unsigned char array > so that we can have more than 16TB. Currently we support upto > 64TB Some comments inline... > @@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, > pte_t pte, int trap) > #ifdef CONFIG_PPC_MM_SLICES > unsigned int get_paca_psize(unsigned long addr) > { > - unsigned long index, slices; > + u64 lpsizes; > + unsigned char *hpsizes; > + unsigned long index, mask_index; > > if (addr < SLICE_LOW_TOP) { > - slices = get_paca()->context.low_slices_psize; > + lpsizes = get_paca()->context.low_slices_psize; > index = GET_LOW_SLICE_INDEX(addr); > - } else { > - slices = get_paca()->context.high_slices_psize; > - index = GET_HIGH_SLICE_INDEX(addr); > + return (lpsizes >> (index * 4)) & 0xF; > } > - return (slices >> (index * 4)) & 0xF; > + hpsizes = get_paca()->context.high_slices_psize; > + index = GET_HIGH_SLICE_INDEX(addr) >> 1; > + mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1); > + return (hpsizes[index] >> (mask_index * 4)) & 0xF; The last 3 lines here feel awkward. 
How about: > + index = GET_HIGH_SLICE_INDEX(addr); > + mask_index = index & 1; > + return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; > static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) > { > + unsigned char *hpsizes; > + int index, mask_index; > struct slice_mask ret = { 0, 0 }; > unsigned long i; > - u64 psizes; > + u64 lpsizes; > > - psizes = mm->context.low_slices_psize; > + lpsizes = mm->context.low_slices_psize; > for (i = 0; i < SLICE_NUM_LOW; i++) > - if (((psizes >> (i * 4)) & 0xf) == psize) > + if (((lpsizes >> (i * 4)) & 0xf) == psize) > ret.low_slices |= 1u << i; > > - psizes = mm->context.high_slices_psize; > - for (i = 0; i < SLICE_NUM_HIGH; i++) > - if (((psizes >> (i * 4)) & 0xf) == psize) > + hpsizes = mm->context.high_slices_psize; > + for (i = 0; i < SLICE_NUM_HIGH; i++) { > + index = i >> 1; > + mask_index = i - (index << 1); Again, seems like a complicated way to do mask_index = i & 1 (or even i % 2, if you prefer, but then make i an unsigned type). Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH -V3 07/11] arch/powerpc: Increase the slice range to 64TB
From: "Aneesh Kumar K.V" This patch makes the high psizes mask as an unsigned char array so that we can have more than 16TB. Currently we support upto 64TB Signed-off-by: Aneesh Kumar K.V --- arch/powerpc/include/asm/mmu-hash64.h |6 ++- arch/powerpc/include/asm/page_64.h|6 ++- arch/powerpc/mm/hash_utils_64.c | 15 +++--- arch/powerpc/mm/slb_low.S | 35 arch/powerpc/mm/slice.c | 95 + 5 files changed, 107 insertions(+), 50 deletions(-) diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index a085de2..35b74e8 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -463,7 +463,11 @@ typedef struct { #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ - u64 high_slices_psize; /* 4 bits per slice for now */ + /* +* Right now we support 64TB and 4 bits for each +* 1TB slice we need 32 bytes for 64TB. +*/ + unsigned char high_slices_psize[32]; /* 4 bits per slice for now */ #else u16 sllp; /* SLB page size encoding */ #endif diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index fed85e6..6c9bef4 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -82,7 +82,11 @@ extern u64 ppc64_pft_size; struct slice_mask { u16 low_slices; - u16 high_slices; + /* +* This should be derived out of PGTABLE_RANGE. For the current +* max 64TB, u64 should be ok. 
+*/ + u64 high_slices; }; struct mm_struct; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0d8c4b8..eb4a3ac 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -804,16 +804,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES unsigned int get_paca_psize(unsigned long addr) { - unsigned long index, slices; + u64 lpsizes; + unsigned char *hpsizes; + unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - slices = get_paca()->context.low_slices_psize; + lpsizes = get_paca()->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - } else { - slices = get_paca()->context.high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); + return (lpsizes >> (index * 4)) & 0xF; } - return (slices >> (index * 4)) & 0xF; + hpsizes = get_paca()->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr) >> 1; + mask_index = GET_HIGH_SLICE_INDEX(addr) - (index << 1); + return (hpsizes[index] >> (mask_index * 4)) & 0xF; } #else diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index b9ee79ce..c355af6 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -108,17 +108,34 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) * between 4k and 64k standard page size */ #ifdef CONFIG_PPC_MM_SLICES + /* r10 have esid */ cmpldi r10,16 - - /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ - ld r9,PACALOWSLICESPSIZE(r13) - sldir11,r10,2 + /* below SLICE_LOW_TOP */ blt 5f - ld r9,PACAHIGHSLICEPSIZE(r13) - srdir11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) - andi. r11,r11,0x3c - -5: /* Extract the psize and multiply to get an array offset */ + /* +* Handle hpsizes, +* r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index +* We use r10 here, later we restore it to esid. +* Can we use other register instead of r10 ? 
+*/ + srdir10,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT) /* index */ + srdir11,r10,1 /* r11 is array index */ + addir9,r11,PACAHIGHSLICEPSIZE + lbzxr9,r9,r13 /* r9 is hpsizes[r11] */ + sldir11,r11,1 + subfr11,r11,r10 /* mask_index = index - (array_index << 1) */ + srdir10,r3,28 /* restore r10 with esid */ + b 6f +5: + /* +* Handle lpsizes +* r9 is get_paca()->context.low_slices_psize, r11 is index +*/ + ld r9,PACALOWSLICESPSIZE(r13) + mr r11,r10 +6: + sldir11,r11,2 /* index * 4 */ + /* Extract the psize and multiply to get an array offset */ srd r9,r9,r11 andi. r9,r9,0xf mulli r9,r9,MMUPSIZEDEFSIZE diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 73709f7..302a481 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -42,7 +42,7 @@ int _slice_debug = 1; static void slice_