Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-15 Thread Palmer Dabbelt

On Thu, 11 Oct 2018 05:18:20 PDT (-0700), sba...@raithlin.com wrote:

Palmer


I don't really know anything about this, but you're welcome to add a
   
   Reviewed-by: Palmer Dabbelt 


Thanks. I think it would be good to get someone who's familiar with linux/mm to 
take a look.

if you think it'll help.  I'm assuming you're targeting a different tree for 
the patch set, in which case it's probably best to keep this together with the 
rest of it.


No I think this series should be pulled by the RISC-V maintainer. The other patches in this series just refactor some code and need to be ACK'ed by their ARCH developers but I suspect the series should be pulled into RISC-V. That said since it does touch other arch should it be pulled by mm? 


BTW note that RISC-V SPARSEMEM support is pretty useful for all manner of 
things and not just the p2pdma discussed in the cover.


Ah, OK -- I thought this was adding the support everywhere.  Do you mind 
re-sending the patches with the various acks/reviews and I'll put it on 
for-next?




Thanks for porting your stuff to RISC-V!


You bet ;-)


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-15 Thread Palmer Dabbelt

On Thu, 11 Oct 2018 05:18:20 PDT (-0700), sba...@raithlin.com wrote:

Palmer


I don't really know anything about this, but you're welcome to add a
   
   Reviewed-by: Palmer Dabbelt 


Thanks. I think it would be good to get someone who's familiar with linux/mm to 
take a look.

if you think it'll help.  I'm assuming you're targeting a different tree for 
the patch set, in which case it's probably best to keep this together with the 
rest of it.


No I think this series should be pulled by the RISC-V maintainer. The other patches in this series just refactor some code and need to be ACK'ed by their ARCH developers but I suspect the series should be pulled into RISC-V. That said since it does touch other arch should it be pulled by mm? 


BTW note that RISC-V SPARSEMEM support is pretty useful for all manner of 
things and not just the p2pdma discussed in the cover.


Ah, OK -- I thought this was adding the support everywhere.  Do you mind 
re-sending the patches with the various acks/reviews and I'll put it on 
for-next?




Thanks for porting your stuff to RISC-V!


You bet ;-)


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 12:45 p.m., Logan Gunthorpe wrote:
> Ok, I spoke too soon...
> 
> Having this define next to the struct page definition works great for
> riscv. However, making that happen in arm64 seems to be a nightmare. The
> include chain in arm64 is tangled up so much that including mm_types
> where this is needed seems to be extremely difficult.

Sorry for all the unnecessary churn but I've figured it out. Just had to
realize we only need mm_types.h to be included where
STRUCT_PAGE_MAX_SHIFT is finally expanded. Thus we only need it in one
more spot (fixmap.h). See below.

Thanks,

Logan

--


diff --git a/arch/arm64/include/asm/memory.h
b/arch/arm64/include/asm/memory.h
index b96442960aea..f0a5c9531e8b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -34,15 +34,6 @@
  */
 #define PCI_IO_SIZE		SZ_16M

-/*
- * Log2 of the upper bound of the size of a struct page. Used for sizing
- * the vmemmap region only, does not affect actual memory footprint.
- * We don't use sizeof(struct page) directly since taking its size here
- * requires its definition to be available at this point in the inclusion
- * chain, and it may not be a power of 2 in the first place.
- */
-#define STRUCT_PAGE_MAX_SHIFT  6
-
 /*
  * VMEMMAP_SIZE - allows the whole linear region to be covered by
  *a struct page array
diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 827e4d3bbc7a..8cc7b09c1bc7 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -16,6 +16,7 @@
 #define __ASM_GENERIC_FIXMAP_H

 #include <linux/bug.h>
+#include <linux/mm_types.h>

 #define __fix_to_virt(x)   (FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)   ((FIXADDR_TOP - ((x)&PAGE_MASK)) >>
PAGE_SHIFT)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..d1c3cde8c201 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -206,6 +206,11 @@ struct page {
 #endif
 } _struct_page_alignment;

+/*
+ * Used for sizing the vmemmap region on some architectures.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  ilog2(roundup_pow_of_two(sizeof(struct
page)))
+
 #define PAGE_FRAG_CACHE_MAX_SIZE   __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER  get_order(PAGE_FRAG_CACHE_MAX_SIZE)


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 12:45 p.m., Logan Gunthorpe wrote:
> Ok, I spoke too soon...
> 
> Having this define next to the struct page definition works great for
> riscv. However, making that happen in arm64 seems to be a nightmare. The
> include chain in arm64 is tangled up so much that including mm_types
> where this is needed seems to be extremely difficult.

Sorry for all the unnecessary churn but I've figured it out. Just had to
realize we only need mm_types.h to be included where
STRUCT_PAGE_MAX_SHIFT is finally expanded. Thus we only need it in one
more spot (fixmap.h). See below.

Thanks,

Logan

--


diff --git a/arch/arm64/include/asm/memory.h
b/arch/arm64/include/asm/memory.h
index b96442960aea..f0a5c9531e8b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -34,15 +34,6 @@
  */
 #define PCI_IO_SIZE		SZ_16M

-/*
- * Log2 of the upper bound of the size of a struct page. Used for sizing
- * the vmemmap region only, does not affect actual memory footprint.
- * We don't use sizeof(struct page) directly since taking its size here
- * requires its definition to be available at this point in the inclusion
- * chain, and it may not be a power of 2 in the first place.
- */
-#define STRUCT_PAGE_MAX_SHIFT  6
-
 /*
  * VMEMMAP_SIZE - allows the whole linear region to be covered by
  *a struct page array
diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 827e4d3bbc7a..8cc7b09c1bc7 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -16,6 +16,7 @@
 #define __ASM_GENERIC_FIXMAP_H

 #include <linux/bug.h>
+#include <linux/mm_types.h>

 #define __fix_to_virt(x)   (FIXADDR_TOP - ((x) << PAGE_SHIFT))
 #define __virt_to_fix(x)   ((FIXADDR_TOP - ((x)&PAGE_MASK)) >>
PAGE_SHIFT)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ed8f6292a53..d1c3cde8c201 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -206,6 +206,11 @@ struct page {
 #endif
 } _struct_page_alignment;

+/*
+ * Used for sizing the vmemmap region on some architectures.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  ilog2(roundup_pow_of_two(sizeof(struct
page)))
+
 #define PAGE_FRAG_CACHE_MAX_SIZE   __ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER  get_order(PAGE_FRAG_CACHE_MAX_SIZE)


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 10:24 a.m., Logan Gunthorpe wrote:
> 
> 
> On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>>> +/*
>>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>>> + * the vmemmap region only, does not affect actual memory footprint.
>>> + * We don't use sizeof(struct page) directly since taking its size here
>>> + * requires its definition to be available at this point in the inclusion
>>> + * chain, and it may not be a power of 2 in the first place.
>>> + */
>>> +#define STRUCT_PAGE_MAX_SHIFT  6
>>
>> I know this is copied from arm64, but wouldn't this be a good time
>> to move this next to the struct page definition?

Ok, I spoke too soon...

Having this define next to the struct page definition works great for
riscv. However, making that happen in arm64 seems to be a nightmare. The
include chain in arm64 is tangled up so much that including mm_types
where this is needed seems to be extremely difficult.

Unless you have any ideas, this might not be possible.

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 10:24 a.m., Logan Gunthorpe wrote:
> 
> 
> On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>>> +/*
>>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>>> + * the vmemmap region only, does not affect actual memory footprint.
>>> + * We don't use sizeof(struct page) directly since taking its size here
>>> + * requires its definition to be available at this point in the inclusion
>>> + * chain, and it may not be a power of 2 in the first place.
>>> + */
>>> +#define STRUCT_PAGE_MAX_SHIFT  6
>>
>> I know this is copied from arm64, but wouldn't this be a good time
>> to move this next to the struct page definition?

Ok, I spoke too soon...

Having this define next to the struct page definition works great for
riscv. However, making that happen in arm64 seems to be a nightmare. The
include chain in arm64 is tangled up so much that including mm_types
where this is needed seems to be extremely difficult.

Unless you have any ideas, this might not be possible.

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 10:24 a.m., Logan Gunthorpe wrote:
> On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>>> +/*
>>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>>> + * the vmemmap region only, does not affect actual memory footprint.
>>> + * We don't use sizeof(struct page) directly since taking its size here
>>> + * requires its definition to be available at this point in the inclusion
>>> + * chain, and it may not be a power of 2 in the first place.
>>> + */
>>> +#define STRUCT_PAGE_MAX_SHIFT  6
>>
>> I know this is copied from arm64, but wouldn't this be a good time
>> to move this next to the struct page definition?
>>
>> Also this:
>>
>> arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
>> STRUCT_PAGE_MAX_SHIFT));
>>
>> should move to common code (or would have to be duplicated for riscv)
> 
> Makes sense. Where is a good place for the BUILD_BUG_ON in common code?

Never mind. Seems like it's pretty trivial to do this:

#define STRUCT_PAGE_MAX_SHIFT \
ilog2(roundup_pow_of_two(sizeof(struct page)))

So the BUILD_BUG_ON becomes unnecessary.

The comment saying it can't be done is really misleading as it wasn't
actually difficult.

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 10:24 a.m., Logan Gunthorpe wrote:
> On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>>> +/*
>>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>>> + * the vmemmap region only, does not affect actual memory footprint.
>>> + * We don't use sizeof(struct page) directly since taking its size here
>>> + * requires its definition to be available at this point in the inclusion
>>> + * chain, and it may not be a power of 2 in the first place.
>>> + */
>>> +#define STRUCT_PAGE_MAX_SHIFT  6
>>
>> I know this is copied from arm64, but wouldn't this be a good time
>> to move this next to the struct page definition?
>>
>> Also this:
>>
>> arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
>> STRUCT_PAGE_MAX_SHIFT));
>>
>> should move to common code (or would have to be duplicated for riscv)
> 
> Makes sense. Where is a good place for the BUILD_BUG_ON in common code?

Never mind. Seems like it's pretty trivial to do this:

#define STRUCT_PAGE_MAX_SHIFT \
ilog2(roundup_pow_of_two(sizeof(struct page)))

So the BUILD_BUG_ON becomes unnecessary.

The comment saying it can't be done is really misleading as it wasn't
actually difficult.

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>> +/*
>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>> + * the vmemmap region only, does not affect actual memory footprint.
>> + * We don't use sizeof(struct page) directly since taking its size here
>> + * requires its definition to be available at this point in the inclusion
>> + * chain, and it may not be a power of 2 in the first place.
>> + */
>> +#define STRUCT_PAGE_MAX_SHIFT   6
> 
> I know this is copied from arm64, but wouldn't this be a good time
> to move this next to the struct page definition?
> 
> Also this:
> 
> arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
> STRUCT_PAGE_MAX_SHIFT));
> 
> should move to common code (or would have to be duplicated for riscv)

Makes sense. Where is a good place for the BUILD_BUG_ON in common code?

I've queued up changes for your other feedback.

Thanks,

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Logan Gunthorpe



On 2018-10-11 7:37 a.m., Christoph Hellwig wrote:
>> +/*
>> + * Log2 of the upper bound of the size of a struct page. Used for sizing
>> + * the vmemmap region only, does not affect actual memory footprint.
>> + * We don't use sizeof(struct page) directly since taking its size here
>> + * requires its definition to be available at this point in the inclusion
>> + * chain, and it may not be a power of 2 in the first place.
>> + */
>> +#define STRUCT_PAGE_MAX_SHIFT   6
> 
> I know this is copied from arm64, but wouldn't this be a good time
> to move this next to the struct page definition?
> 
> Also this:
> 
> arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
> STRUCT_PAGE_MAX_SHIFT));
> 
> should move to common code (or would have to be duplicated for riscv)

Makes sense. Where is a good place for the BUILD_BUG_ON in common code?

I've queued up changes for your other feedback.

Thanks,

Logan


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Christoph Hellwig
> +/*
> + * Log2 of the upper bound of the size of a struct page. Used for sizing
> + * the vmemmap region only, does not affect actual memory footprint.
> + * We don't use sizeof(struct page) directly since taking its size here
> + * requires its definition to be available at this point in the inclusion
> + * chain, and it may not be a power of 2 in the first place.
> + */
> +#define STRUCT_PAGE_MAX_SHIFT6

I know this is copied from arm64, but wouldn't this be a good time
to move this next to the struct page definition?

Also this:

arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
STRUCT_PAGE_MAX_SHIFT));

should move to common code (or would have to be duplicated for riscv)

> +#define VMEMMAP_SIZE (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
> +STRUCT_PAGE_MAX_SHIFT))

Might be more readable with another define, and without abuse of the
horrible UL macro:

#define VMEMMAP_SHIFT \
(CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE	(1UL << VMEMMAP_SHIFT)

> +#define VMEMMAP_END  (VMALLOC_START - 1)
> +#define VMEMMAP_START(VMALLOC_START - VMEMMAP_SIZE)
> +
> +#define vmemmap  ((struct page *)VMEMMAP_START)

This could also use some comments..

> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_SPARSEMEM_H
> +#define __ASM_SPARSEMEM_H
> +
> +#ifdef CONFIG_SPARSEMEM
> +#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
> +#define SECTION_SIZE_BITS30
> +#endif
> +
> +#endif

For potentially wide-spanning ifdefs like inclusion headers it always
is nice to have a comment with the symbol on the endif line.


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Christoph Hellwig
> +/*
> + * Log2 of the upper bound of the size of a struct page. Used for sizing
> + * the vmemmap region only, does not affect actual memory footprint.
> + * We don't use sizeof(struct page) directly since taking its size here
> + * requires its definition to be available at this point in the inclusion
> + * chain, and it may not be a power of 2 in the first place.
> + */
> +#define STRUCT_PAGE_MAX_SHIFT6

I know this is copied from arm64, but wouldn't this be a good time
to move this next to the struct page definition?

Also this:

arch/arm64/mm/init.c:   BUILD_BUG_ON(sizeof(struct page) > (1 << 
STRUCT_PAGE_MAX_SHIFT));

should move to common code (or would have to be duplicated for riscv)

> +#define VMEMMAP_SIZE (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
> +STRUCT_PAGE_MAX_SHIFT))

Might be more readable with another define, and without abuse of the
horrible UL macro:

#define VMEMMAP_SHIFT \
(CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE	(1UL << VMEMMAP_SHIFT)

> +#define VMEMMAP_END  (VMALLOC_START - 1)
> +#define VMEMMAP_START(VMALLOC_START - VMEMMAP_SIZE)
> +
> +#define vmemmap  ((struct page *)VMEMMAP_START)

This could also use some comments..

> @@ -0,0 +1,11 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_SPARSEMEM_H
> +#define __ASM_SPARSEMEM_H
> +
> +#ifdef CONFIG_SPARSEMEM
> +#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
> +#define SECTION_SIZE_BITS30
> +#endif
> +
> +#endif

For potentially wide-spanning ifdefs like inclusion headers it always
is nice to have a comment with the symbol on the endif line.


Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Stephen Bates
Palmer

> I don't really know anything about this, but you're welcome to add a
>
>Reviewed-by: Palmer Dabbelt 

Thanks. I think it would be good to get someone who's familiar with linux/mm to 
take a look.

> if you think it'll help.  I'm assuming you're targeting a different tree for 
> the patch set, in which case it's probably best to keep this together with 
> the 
> rest of it.

No I think this series should be pulled by the RISC-V maintainer. The other 
patches in this series just refactor some code and need to be ACK'ed by their 
ARCH developers but I suspect the series should be pulled into RISC-V. That 
said since it does touch other arch should it be pulled by mm? 

BTW note that RISC-V SPARSEMEM support is pretty useful for all manner of 
things and not just the p2pdma discussed in the cover.

> Thanks for porting your stuff to RISC-V!

You bet ;-)

Stephen





Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-11 Thread Stephen Bates
Palmer

> I don't really know anything about this, but you're welcome to add a
>
>Reviewed-by: Palmer Dabbelt 

Thanks. I think it would be good to get someone who's familiar with linux/mm to 
take a look.

> if you think it'll help.  I'm assuming you're targeting a different tree for 
> the patch set, in which case it's probably best to keep this together with 
> the 
> rest of it.

No I think this series should be pulled by the RISC-V maintainer. The other 
patches in this series just refactor some code and need to be ACK'ed by their 
ARCH developers but I suspect the series should be pulled into RISC-V. That 
said since it does touch other arch should it be pulled by mm? 

BTW note that RISC-V SPARSEMEM support is pretty useful for all manner of 
things and not just the p2pdma discussed in the cover.

> Thanks for porting your stuff to RISC-V!

You bet ;-)

Stephen





Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-10 Thread Palmer Dabbelt

On Fri, 05 Oct 2018 09:16:42 PDT (-0700), log...@deltatee.com wrote:

This patch implements sparsemem support for risc-v which helps pave the
way for memory hotplug and eventually P2P support.

We introduce Kconfig options for virtual and physical address bits which
are used to calculate the size of the vmemmap and set the
MAX_PHYSMEM_BITS.

The vmemmap is located directly before the VMALLOC region and sized
such that we can allocate enough pages to populate all the virtual
address space in the system (similar to the way it's done in arm64).

During initialization, call memblocks_present() and sparse_init(),
and provide a stub for vmemmap_populate() (all of which is similar to
arm64).

Signed-off-by: Logan Gunthorpe 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Andrew Waterman 
Cc: Olof Johansson 
Cc: Michael Clark 
Cc: Rob Herring 
Cc: Zong Li 
---
 arch/riscv/Kconfig | 23 +++
 arch/riscv/include/asm/pgtable.h   | 24 
 arch/riscv/include/asm/sparsemem.h | 11 +++
 arch/riscv/kernel/setup.c  |  4 +++-
 arch/riscv/mm/init.c   |  8 
 5 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/include/asm/sparsemem.h


I don't really know anything about this, but you're welcome to add a

Reviewed-by: Palmer Dabbelt 

if you think it'll help.  I'm assuming you're targeting a different tree for 
the patch set, in which case it's probably best to keep this together with the 
rest of it.


Thanks for porting your stuff to RISC-V!


diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..a1b5d758a542 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -52,12 +52,32 @@ config ZONE_DMA32
bool
default y if 64BIT

+config VA_BITS
+   int
+   default 32 if 32BIT
+   default 39 if 64BIT
+
+config PA_BITS
+   int
+   default 34 if 32BIT
+   default 56 if 64BIT
+
 config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB

+config ARCH_FLATMEM_ENABLE
+   def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+   def_bool y
+   select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+   def_bool ARCH_SPARSEMEM_ENABLE
+
 config STACKTRACE_SUPPORT
def_bool y

@@ -92,6 +112,9 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
def_bool n

+config HAVE_ARCH_PFN_VALID
+   def_bool y
+
 menu "Platform type"

 choice
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..20c49cded686 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -89,6 +89,26 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC

+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END  (PAGE_OFFSET - 1)
+#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
+
+/*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only, does not affect actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  6
+
+#define VMEMMAP_SIZE   (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
+  STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_END(VMALLOC_START - 1)
+#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap((struct page *)VMEMMAP_START)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -411,10 +431,6 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
 }

-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END  (PAGE_OFFSET - 1)
-#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
-
 /*
  * Task size is 0x400 for RV64 or 0xb80 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/sparsemem.h 
b/arch/riscv/include/asm/sparsemem.h
new file mode 100644
index 000000000000..4563e806c788
--- /dev/null
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
+#define SECTION_SIZE_BITS  30
+#endif
+
+#endif
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..89fa781a9bf8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -205,6 +205,9 @@ static void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
   

Re: [PATCH 5/5] RISC-V: Implement sparsemem

2018-10-10 Thread Palmer Dabbelt

On Fri, 05 Oct 2018 09:16:42 PDT (-0700), log...@deltatee.com wrote:

This patch implements sparsemem support for risc-v which helps pave the
way for memory hotplug and eventually P2P support.

We introduce Kconfig options for virtual and physical address bits which
are used to calculate the size of the vmemmap and set the
MAX_PHYSMEM_BITS.

The vmemmap is located directly before the VMALLOC region and sized
such that we can allocate enough pages to populate all the virtual
address space in the system (similar to the way it's done in arm64).

During initialization, call memblocks_present() and sparse_init(),
and provide a stub for vmemmap_populate() (all of which is similar to
arm64).

Signed-off-by: Logan Gunthorpe 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Andrew Waterman 
Cc: Olof Johansson 
Cc: Michael Clark 
Cc: Rob Herring 
Cc: Zong Li 
---
 arch/riscv/Kconfig | 23 +++
 arch/riscv/include/asm/pgtable.h   | 24 
 arch/riscv/include/asm/sparsemem.h | 11 +++
 arch/riscv/kernel/setup.c  |  4 +++-
 arch/riscv/mm/init.c   |  8 
 5 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/include/asm/sparsemem.h


I don't really know anything about this, but you're welcome to add a

Reviewed-by: Palmer Dabbelt 

if you think it'll help.  I'm assuming you're targeting a different tree for 
the patch set, in which case it's probably best to keep this together with the 
rest of it.


Thanks for porting your stuff to RISC-V!


diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..a1b5d758a542 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -52,12 +52,32 @@ config ZONE_DMA32
bool
default y if 64BIT

+config VA_BITS
+   int
+   default 32 if 32BIT
+   default 39 if 64BIT
+
+config PA_BITS
+   int
+   default 34 if 32BIT
+   default 56 if 64BIT
+
 config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB

+config ARCH_FLATMEM_ENABLE
+   def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+   def_bool y
+   select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+   def_bool ARCH_SPARSEMEM_ENABLE
+
 config STACKTRACE_SUPPORT
def_bool y

@@ -92,6 +112,9 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
def_bool n

+config HAVE_ARCH_PFN_VALID
+   def_bool y
+
 menu "Platform type"

 choice
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..20c49cded686 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -89,6 +89,26 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC

+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END  (PAGE_OFFSET - 1)
+#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
+
+/*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only, does not affect actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  6
+
+#define VMEMMAP_SIZE   (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
+  STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_END(VMALLOC_START - 1)
+#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap((struct page *)VMEMMAP_START)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -411,10 +431,6 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
 }

-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END  (PAGE_OFFSET - 1)
-#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
-
 /*
  * Task size is 0x400 for RV64 or 0xb80 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/sparsemem.h 
b/arch/riscv/include/asm/sparsemem.h
new file mode 100644
index 000000000000..4563e806c788
--- /dev/null
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
+#define SECTION_SIZE_BITS  30
+#endif
+
+#endif
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..89fa781a9bf8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -205,6 +205,9 @@ static void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
   

[PATCH 5/5] RISC-V: Implement sparsemem

2018-10-05 Thread Logan Gunthorpe
This patch implements sparsemem support for risc-v which helps pave the
way for memory hotplug and eventually P2P support.

We introduce Kconfig options for virtual and physical address bits which
are used to calculate the size of the vmemmap and set the
MAX_PHYSMEM_BITS.

The vmemmap is located directly before the VMALLOC region and sized
such that we can allocate enough pages to populate all the virtual
address space in the system (similar to the way it's done in arm64).

During initialization, call memblocks_present() and sparse_init(),
and provide a stub for vmemmap_populate() (all of which is similar to
arm64).

Signed-off-by: Logan Gunthorpe 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Andrew Waterman 
Cc: Olof Johansson 
Cc: Michael Clark 
Cc: Rob Herring 
Cc: Zong Li 
---
 arch/riscv/Kconfig | 23 +++
 arch/riscv/include/asm/pgtable.h   | 24 
 arch/riscv/include/asm/sparsemem.h | 11 +++
 arch/riscv/kernel/setup.c  |  4 +++-
 arch/riscv/mm/init.c   |  8 
 5 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/include/asm/sparsemem.h

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..a1b5d758a542 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -52,12 +52,32 @@ config ZONE_DMA32
bool
default y if 64BIT
 
+config VA_BITS
+   int
+   default 32 if 32BIT
+   default 39 if 64BIT
+
+config PA_BITS
+   int
+   default 34 if 32BIT
+   default 56 if 64BIT
+
 config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
 
+config ARCH_FLATMEM_ENABLE
+   def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+   def_bool y
+   select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+   def_bool ARCH_SPARSEMEM_ENABLE
+
 config STACKTRACE_SUPPORT
def_bool y
 
@@ -92,6 +112,9 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
def_bool n
 
+config HAVE_ARCH_PFN_VALID
+   def_bool y
+
 menu "Platform type"
 
 choice
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..20c49cded686 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -89,6 +89,26 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC
 
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END  (PAGE_OFFSET - 1)
+#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
+
+/*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only, does not affect actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  6
+
+#define VMEMMAP_SIZE   (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
+  STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_END(VMALLOC_START - 1)
+#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap((struct page *)VMEMMAP_START)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -411,10 +431,6 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
 }
 
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END  (PAGE_OFFSET - 1)
-#define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)
-
 /*
  * Task size is 0x400 for RV64 or 0xb80 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/sparsemem.h 
b/arch/riscv/include/asm/sparsemem.h
new file mode 100644
index 000000000000..4563e806c788
--- /dev/null
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
+#define SECTION_SIZE_BITS  30
+#endif
+
+#endif
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..89fa781a9bf8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -205,6 +205,9 @@ static void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
  &memblock.memory, 0);
}
+
+   memblocks_present();
+   sparse_init();
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -239,4 +242,3 @@ void __init setup_arch(char **cmdline_p)
 
riscv_fill_hwcap();
 }
-
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 58a522f9bcc3..5d529878667c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ 

[PATCH 5/5] RISC-V: Implement sparsemem

2018-10-05 Thread Logan Gunthorpe
This patch implements sparsemem support for RISC-V, which helps pave the
way for memory hotplug and eventually P2P support.

We introduce Kconfig options for virtual and physical address bits which
are used to calculate the size of the vmemmap and set the
MAX_PHYSMEM_BITS.

The vmemmap is located directly before the VMALLOC region and sized
such that we can allocate enough pages to populate all the virtual
address space in the system (similar to the way it's done in arm64).

During initialization, call memblocks_present() and sparse_init(),
and provide a stub for vmemmap_populate() (all of which is similar to
arm64).

Signed-off-by: Logan Gunthorpe 
Cc: Palmer Dabbelt 
Cc: Albert Ou 
Cc: Andrew Waterman 
Cc: Olof Johansson 
Cc: Michael Clark 
Cc: Rob Herring 
Cc: Zong Li 
---
 arch/riscv/Kconfig | 23 +++
 arch/riscv/include/asm/pgtable.h   | 24 
 arch/riscv/include/asm/sparsemem.h | 11 +++
 arch/riscv/kernel/setup.c  |  4 +++-
 arch/riscv/mm/init.c   |  8 
 5 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 arch/riscv/include/asm/sparsemem.h

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a344980287a5..a1b5d758a542 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -52,12 +52,32 @@ config ZONE_DMA32
bool
default y if 64BIT
 
+config VA_BITS
+   int
+   default 32 if 32BIT
+   default 39 if 64BIT
+
+config PA_BITS
+   int
+   default 34 if 32BIT
+   default 56 if 64BIT
+
 config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
 
+config ARCH_FLATMEM_ENABLE
+   def_bool y
+
+config ARCH_SPARSEMEM_ENABLE
+   def_bool y
+   select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SELECT_MEMORY_MODEL
+   def_bool ARCH_SPARSEMEM_ENABLE
+
 config STACKTRACE_SUPPORT
def_bool y
 
@@ -92,6 +112,9 @@ config PGTABLE_LEVELS
 config HAVE_KPROBES
def_bool n
 
+config HAVE_ARCH_PFN_VALID
+   def_bool y
+
 menu "Platform type"
 
 choice
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..20c49cded686 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -89,6 +89,26 @@ extern pgd_t swapper_pg_dir[];
 #define __S110 PAGE_SHARED_EXEC
 #define __S111 PAGE_SHARED_EXEC
 
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END  (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+
+/*
+ * Log2 of the upper bound of the size of a struct page. Used for sizing
+ * the vmemmap region only, does not affect actual memory footprint.
+ * We don't use sizeof(struct page) directly since taking its size here
+ * requires its definition to be available at this point in the inclusion
+ * chain, and it may not be a power of 2 in the first place.
+ */
+#define STRUCT_PAGE_MAX_SHIFT  6
+
+#define VMEMMAP_SIZE   (UL(1) << (CONFIG_VA_BITS - PAGE_SHIFT - 1 + \
+  STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_END    (VMALLOC_START - 1)
+#define VMEMMAP_START  (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap        ((struct page *)VMEMMAP_START)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero,
  * used for zero-mapped memory areas, etc.
@@ -411,10 +431,6 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
 }
 
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END  (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
 /*
  * Task size is 0x400 for RV64 or 0xb80 for RV32.
  * Note that PGDIR_SIZE must evenly divide TASK_SIZE.
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
new file mode 100644
index 000000000000..4563e806c788
--- /dev/null
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_SPARSEMEM_H
+#define __ASM_SPARSEMEM_H
+
+#ifdef CONFIG_SPARSEMEM
+#define MAX_PHYSMEM_BITS   CONFIG_PA_BITS
+#define SECTION_SIZE_BITS  30
+#endif
+
+#endif
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index aee603123030..89fa781a9bf8 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -205,6 +205,9 @@ static void __init setup_bootmem(void)
  PFN_PHYS(end_pfn - start_pfn),
  &memblock.memory, 0);
}
+
+   memblocks_present();
+   sparse_init();
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -239,4 +242,3 @@ void __init setup_arch(char **cmdline_p)
 
riscv_fill_hwcap();
 }
-
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 58a522f9bcc3..5d529878667c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@