Re: [PATCH 4/8] v2 Allow memory block to span multiple memory sections

2010-09-28 Thread Nathan Fontenot
On 09/27/2010 06:55 PM, Dave Hansen wrote:
 On Mon, 2010-09-27 at 14:25 -0500, Nathan Fontenot wrote:
 +static inline int base_memory_block_id(int section_nr)
 +{
 +   return section_nr / sections_per_block;
 +}
 ...
 -   mutex_lock(mem_sysfs_mutex);
 -
 -   mem-phys_index = __section_nr(section);
 +   scn_nr = __section_nr(section);
 +   mem-phys_index = base_memory_block_id(scn_nr) * sections_per_block; 
 
 I'm really regretting giving this variable such a horrid name.  I suck.
 
 I think this is correct now:
 
   mem-phys_index = base_memory_block_id(scn_nr) * sections_per_block;
   mem-phys_index = section_nr / sections_per_block * sections_per_block;
   mem-phys_index = section_nr
 
 Since it gets exported to userspace this way:
 
 +static ssize_t show_mem_start_phys_index(struct sys_device *dev,
 struct sysdev_attribute *attr, char *buf)
  {
 struct memory_block *mem =
 container_of(dev, struct memory_block, sysdev);
 -   return sprintf(buf, %08lx\n, mem-phys_index / sections_per_block);
 +   unsigned long phys_index;
 +
 +   phys_index = mem-start_phys_index / sections_per_block;
 +   return sprintf(buf, %08lx\n, phys_index);
 +}
 
 The only other thing I'd say is that we need to put phys_index out of
 its misery and call it what it is now: a section number.  I think it's
 OK to call them start/end_section_nr, at least inside the kernel.  I
 intentionally used phys_index terminology in sysfs so that we _could_
 eventually do this stuff and break the relationship between sections and
 the sysfs dirs, but I think keeping the terminology around inside the
 kernel is confusing now.

Yes, it took me a couple o looks to get the phys_index - section number
correlation.  I think changing the kernel names to start/end_section_number
is a good idea.

-Nathan

 
 -- Dave
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 4/8] v2 Allow memory block to span multiple memory sections

2010-09-28 Thread Nathan Fontenot
On 09/28/2010 07:48 AM, Robin Holt wrote:
 +u32 __weak memory_block_size_bytes(void)
 +{
 +return MIN_MEMORY_BLOCK_SIZE;
 +}
 +
 +static u32 get_memory_block_size(void)
 
 Can we make this an unsigned long?  We are testing on a system whose
 smallest possible configuration is 4GB per socket with 512 sockets.
 We would like to be able to specify this as 2GB by default (results
 in the least lost memory) and suggest we add a command line option
 which overrides this value.  We have many installations where 16GB may
 be optimal.  Large configurations will certainly become more prevalent.

Works for me.

 
 ...
 @@ -551,12 +608,16 @@
  unsigned int i;
  int ret;
  int err;
 +int block_sz;
 
 This one needs to match the return above.  In our tests, we ended up
 with a negative sections_per_block which caused very unexpected results.

Oh, nice catch.  I'll update both of these.

-Nathan

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH 4/8] v2 Allow memory block to span multiple memory sections

2010-09-27 Thread Nathan Fontenot
Update the memory sysfs code such that each sysfs memory directory is now
considered a memory block that can span multiple memory sections per
memory block.  The default size of each memory block is SECTION_SIZE_BITS
to maintain the current behavior of having a single memory section per
memory block (i.e. one sysfs directory per memory section).

For architectures that want to have memory blocks span multiple
memory sections they need only define their own memory_block_size_bytes()
routine.

Signed-off-by: Nathan Fontenot nf...@austin.ibm.com

---
 drivers/base/memory.c |  155 ++
 1 file changed, 108 insertions(+), 47 deletions(-)

Index: linux-next/drivers/base/memory.c
===
--- linux-next.orig/drivers/base/memory.c   2010-09-27 09:31:57.0 
-0500
+++ linux-next/drivers/base/memory.c2010-09-27 13:50:18.0 -0500
@@ -30,6 +30,14 @@
 static DEFINE_MUTEX(mem_sysfs_mutex);
 
 #define MEMORY_CLASS_NAME  memory
+#define MIN_MEMORY_BLOCK_SIZE  (1  SECTION_SIZE_BITS)
+
+static int sections_per_block;
+
+static inline int base_memory_block_id(int section_nr)
+{
+   return section_nr / sections_per_block;
+}
 
 static struct sysdev_class memory_sysdev_class = {
.name = MEMORY_CLASS_NAME,
@@ -84,28 +92,47 @@
  * register_memory - Setup a sysfs device for a memory block
  */
 static
-int register_memory(struct memory_block *memory, struct mem_section *section)
+int register_memory(struct memory_block *memory)
 {
int error;
 
memory-sysdev.cls = memory_sysdev_class;
-   memory-sysdev.id = __section_nr(section);
+   memory-sysdev.id = memory-phys_index / sections_per_block;
 
error = sysdev_register(memory-sysdev);
return error;
 }
 
 static void
-unregister_memory(struct memory_block *memory, struct mem_section *section)
+unregister_memory(struct memory_block *memory)
 {
BUG_ON(memory-sysdev.cls != memory_sysdev_class);
-   BUG_ON(memory-sysdev.id != __section_nr(section));
 
/* drop the ref. we got in remove_memory_block() */
kobject_put(memory-sysdev.kobj);
sysdev_unregister(memory-sysdev);
 }
 
+u32 __weak memory_block_size_bytes(void)
+{
+   return MIN_MEMORY_BLOCK_SIZE;
+}
+
+static u32 get_memory_block_size(void)
+{
+   u32 block_sz;
+
+   block_sz = memory_block_size_bytes();
+
+   /* Validate blk_sz is a power of 2 and not less than section size */
+   if ((block_sz  (block_sz - 1)) || (block_sz  MIN_MEMORY_BLOCK_SIZE)) {
+   WARN_ON(1);
+   block_sz = MIN_MEMORY_BLOCK_SIZE;
+   }
+
+   return block_sz;
+}
+
 /*
  * use this as the physical section index that this memsection
  * uses.
@@ -116,7 +143,7 @@
 {
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
-   return sprintf(buf, %08lx\n, mem-phys_index);
+   return sprintf(buf, %08lx\n, mem-phys_index / sections_per_block);
 }
 
 /*
@@ -125,13 +152,16 @@
 static ssize_t show_mem_removable(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
 {
-   unsigned long start_pfn;
-   int ret;
+   unsigned long i, pfn;
+   int ret = 1;
struct memory_block *mem =
container_of(dev, struct memory_block, sysdev);
 
-   start_pfn = section_nr_to_pfn(mem-phys_index);
-   ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
+   for (i = 0; i  sections_per_block; i++) {
+   pfn = section_nr_to_pfn(mem-phys_index + i);
+   ret = is_mem_section_removable(pfn, PAGES_PER_SECTION);
+   }
+
return sprintf(buf, %d\n, ret);
 }
 
@@ -184,17 +214,14 @@
  * OK to have direct references to sparsemem variables in here.
  */
 static int
-memory_block_action(struct memory_block *mem, unsigned long action)
+memory_section_action(unsigned long phys_index, unsigned long action)
 {
int i;
-   unsigned long psection;
unsigned long start_pfn, start_paddr;
struct page *first_page;
int ret;
-   int old_state = mem-state;
 
-   psection = mem-phys_index;
-   first_page = pfn_to_page(psection  PFN_SECTION_SHIFT);
+   first_page = pfn_to_page(phys_index  PFN_SECTION_SHIFT);
 
/*
 * The probe routines leave the pages reserved, just
@@ -207,8 +234,8 @@
continue;
 
printk(KERN_WARNING section number %ld page number %d 
-   not reserved, was it already online? \n,
-   psection, i);
+   not reserved, was it already online?\n,
+   phys_index, i);
return -EBUSY;
}
}
@@ -219,18 +246,13 @@
ret = online_pages(start_pfn, PAGES_PER_SECTION);
   

Re: [PATCH 4/8] v2 Allow memory block to span multiple memory sections

2010-09-27 Thread Dave Hansen
On Mon, 2010-09-27 at 14:25 -0500, Nathan Fontenot wrote:
 +static inline int base_memory_block_id(int section_nr)
 +{
 +   return section_nr / sections_per_block;
 +}
...
 -   mutex_lock(mem_sysfs_mutex);
 -
 -   mem-phys_index = __section_nr(section);
 +   scn_nr = __section_nr(section);
 +   mem-phys_index = base_memory_block_id(scn_nr) * sections_per_block; 

I'm really regretting giving this variable such a horrid name.  I suck.

I think this is correct now:

mem-phys_index = base_memory_block_id(scn_nr) * sections_per_block;
mem-phys_index = section_nr / sections_per_block * sections_per_block;
mem-phys_index = section_nr

Since it gets exported to userspace this way:

 +static ssize_t show_mem_start_phys_index(struct sys_device *dev,
 struct sysdev_attribute *attr, char *buf)
  {
 struct memory_block *mem =
 container_of(dev, struct memory_block, sysdev);
 -   return sprintf(buf, %08lx\n, mem-phys_index / sections_per_block);
 +   unsigned long phys_index;
 +
 +   phys_index = mem-start_phys_index / sections_per_block;
 +   return sprintf(buf, %08lx\n, phys_index);
 +}

The only other thing I'd say is that we need to put phys_index out of
its misery and call it what it is now: a section number.  I think it's
OK to call them start/end_section_nr, at least inside the kernel.  I
intentionally used phys_index terminology in sysfs so that we _could_
eventually do this stuff and break the relationship between sections and
the sysfs dirs, but I think keeping the terminology around inside the
kernel is confusing now.

-- Dave

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev