On 2019/9/25 15:32, Mike Rapoport wrote:
> On Wed, Sep 25, 2019 at 03:18:43PM +0800, Yunfeng Ye wrote:
>> sparse_buffer_init() uses memblock_alloc_try_nid_raw() to allocate memory
>> for the page management structure. If the allocation from the specified
>> node fails, it falls back to allocating from other nodes.
>>
>> Normally, the page management structure will not exceed 2% of the total
>> memory, but a large contiguous block needs to be allocated. In most
>> cases, allocation from the specified node will succeed, but it will
>> fail if that node's memory has become highly fragmented. In that case
>> we expect to allocate memory per section rather than allocating a
>> large block of memory from other NUMA nodes.
>>
>> Add memblock_alloc_exact_nid_raw() for this situation, which allocates
>> a boot memory block on the exact node. If the allocation of a large
>> contiguous memory block fails in sparse_buffer_init(), it will fall
>> back to allocating small memory blocks per section.
>>
>> Signed-off-by: Yunfeng Ye <yeyunf...@huawei.com>
> 
> One nit below, otherwise
> 
> Reviewed-by: Mike Rapoport <r...@linux.ibm.com>
> 
ok.

>> ---
>> v2 -> v3:
>>  - use "bool exact_nid" instead of "int need_exact_nid"
>>  - remove the comment "without panicking"
>>
>> v1 -> v2:
>>  - use memblock_alloc_exact_nid_raw() rather than using a flag
>>
>>  include/linux/memblock.h |  3 +++
>>  mm/memblock.c            | 65 
>> ++++++++++++++++++++++++++++++++++++++++--------
>>  mm/sparse.c              |  2 +-
>>  3 files changed, 58 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
>> index f491690..b38bbef 100644
>> --- a/include/linux/memblock.h
>> +++ b/include/linux/memblock.h
>> @@ -358,6 +358,9 @@ static inline phys_addr_t 
>> memblock_phys_alloc(phys_addr_t size,
>>                                       MEMBLOCK_ALLOC_ACCESSIBLE);
>>  }
>>
>> +void *memblock_alloc_exact_nid_raw(phys_addr_t size, phys_addr_t align,
>> +                             phys_addr_t min_addr, phys_addr_t max_addr,
>> +                             int nid);
>>  void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
>>                               phys_addr_t min_addr, phys_addr_t max_addr,
>>                               int nid);
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index 7d4f61a..0de9d83 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1323,12 +1323,13 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone 
>> *zone,
>>   * @start: the lower bound of the memory region to allocate (phys address)
>>   * @end: the upper bound of the memory region to allocate (phys address)
>>   * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + * @exact_nid: control the allocation fall back to other nodes
>>   *
>>   * The allocation is performed from memory region limited by
>>   * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
>>   *
>> - * If the specified node can not hold the requested memory the
>> - * allocation falls back to any node in the system
>> + * If the specified node can not hold the requested memory and @exact_nid
>> + * is zero, the allocation falls back to any node in the system
> 
>      ^ is false  ;-)
> 
Sorry, my negligence. Thanks.

>>   *
>>   * For systems with memory mirroring, the allocation is attempted first
>>   * from the regions with mirroring enabled and then retried from any
>> @@ -1342,7 +1343,8 @@ __next_mem_pfn_range_in_zone(u64 *idx, struct zone 
>> *zone,
>>   */
>>  static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
>>                                      phys_addr_t align, phys_addr_t start,
>> -                                    phys_addr_t end, int nid)
>> +                                    phys_addr_t end, int nid,
>> +                                    bool exact_nid)
>>  {
>>      enum memblock_flags flags = choose_memblock_flags();
>>      phys_addr_t found;
>> @@ -1365,7 +1367,7 @@ static phys_addr_t __init 
>> memblock_alloc_range_nid(phys_addr_t size,
>>      if (found && !memblock_reserve(found, size))
>>              goto done;
>>
>> -    if (nid != NUMA_NO_NODE) {
>> +    if (nid != NUMA_NO_NODE && !exact_nid) {
>>              found = memblock_find_in_range_node(size, align, start,
>>                                                  end, NUMA_NO_NODE,
>>                                                  flags);
>> @@ -1413,7 +1415,8 @@ phys_addr_t __init 
>> memblock_phys_alloc_range(phys_addr_t size,
>>                                           phys_addr_t start,
>>                                           phys_addr_t end)
>>  {
>> -    return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
>> +    return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
>> +                                    false);
>>  }
>>
>>  /**
>> @@ -1432,7 +1435,7 @@ phys_addr_t __init 
>> memblock_phys_alloc_range(phys_addr_t size,
>>  phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, 
>> phys_addr_t align, int nid)
>>  {
>>      return memblock_alloc_range_nid(size, align, 0,
>> -                                    MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>> +                                    MEMBLOCK_ALLOC_ACCESSIBLE, nid, false);
>>  }
>>
>>  /**
>> @@ -1442,6 +1445,7 @@ phys_addr_t __init 
>> memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
>>   * @min_addr: the lower bound of the memory region to allocate (phys 
>> address)
>>   * @max_addr: the upper bound of the memory region to allocate (phys 
>> address)
>>   * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + * @exact_nid: control the allocation fall back to other nodes
>>   *
>>   * Allocates memory block using memblock_alloc_range_nid() and
>>   * converts the returned physical address to virtual.
>> @@ -1457,7 +1461,7 @@ phys_addr_t __init 
>> memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
>>  static void * __init memblock_alloc_internal(
>>                              phys_addr_t size, phys_addr_t align,
>>                              phys_addr_t min_addr, phys_addr_t max_addr,
>> -                            int nid)
>> +                            int nid, bool exact_nid)
>>  {
>>      phys_addr_t alloc;
>>
>> @@ -1469,11 +1473,13 @@ static void * __init memblock_alloc_internal(
>>      if (WARN_ON_ONCE(slab_is_available()))
>>              return kzalloc_node(size, GFP_NOWAIT, nid);
>>
>> -    alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
>> +    alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid,
>> +                                    exact_nid);
>>
>>      /* retry allocation without lower limit */
>>      if (!alloc && min_addr)
>> -            alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
>> +            alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid,
>> +                                            exact_nid);
>>
>>      if (!alloc)
>>              return NULL;
>> @@ -1482,6 +1488,43 @@ static void * __init memblock_alloc_internal(
>>  }
>>
>>  /**
>> + * memblock_alloc_exact_nid_raw - allocate boot memory block on the exact 
>> node
>> + * without zeroing memory
>> + * @size: size of memory block to be allocated in bytes
>> + * @align: alignment of the region and block's size
>> + * @min_addr: the lower bound of the memory region from where the allocation
>> + *    is preferred (phys address)
>> + * @max_addr: the upper bound of the memory region from where the allocation
>> + *        is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
>> + *        allocate only from memory limited by memblock.current_limit value
>> + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
>> + *
>> + * Public function, provides additional debug information (including caller
>> + * info), if enabled. Does not zero allocated memory.
>> + *
>> + * Return:
>> + * Virtual address of allocated memory block on success, NULL on failure.
>> + */
>> +void * __init memblock_alloc_exact_nid_raw(
>> +                    phys_addr_t size, phys_addr_t align,
>> +                    phys_addr_t min_addr, phys_addr_t max_addr,
>> +                    int nid)
>> +{
>> +    void *ptr;
>> +
>> +    memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa 
>> %pS\n",
>> +                 __func__, (u64)size, (u64)align, nid, &min_addr,
>> +                 &max_addr, (void *)_RET_IP_);
>> +
>> +    ptr = memblock_alloc_internal(size, align,
>> +                                       min_addr, max_addr, nid, true);
>> +    if (ptr && size > 0)
>> +            page_init_poison(ptr, size);
>> +
>> +    return ptr;
>> +}
>> +
>> +/**
>>   * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
>>   * memory and without panicking
>>   * @size: size of memory block to be allocated in bytes
>> @@ -1512,7 +1555,7 @@ void * __init memblock_alloc_try_nid_raw(
>>                   &max_addr, (void *)_RET_IP_);
>>
>>      ptr = memblock_alloc_internal(size, align,
>> -                                       min_addr, max_addr, nid);
>> +                                       min_addr, max_addr, nid, false);
>>      if (ptr && size > 0)
>>              page_init_poison(ptr, size);
>>
>> @@ -1547,7 +1590,7 @@ void * __init memblock_alloc_try_nid(
>>                   __func__, (u64)size, (u64)align, nid, &min_addr,
>>                   &max_addr, (void *)_RET_IP_);
>>      ptr = memblock_alloc_internal(size, align,
>> -                                       min_addr, max_addr, nid);
>> +                                       min_addr, max_addr, nid, false);
>>      if (ptr)
>>              memset(ptr, 0, size);
>>
>> diff --git a/mm/sparse.c b/mm/sparse.c
>> index 72f010d..1a06471 100644
>> --- a/mm/sparse.c
>> +++ b/mm/sparse.c
>> @@ -475,7 +475,7 @@ static void __init sparse_buffer_init(unsigned long 
>> size, int nid)
>>      phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
>>      WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
>>      sparsemap_buf =
>> -            memblock_alloc_try_nid_raw(size, PAGE_SIZE,
>> +            memblock_alloc_exact_nid_raw(size, PAGE_SIZE,
>>                                              addr,
>>                                              MEMBLOCK_ALLOC_ACCESSIBLE, nid);
>>      sparsemap_buf_end = sparsemap_buf + size;
>> -- 
>> 2.7.4
>>
> 

Reply via email to