On 20.10.2022 08:14, Wei Chen wrote:
> x86 has implemented a set of codes to process NUMA nodes. These
> codes will parse NUMA memory and processor information from
> ACPI SRAT table. But except some ACPI specific codes, most
> of the process code like memory blocks validation, node memory
> range updates and some sanity check can be reused by other
> NUMA implementation.
> 
> So in this patch, we move some variables and related functions
> for NUMA memory and processor to common as library. At the
> same time, numa_set_processor_nodes_parsed has been introduced
> for ACPI specific code to update processor parsing results.
> With this helper, we can reuse most of NUMA memory affinity init
> code from ACPI. As bad_srat and node_to_pxm functions have been
> used in common code to do architectural fallback and node to
> architectural node info translation. But it doesn't make sense
> to reuse the functions names in common code, we have rename them
> to neutral names as well.
> 
> PXM is an ACPI specific item, we can't use it in common code
> directly. As an alternative, we extend the parameters of
> numa_update_node_memblks. The caller can pass the PXM as print
> messages' prefix or as architectural node id. And we introduced
> an numa_fw_nid_name for each NUMA implementation to set their
> specific firmware NUMA node name. In this case, we do not need
> to retain a lot of per-arch code but still can print architectural
> log messages for different NUMA implementations. A default value
> "???" will be set to indicate an unset numa_fw_nid_name.
> 
> mem_hotplug is accessed by common code if memory hotplug is
> activated. Even if this is only supported by x86, export the
> variable so that other architectures could support it in the future.
> 
> As asm/acpi.h has been removed from common/numa.c, we have to
> move NR_NODE_MEMBLKS from asm/acpi.h to xen/numa.h in this patch
> as well.
> 
> Signed-off-by: Wei Chen <wei.c...@arm.com>

There's just one remaining concern I have: I continue to consider ...

> @@ -341,159 +247,14 @@ acpi_numa_memory_affinity_init(const struct 
> acpi_srat_mem_affinity *ma)
>               pxm &= 0xff;
>       node = setup_node(pxm);
>       if (node == NUMA_NO_NODE) {
> -             bad_srat();
> +             numa_fw_bad();
>               return;
>       }
>  
> -     /*
> -      * For the node that already has some memory blocks, we will
> -      * expand the node memory range temporarily to check memory
> -      * interleaves with other nodes. We will not use this node
> -      * temp memory range to check overlaps, because it will mask
> -      * the overlaps in same node.
> -      *
> -      * Node with 0 bytes memory doesn't need this expandsion.
> -      */
> -     nd_start = start;
> -     nd_end = end;
> -     nd = &nodes[node];
> -     if (nd->start != nd->end) {
> -             if (nd_start > nd->start)
> -                     nd_start = nd->start;
> -
> -             if (nd_end < nd->end)
> -                     nd_end = nd->end;
> -     }
> -
> -     /* It is fine to add this area to the nodes data it will be used later*/
> -     switch (conflicting_memblks(node, start, end, nd_start, nd_end, &i)) {
> -     case OVERLAP:
> -             if (memblk_nodeid[i] == node) {
> -                     bool mismatch = !(ma->flags &
> -                                       ACPI_SRAT_MEM_HOT_PLUGGABLE) !=
> -                                     !test_bit(i, memblk_hotplug);
> -
> -                     printk("%sSRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] 
> overlaps with itself [%"PRIpaddr", %"PRIpaddr"]\n",
> -                            mismatch ? KERN_ERR : KERN_WARNING, pxm, start,
> -                            end - 1, node_memblk_range[i].start,
> -                            node_memblk_range[i].end - 1);
> -                     if (mismatch) {
> -                             bad_srat();
> -                             return;
> -                     }
> -                     break;
> -             }
> -
> -             printk(KERN_ERR
> -                    "SRAT: PXM %u [%"PRIpaddr", %"PRIpaddr"] overlaps with 
> PXM %u [%"PRIpaddr", %"PRIpaddr"]\n",
> -                    pxm, start, end - 1, node_to_pxm(memblk_nodeid[i]),
> -                    node_memblk_range[i].start,
> -                    node_memblk_range[i].end - 1);
> -             bad_srat();
> -             return;
> -
> -     case INTERLEAVE:
> -             printk(KERN_ERR
> -                    "SRAT: PXM %u: [%"PRIpaddr", %"PRIpaddr"] interleaves 
> with PXM %u memblk [%"PRIpaddr", %"PRIpaddr"]\n",
> -                    pxm, nd_start, nd_end - 1, node_to_pxm(memblk_nodeid[i]),
> -                    node_memblk_range[i].start, node_memblk_range[i].end - 
> 1);
> -             bad_srat();
> -             return;
> -
> -     case NO_CONFLICT:
> -             break;
> -     }
> -
> -     if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
> -             node_set(node, memory_nodes_parsed);
> -             nd->start = nd_start;
> -             nd->end = nd_end;
> -     }
> -
> -     printk(KERN_INFO "SRAT: Node %u PXM %u [%"PRIpaddr", %"PRIpaddr"]%s\n",
> -            node, pxm, start, end - 1,
> -            ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? " (hotplug)" : "");
> -
> -     /* Keep node_memblk_range[] sorted by address. */
> -     for (i = 0; i < num_node_memblks; ++i)
> -             if (node_memblk_range[i].start > start ||
> -                 (node_memblk_range[i].start == start &&
> -                  node_memblk_range[i].end > end))
> -                     break;
> -
> -     memmove(&node_memblk_range[i + 1], &node_memblk_range[i],
> -             (num_node_memblks - i) * sizeof(*node_memblk_range));
> -     node_memblk_range[i].start = start;
> -     node_memblk_range[i].end = end;
> -
> -     memmove(&memblk_nodeid[i + 1], &memblk_nodeid[i],
> -             (num_node_memblks - i) * sizeof(*memblk_nodeid));
> -     memblk_nodeid[i] = node;
> -
> -     if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
> -             next = true;
> -             if (end > mem_hotplug)
> -                     mem_hotplug = end;
> -     }
> -     for (; i <= num_node_memblks; ++i) {
> -             bool prev = next;
> -
> -             next = test_bit(i, memblk_hotplug);
> -             if (prev)
> -                     __set_bit(i, memblk_hotplug);
> -             else
> -                     __clear_bit(i, memblk_hotplug);
> -     }
> -
> -     num_node_memblks++;
> -}
> -
> -/* Sanity check to catch more bad SRATs (they are amazingly common).
> -   Make sure the PXMs cover all memory. */
> -static int __init nodes_cover_memory(void)
> -{
> -     unsigned int i;
> -
> -     for (i = 0; ; i++) {
> -             int err;
> -             unsigned int j;
> -             bool found;
> -             paddr_t start, end;
> -
> -             /* Try to loop memory map from index 0 to end to get RAM 
> ranges. */
> -             err = arch_get_ram_range(i, &start, &end);
> -
> -             /* Reached the end of the memory map? */
> -             if (err == -ENOENT)
> -                     break;
> -
> -             /* Skip non-RAM entries. */
> -             if (err)
> -                     continue;
> -
> -             do {
> -                     found = false;
> -                     for_each_node_mask(j, memory_nodes_parsed)
> -                             if (start < nodes[j].end
> -                                 && end > nodes[j].start) {
> -                                     if (start >= nodes[j].start) {
> -                                             start = nodes[j].end;
> -                                             found = true;
> -                                     }
> -                                     if (end <= nodes[j].end) {
> -                                             end = nodes[j].start;
> -                                             found = true;
> -                                     }
> -                             }
> -             } while (found && start < end);
> -
> -             if (start < end) {
> -                     printk(KERN_ERR "NUMA: No NODE for RAM range: "
> -                             "[%"PRIpaddr", %"PRIpaddr"]\n", start, end - 1);
> -                     return 0;
> -             }
> -     }
> -     return 1;
> +     numa_fw_nid_name = "PXM";

... this to be happening too late. Not because I can see a way for current
code to use the variable earlier, but because of the risk of future code
potentially doing so. Afaics srat_parse_regions() is called quite a bit
earlier, so perhaps the field should (also?) be set there, presumably
after acpi_table_parse() has succeeded. I've included "(also?)" because I
think to be on the safe side the setting here may want keeping, albeit
perhaps moving up in the function.

Jan

Reply via email to