Hi,
On 3/31/26 4:38 AM, Gustavo Romero wrote:
> Hi Alireza,
>
> On 3/11/26 13:06, Alireza Sanaee wrote:
>> Specify which level (core/cluster/socket) caches found at in the CPU
>> topology. Updating cache topology to device tree (spec v0.4).
>> Example:
>>
>> For example, 2 sockets (packages), and 2 clusters, 4 cores and 2 threads
>> created, in aggregate 2*2*4*2 logical cores. In the smp-cache object,
>> cores will have L1d and L1i. However, extending this is not difficult.
>> The clusters will share a unified L2 level cache, and finally sockets
>> will share L3. In this patch, threads will share L1 caches by default,
>> but this can be adjusted if case required.
>>
>> Only three levels of caches are supported. The patch does not
>> allow partial declaration of caches. In other words, the topology level
>> of every cache must be specified if that of any level is.
>>
>> ./qemu-system-aarch64 \
>> -machine virt,\
>> smp-cache.0.cache=l1i,smp-cache.0.topology=core,\
>> smp-cache.1.cache=l1d,smp-cache.1.topology=core,\
>> smp-cache.2.cache=l2,smp-cache.2.topology=cluster,\
>> smp-cache.3.cache=l3,smp-cache.3.topology=socket\
>> -cpu max \
>> -m 2048 \
>> -smp sockets=2,clusters=2,cores=4,threads=1 \
>> -kernel ./Image.gz \
>> -append "console=ttyAMA0 root=/dev/ram rdinit=/init acpi=force" \
>> -initrd rootfs.cpio.gz \
>> -bios ./edk2-aarch64-code.fd \
>> -nographic
>>
>> For instance, following device tree will be generated for a scenario
>> where we have 2 sockets, 2 clusters, 2 cores and 2 threads, in total 16
>> PEs. L1i and L1d are private to each thread, and L2 and L3 are shared at
>> socket level as an example.
>>
>> Limitation: SMT cores cannot share L1 cache for now. This
>> problem does not exist in PPTT tables.
>>
>> Co-developed-by: Jonathan Cameron <[email protected]>
>> Signed-off-by: Jonathan Cameron <[email protected]>
>> Signed-off-by: Alireza Sanaee <[email protected]>
>> ---
>> Thanks for the review Gustavo.
>> Change log:
>> v18->v19:
>> - Fix comments. Adding more details to the commit message.
>> - Removed the extra check on existence of cache at the
>> topology level
>> in add_cpu_cache_hierarchy function since it is
>> already checked
>> in fdt_add_cpu_nodes function.
>> - Fixed typos.
>>
>> hw/arm/virt.c | 335 +++++++++++++++++++++++++++++++++++++++++-
>> include/hw/arm/virt.h | 17 ++-
>> include/hw/core/cpu.h | 12 ++
>> 3 files changed, 362 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 7456614d05..87fb31ec6b 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -91,6 +91,7 @@
>> #include "hw/virtio/virtio-md-pci.h"
>> #include "hw/virtio/virtio-iommu.h"
>> #include "hw/char/pl011.h"
>> +#include "hw/core/cpu.h"
>> #include "hw/cxl/cxl.h"
>> #include "hw/cxl/cxl_host.h"
>> #include "qemu/guest-random.h"
>> @@ -281,6 +282,86 @@ static bool ns_el2_virt_timer_present(void)
>> arm_feature(env, ARM_FEATURE_EL2) &&
>> cpu_isar_feature(aa64_vh, cpu);
>> }
>> +void set_cpu_cache(CPUCoreCaches *cpu_cache, enum CacheType
>> cache_type,
>> + int cache_level, bool is_i_cache0)
>> +{
>> + int bank_index = ((cache_level - 1) * 2) | is_i_cache0;
>> + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0));
>> + bool ccidx = cpu_isar_feature(any_ccidx, armcpu);
>> +
>> + if (ccidx) {
>> + *cpu_cache = (CPUCoreCaches){
>> + .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
>> CCSIDR_EL1,
>> + CCIDX_LINESIZE) + 4),
>> + .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
>> CCSIDR_EL1,
>> + CCIDX_ASSOCIATIVITY) + 1,
>> + .sets = FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
>> + CCIDX_NUMSETS) + 1,
>> + };
>> + } else {
>> + *cpu_cache = (CPUCoreCaches){
>> + .linesize = 1 << (FIELD_EX64(armcpu->ccsidr[bank_index],
>> CCSIDR_EL1,
>> + LINESIZE) + 4),
>> + .associativity = FIELD_EX64(armcpu->ccsidr[bank_index],
>> CCSIDR_EL1,
>> + ASSOCIATIVITY) + 1,
>> + .sets =
>> + FIELD_EX64(armcpu->ccsidr[bank_index], CCSIDR_EL1,
>> NUMSETS) + 1,
>> + };
>> + }
>> + cpu_cache->type = cache_type;
>> + cpu_cache->level = cache_level;
>> + cpu_cache->size = cpu_cache->associativity *
>> + cpu_cache->sets *
>> + cpu_cache->linesize;
>> +
>> + return;
>> +}
>> +
>> +unsigned int virt_get_caches(const VirtMachineState *vms,
>> CPUCoreCaches *caches)
>> +{
>> + int num_cache = 0;
>> + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(0)); /* assume homogeneous
>> CPUs */
>> + ARMISARegisters *isar = &armcpu->isar;
>> + uint32_t clidr = GET_IDREG(isar, CLIDR);
One question: In KVM mode where is CLIDR set. I don't see it read from
host so where is it set? Is it assumed to be 0?
Thanks
Eric
>> +
>> + for (int cache_level = 1; cache_level <=
>> CLIDR_CTYPE_MAX_CACHE_LEVEL;
>> + cache_level++) {
>> + uint8_t ctype =
>> + (clidr >> (3 * (cache_level - 1))) &
>> CLIDR_CTYPE_MAX_CACHE_LEVEL;
>> +
>> + if (ctype == CLIDR_CTYPE_NO_CACHE) {
>> + /*
>> + * If a "No cache" cache type is found it means no
>> manageable caches
>> + * exist at further-out levels of hierarchy, so ignore
>> them.
>> + */
>> + break;
>> + } else if (ctype == CLIDR_CTYPE_SEPARATE_I_D_CACHES) {
>> + /*
>> + * Create separate D and I caches. D-cache is stored first.
>> + */
>> + enum CacheType cache_type;
>> + for (cache_type = DATA_CACHE; cache_type <=
>> INSTRUCTION_CACHE;
>> + cache_type++) {
>> + set_cpu_cache(&caches[num_cache++], cache_type,
>> cache_level,
>> + cache_type == INSTRUCTION_CACHE ? true
>> : false);
>> + }
>> + } else if (ctype == CLIDR_CTYPE_UNIFIED_CACHE) {
>> + set_cpu_cache(&caches[num_cache++], UNIFIED_CACHE,
>> cache_level,
>> + false);
>> + } else if (ctype == CLIDR_CTYPE_D_CACHE) {
>> + set_cpu_cache(&caches[num_cache++], DATA_CACHE,
>> cache_level, false);
>> + } else if (ctype == CLIDR_CTYPE_I_CACHE) {
>> + set_cpu_cache(&caches[num_cache++], INSTRUCTION_CACHE,
>> cache_level,
>> + true);
>> + } else {
>> + error_setg(&error_abort, "Unrecognized cache type");
>> + return 0;
>> + }
>> + }
>> +
>> + return num_cache;
>> +}
>> +
>> static void create_fdt(VirtMachineState *vms)
>> {
>> MachineState *ms = MACHINE(vms);
>> @@ -431,13 +512,124 @@ static void fdt_add_timer_nodes(const
>> VirtMachineState *vms)
>> }
>> }
>> +static void add_cache_node(void *fdt, char *nodepath,
>> CPUCoreCaches cache,
>> + uint32_t *next_level)
>> +{
>> + /* Assume L2/3 are unified caches. */
>> +
>> + uint32_t phandle;
>> +
>> + qemu_fdt_add_path(fdt, nodepath);
>> + phandle = qemu_fdt_alloc_phandle(fdt);
>> + qemu_fdt_setprop_cell(fdt, nodepath, "phandle", phandle);
>> + qemu_fdt_setprop_cell(fdt, nodepath, "cache-level", cache.level);
>> + qemu_fdt_setprop_cell(fdt, nodepath, "cache-size", cache.size);
>> + qemu_fdt_setprop_cell(fdt, nodepath, "cache-block-size",
>> cache.linesize);
>> + qemu_fdt_setprop_cell(fdt, nodepath, "cache-sets", cache.sets);
>> + qemu_fdt_setprop(fdt, nodepath, "cache-unified", NULL, 0);
>> + qemu_fdt_setprop_string(fdt, nodepath, "compatible", "cache");
>> + if (cache.level != 3) {
>> + /* top level cache doesn't have next-level-cache property */
>> + qemu_fdt_setprop_cell(fdt, nodepath, "next-level-cache",
>> *next_level);
>> + }
>> +
>> + *next_level = phandle;
>> +}
>> +
>> +static bool add_cpu_cache_hierarchy(void *fdt, CPUCoreCaches* cache,
>> + uint32_t cache_cnt,
>> + uint32_t top_level,
>> + uint32_t bottom_level,
>> + uint32_t cpu_id,
>> + uint32_t *next_level) {
>> + bool found_cache = false;
>> +
>> + for (int level = top_level; level >= bottom_level; level--) {
>> + for (int i = 0; i < cache_cnt; i++) {
>> + char *nodepath;
>> +
>> + if (i != level) {
>> + continue;
>> + }
>> +
>> + nodepath = g_strdup_printf("/cpus/cpu@%d/l%d-cache",
>> + cpu_id, level);
>> + add_cache_node(fdt, nodepath, cache[i], next_level);
>> + found_cache = true;
>> + g_free(nodepath);
>> +
>> + }
>> + }
>> +
>> + return found_cache;
>> +}
>> +
>> +static void set_cache_properties(void *fdt, const char *nodename,
>> + const char *prefix, CPUCoreCaches
>> cache)
>> +{
>> + char prop_name[64];
>> +
>> + snprintf(prop_name, sizeof(prop_name), "%s-block-size", prefix);
>> + qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.linesize);
>> +
>> + snprintf(prop_name, sizeof(prop_name), "%s-size", prefix);
>> + qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.size);
>> +
>> + snprintf(prop_name, sizeof(prop_name), "%s-sets", prefix);
>> + qemu_fdt_setprop_cell(fdt, nodename, prop_name, cache.sets);
>> +}
>> +
>> +static bool partial_cache_description(const MachineState *ms, int
>> num_caches)
>> +{
>> + assert(num_caches - 1 < CACHE_LEVEL_AND_TYPE__MAX);
>> + enum CpuTopologyLevel topo_level;
>> + enum CacheLevelAndType cache_level;
>> +
>> + for (cache_level = 0; cache_level < num_caches; cache_level++) {
>> + topo_level = machine_get_cache_topo_level(ms, cache_level);
>> + if (topo_level == CPU_TOPOLOGY_LEVEL_DEFAULT) {
>> + /* No topology level described for this cache level. */
>> + return true;
>> + }
>> + }
>> +
>> + return false;
>> +}
>> +
>> static void fdt_add_cpu_nodes(const VirtMachineState *vms)
>> {
>> int cpu;
>> int addr_cells = 1;
>> const MachineState *ms = MACHINE(vms);
>> + const MachineClass *mc = MACHINE_GET_CLASS(ms);
>> const VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
>> int smp_cpus = ms->smp.cpus;
>> + int socket_id, cluster_id, core_id;
>> + uint32_t next_level = 0;
>> + uint32_t socket_offset = 0;
>> + uint32_t cluster_offset = 0;
>> + uint32_t core_offset = 0;
>> + int last_socket = -1;
>> + int last_cluster = -1;
>> + int last_core = -1;
>> + int top_node = 3;
>> + int top_cluster = 3;
>> + int top_core = 3;
>> + int bottom_node = 3;
>> + int bottom_cluster = 3;
>> + int bottom_core = 3;
>> + unsigned int num_cache;
>> + CPUCoreCaches caches[CPU_MAX_CACHES];
>> + bool cache_created = false;
>> + bool cache_at_topo_level;
>> +
>> + num_cache = virt_get_caches(vms, caches);
>> +
>> + if (mc->smp_props.has_caches &&
>> + partial_cache_description(ms, num_cache)) {
>> + error_setg(&error_fatal, "Missing cache description");
>> + return;
>> + }
>> /*
>> * See Linux Documentation/devicetree/bindings/arm/cpus.yaml
>> @@ -466,9 +658,14 @@ static void fdt_add_cpu_nodes(const
>> VirtMachineState *vms)
>> qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
>> for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
>> + socket_id = cpu / (ms->smp.clusters * ms->smp.cores *
>> ms->smp.threads);
>> + cluster_id = cpu / (ms->smp.cores * ms->smp.threads) %
>> ms->smp.clusters;
>> + core_id = cpu / ms->smp.threads % ms->smp.cores;
>> +
>> char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
>> ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
>> CPUState *cs = CPU(armcpu);
>> + const char *prefix = NULL;
>> qemu_fdt_add_subnode(ms->fdt, nodename);
>> qemu_fdt_setprop_string(ms->fdt, nodename, "device_type",
>> "cpu");
>> @@ -498,6 +695,136 @@ static void fdt_add_cpu_nodes(const
>> VirtMachineState *vms)
>> qemu_fdt_alloc_phandle(ms->fdt));
>> }
>> + if (!vmc->no_cpu_topology && num_cache) {
>> + for (uint8_t i = 0; i < num_cache; i++) {
>> + /* Only level 1 in the CPU entry. */
>> + if (caches[i].level > 1) {
>> + continue;
>> + }
>> +
>> + if (caches[i].type == INSTRUCTION_CACHE) {
>> + prefix = "i-cache";
>> + } else if (caches[i].type == DATA_CACHE) {
>> + prefix = "d-cache";
>> + } else if (caches[i].type == UNIFIED_CACHE) {
>> + error_setg(&error_fatal,
>> + "Unified type is not implemented at
>> level %d",
>> + caches[i].level);
>> + return;
>> + } else {
>> + error_setg(&error_fatal, "Undefined cache type");
>> + return;
>> + }
>> +
>> + set_cache_properties(ms->fdt, nodename, prefix,
>> caches[i]);
>> + }
>> + }
>> +
>> + if (socket_id != last_socket) {
>> + bottom_node = top_node;
>> + /* This assumes socket as the highest topological level. */
>> + socket_offset = 0;
>> + cluster_offset = 0;
>> + cache_at_topo_level =
>> + machine_find_lowest_level_cache_at_topo_level(ms,
>> + &bottom_node,
>> + CPU_TOPOLOGY_LEVEL_SOCKET);
>> + if (cache_at_topo_level) {
>> + if (bottom_node == 1 && !virt_is_acpi_enabled(vms))
>> + error_setg(
>> + &error_fatal,
>> + "Cannot share L1 at socket_id %d."
>> + "DT limitation on sharing at cache level = 1",
>> + socket_id);
>> +
>> + cache_created = add_cpu_cache_hierarchy(ms->fdt,
>> caches,
>> + num_cache,
>> + top_node,
>> + bottom_node,
>> cpu,
>> +
>> &socket_offset);
>> +
>> + if (!cache_created) {
>> + error_setg(&error_fatal,
>> + "Socket: No caches at levels %d-%d",
>> + top_node, bottom_node);
>> + return;
>> + }
>> +
>> + top_cluster = bottom_node - 1;
>> + }
>> +
>> + last_socket = socket_id;
>> + }
>> +
>> + if (cluster_id != last_cluster) {
>> + bottom_cluster = top_cluster;
>> + cluster_offset = socket_offset;
>> + core_offset = 0;
>> + cache_at_topo_level =
>> + machine_find_lowest_level_cache_at_topo_level(ms,
>> + &bottom_cluster,
>> + CPU_TOPOLOGY_LEVEL_CLUSTER);
>> + if (cache_at_topo_level) {
>> + cache_created = add_cpu_cache_hierarchy(ms->fdt,
>> caches,
>> + num_cache,
>> + top_cluster,
>> +
>> bottom_cluster, cpu,
>> +
>> &cluster_offset);
>> + if (bottom_cluster == 1 &&
>> !virt_is_acpi_enabled(vms)) {
>> + error_setg(&error_fatal,
>> + "Cannot share L1 at socket_id %d, cluster_id
>> %d. "
>> + "DT limitation on sharing at cache level = 1.",
>> + socket_id, cluster_id);
>> + }
>> +
>> + if (!cache_created) {
>> + error_setg(&error_fatal,
>> + "Cluster: No caches at levels %d-%d.",
>> + top_cluster, bottom_cluster);
>> + return;
>> + }
>> +
>> + top_core = bottom_cluster - 1;
>> + } else if (top_cluster == bottom_node - 1) {
>> + top_core = bottom_node - 1;
>> + }
>> +
>> + last_cluster = cluster_id;
>> + }
>> +
>> + if (core_id != last_core) {
>> + bottom_core = top_core;
>> + core_offset = cluster_offset;
>> + cache_at_topo_level =
>> + machine_find_lowest_level_cache_at_topo_level(ms,
>> + &bottom_core,
>> + CPU_TOPOLOGY_LEVEL_CORE);
>> + if (cache_at_topo_level) {
>> + if (bottom_core == 1 && top_core > 1) {
>> + bottom_core++;
>> + cache_created = add_cpu_cache_hierarchy(ms->fdt,
>> + caches,
>> + num_cache,
>> + top_core,
>> +
>> bottom_core, cpu,
>> +
>> &core_offset);
>> +
>> + if (!cache_created) {
>> + error_setg(&error_fatal,
>> + "Core: No caches at levels %d-%d",
>> + top_core, bottom_core);
>> + return;
>> + }
>> + }
>> + }
>> +
>> + last_core = core_id;
>> + }
>> +
>> + next_level = core_offset;
>> + qemu_fdt_setprop_cell(ms->fdt, nodename, "next-level-cache",
>> + next_level);
>> +
>> g_free(nodename);
>> }
>> @@ -2912,7 +3239,7 @@ static void virt_set_oem_table_id(Object
>> *obj, const char *value,
>> }
>> -bool virt_is_acpi_enabled(VirtMachineState *vms)
>> +bool virt_is_acpi_enabled(const VirtMachineState *vms)
>> {
>> if (vms->acpi == ON_OFF_AUTO_OFF) {
>> return false;
>> @@ -3496,6 +3823,12 @@ static void
>> virt_machine_class_init(ObjectClass *oc, const void *data)
>> hc->unplug = virt_machine_device_unplug_cb;
>> mc->nvdimm_supported = true;
>> mc->smp_props.clusters_supported = true;
>> +
>> + /* Supported caches */
>> + mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1D] = true;
>> + mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L1I] = true;
>> + mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L2] = true;
>> + mc->smp_props.cache_supported[CACHE_LEVEL_AND_TYPE_L3] = true;
>> mc->auto_enable_numa_with_memhp = true;
>> mc->auto_enable_numa_with_memdev = true;
>> /* platform instead of architectural choice */
>> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
>> index dba8ac7f2f..b22868c210 100644
>> --- a/include/hw/arm/virt.h
>> +++ b/include/hw/arm/virt.h
>> @@ -41,6 +41,7 @@
>> #include "system/kvm.h"
>> #include "hw/intc/arm_gicv3_common.h"
>> #include "qom/object.h"
>> +#include "hw/core/cpu.h"
>> #define NUM_GICV2M_SPIS 64
>> #define NUM_VIRTIO_TRANSPORTS 32
>> @@ -52,6 +53,8 @@
>> /* GPIO pins */
>> #define GPIO_PIN_POWER_BUTTON 3
>> +#define CPU_MAX_CACHES 16
>> +
>> enum {
>> VIRT_FLASH,
>> VIRT_MEM,
>> @@ -195,7 +198,19 @@ struct VirtMachineState {
>> OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE)
>> void virt_acpi_setup(VirtMachineState *vms);
>> -bool virt_is_acpi_enabled(VirtMachineState *vms);
>> +bool virt_is_acpi_enabled(const VirtMachineState *vms);
>> +
>> +#define CLIDR_CTYPE_NO_CACHE 0x00
>> +#define CLIDR_CTYPE_I_CACHE 0x01
>> +#define CLIDR_CTYPE_D_CACHE 0x02
>> +#define CLIDR_CTYPE_SEPARATE_I_D_CACHES 0x03
>> +#define CLIDR_CTYPE_UNIFIED_CACHE 0x04
>> +#define CLIDR_CTYPE_MAX_CACHE_LEVEL 7
>> +
>> +unsigned int virt_get_caches(const VirtMachineState *vms,
>> + CPUCoreCaches *caches);
>> +void set_cpu_cache(CPUCoreCaches *cpu_cache, enum CacheType cache_type,
>> + int cache_level, bool is_i_cache);
>> /* Return number of redistributors that fit in the specified
>> region */
>> static uint32_t virt_redist_capacity(VirtMachineState *vms, int
>> region)
>> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
>> index 7d2f4459d2..08f8df3586 100644
>> --- a/include/hw/core/cpu.h
>> +++ b/include/hw/core/cpu.h
>> @@ -1218,4 +1218,16 @@ enum CacheType {
>> UNIFIED_CACHE
>> };
>> +struct CPUCoreCaches {
>> + enum CacheType type;
>> + uint32_t sets;
>> + uint32_t size;
>> + uint32_t level;
>> + uint16_t linesize;
>> + uint8_t attributes; /* write policy: 0x0 write back, 0x1 write
>> through */
>> + uint8_t associativity;
>> +};
>> +
>> +typedef struct CPUCoreCaches CPUCoreCaches;
>> +
>> #endif
>
> Reviewed-by: Gustavo Romero <[email protected]>
>
>
> Cheers,
> Gustavo
>