Il 13/05/2013 05:21, Liu Ping Fan ha scritto: > From: Liu Ping Fan <pingf...@linux.vnet.ibm.com> > > Now, each AddressSpaceDispatch has its own radix-tree, and all of them > lie on phys_section[] and phys_map_nodes[]. When we want lockless > mmio dispatch, we need something like RCU. > > Acheive this with PhysPageTable which contains all of the info for all > radix trees. After all address space listeners update (ie. excluding the > readers) we switch from PhysPageTable *cur_pgtbl to *next_pgtbl. > (The real RCU style is adopt by listener, see next patch) > > Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> > --- > exec.c | 197 > +++++++++++++++++++++++++------------------------ > include/exec/memory.h | 2 + > memory.c | 2 + > 3 files changed, 106 insertions(+), 95 deletions(-) > > diff --git a/exec.c b/exec.c > index c5f8082..bb4e540 100644 > --- a/exec.c > +++ b/exec.c > @@ -80,23 +80,34 @@ int use_icount; > #if !defined(CONFIG_USER_ONLY) > > #define SUBSECTION_IDX(addr) ((addr) & ~TARGET_PAGE_MASK) > -#define PHYS_SECTION_ID(psection) ((psection) - phys_sections) > +#define PHYS_SECTION_ID(psection, base) ((psection) - base->phys_sections) > > typedef struct PhysSection { > MemoryRegionSection section; > uint16_t *sub_section; > } PhysSection; > > -static PhysSection *phys_sections; > -static unsigned phys_sections_nb, phys_sections_nb_alloc; > -static uint16_t phys_section_unassigned; > -static uint16_t phys_section_notdirty; > -static uint16_t phys_section_rom; > -static uint16_t phys_section_watch; > +typedef PhysPageEntry Node[L2_SIZE]; > > -/* Simple allocator for PhysPageEntry nodes */ > -static PhysPageEntry (*phys_map_nodes)[L2_SIZE]; > -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc; > +typedef struct PhysPageTable PhysPageTable; > + > +struct PhysPageTable { > + int ref; > + PhysSection *phys_sections; > + unsigned phys_sections_nb; > + unsigned phys_sections_nb_alloc; > + uint16_t phys_section_unassigned; > + uint16_t phys_section_notdirty; > + uint16_t phys_section_rom; > + uint16_t phys_section_watch;
These four should be constants. Please make them #defines and add assertions that they have the expected values (in a separate patch). > + > + Node *phys_map_nodes; > + unsigned phys_map_nodes_nb; > + unsigned phys_map_nodes_nb_alloc; > +}; > +static PhysPageTable *cur_pgtbl; > +static PhysPageTable *next_pgtbl; You shouldn't need cur_pgtbl. Instead, each AddressSpaceDispatch should have a pointer to its own cur_pgtbl. In the commit hook you can then take a lock, unref the old table, assign cur_pgtbl = next_pgtbl and ref the new one. The lock must also be taken in address_space_translate, together with a relatively expensive ref/unref pair (two atomic operations). We probably need real RCU so that we can skip the ref/unref. > > #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) > > @@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch; > > static void phys_map_node_reserve(unsigned nodes) > { > - if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) { > + if (next_pgtbl->phys_map_nodes_nb + nodes > > next_pgtbl->phys_map_nodes_nb_alloc) { > typedef PhysPageEntry Node[L2_SIZE]; > - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16); > - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc, > - phys_map_nodes_nb + nodes); > - phys_map_nodes = g_renew(Node, phys_map_nodes, > - phys_map_nodes_nb_alloc); > + next_pgtbl->phys_map_nodes_nb_alloc = > MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16); > + next_pgtbl->phys_map_nodes_nb_alloc = > MAX(next_pgtbl->phys_map_nodes_nb_alloc, > + next_pgtbl->phys_map_nodes_nb + nodes); > + next_pgtbl->phys_map_nodes = g_renew(Node, > next_pgtbl->phys_map_nodes, > + next_pgtbl->phys_map_nodes_nb_alloc); > } > } > > @@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void) > unsigned i; > uint16_t ret; > > - ret = phys_map_nodes_nb++; > + ret = next_pgtbl->phys_map_nodes_nb++; > assert(ret != PHYS_MAP_NODE_NIL); > - assert(ret != phys_map_nodes_nb_alloc); > + assert(ret != next_pgtbl->phys_map_nodes_nb_alloc); > for (i = 0; i < L2_SIZE; ++i) { > - phys_map_nodes[ret][i].is_leaf = 0; > - phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; > + next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0; > + next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; > } > return ret; > } > > -static void phys_map_nodes_reset(void) > -{ > - phys_map_nodes_nb = 0; > -} > - > - > static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index, > hwaddr *nb, uint16_t leaf, > int level) > @@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp, > hwaddr *index, > > if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) { > lp->ptr = phys_map_node_alloc(); > - p = phys_map_nodes[lp->ptr]; > + p = next_pgtbl->phys_map_nodes[lp->ptr]; > if (level == 0) { > for (i = 0; i < L2_SIZE; i++) { > p[i].is_leaf = 1; > - p[i].ptr = phys_section_unassigned; > + p[i].ptr = next_pgtbl->phys_section_unassigned; > } > } > } else { > - p = phys_map_nodes[lp->ptr]; > + p = next_pgtbl->phys_map_nodes[lp->ptr]; > } > lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)]; > > @@ -192,11 +197,13 @@ static PhysSection > *phys_section_find(AddressSpaceDispatch *d, > { > PhysPageEntry lp = d->phys_map; > PhysPageEntry *p; > + PhysSection *phys_sections = cur_pgtbl->phys_sections; > + Node *phys_map_nodes = cur_pgtbl->phys_map_nodes; > int i; > > for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) { > if (lp.ptr == PHYS_MAP_NODE_NIL) { > - return &phys_sections[phys_section_unassigned]; > + return &phys_sections[cur_pgtbl->phys_section_unassigned]; > } > p = phys_map_nodes[lp.ptr]; > lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)]; > @@ -209,6 +216,7 @@ static MemoryRegionSection > *address_space_lookup_region(AddressSpace *as, > { > PhysSection *psection; > uint16_t idx; > + PhysSection *phys_sections = cur_pgtbl->phys_sections; > > psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS); > if (psection->sub_section) { > @@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace > *as, hwaddr addr, > | (addr & iotlb.addr_mask)); > len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); > if (!iotlb.perm[is_write]) { > - section = &phys_sections[phys_section_unassigned].section; > + section = > &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section; > break; > } > > @@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState > *env, > iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) > + xlat; > if (!section->readonly) { > - iotlb |= phys_section_notdirty; > + iotlb |= cur_pgtbl->phys_section_notdirty; > } else { > - iotlb |= phys_section_rom; > + iotlb |= cur_pgtbl->phys_section_rom; > } > } else { > - iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section)); > + iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section), > cur_pgtbl); > iotlb += xlat; > } > > @@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env, > if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) { > /* Avoid trapping reads of pages with a write breakpoint. */ > if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) { > - iotlb = phys_section_watch + paddr; > + iotlb = cur_pgtbl->phys_section_watch + paddr; > *address |= TLB_MMIO; > break; > } > @@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection, > uint32_t start, > uint32_t end, uint16_t section); > static void subsections_init(PhysSection *psection); > > -static void destroy_page_desc(uint16_t section_index) > +/* Call after all listener has been commit. > + * we do not walk over tree, just simply drop. > + */ > +static void destroy_pagetable(PhysPageTable *pgtbl) > { > - g_free(phys_sections[section_index].sub_section); > -} > - > -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level) > -{ > - unsigned i; > - PhysPageEntry *p; > + int i; > > - if (lp->ptr == PHYS_MAP_NODE_NIL) { > - return; > - } > + g_free(pgtbl->phys_map_nodes); > > - p = phys_map_nodes[lp->ptr]; > - for (i = 0; i < L2_SIZE; ++i) { > - if (!p[i].is_leaf) { > - destroy_l2_mapping(&p[i], level - 1); > + for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) { > + if (pgtbl->phys_sections[i].sub_section) { > + g_free(pgtbl->phys_sections[i].sub_section); > } else { > - destroy_page_desc(p[i].ptr); > + memory_region_unref(pgtbl->phys_sections[i].section.mr); > } > } > - lp->is_leaf = 0; > - lp->ptr = PHYS_MAP_NODE_NIL; > -} > + g_free(pgtbl->phys_sections); > > -static void destroy_all_mappings(AddressSpaceDispatch *d) > -{ > - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1); > - phys_map_nodes_reset(); > + g_free(pgtbl); > } This should be a separate patch. > -static uint16_t phys_section_add(MemoryRegionSection *section) > +static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable > *pgtbl) phys_section_add will always add to next_pgtbl. > { > - assert(phys_sections_nb < TARGET_PAGE_SIZE); > + assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE); > > - if (phys_sections_nb == phys_sections_nb_alloc) { > - phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16); > - phys_sections = g_renew(PhysSection, phys_sections, > - phys_sections_nb_alloc); > + if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) { > + pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc * > 2, 16); > + pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections, > + pgtbl->phys_sections_nb_alloc); > } > - phys_sections[phys_sections_nb].section = *section; > - phys_sections[phys_sections_nb].sub_section = NULL; > + pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section; > + pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL; > memory_region_ref(section->mr); > - return phys_sections_nb++; > -} > - > -static void phys_sections_clear(void) > -{ > - while (phys_sections_nb > 0) { > - PhysSection *phys_section = &phys_sections[--phys_sections_nb]; > - memory_region_unref(phys_section->section.mr); > - } > + return pgtbl->phys_sections_nb++; > } > > static void register_subsection(AddressSpaceDispatch *d, > @@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d, > psection->section.mr == &io_mem_unassigned); > > if (!psection->sub_section) { > - new_section = phys_section_add(&subsection); > - psection = &phys_sections[new_section]; > + new_section = phys_section_add(&subsection, next_pgtbl); > + psection = &next_pgtbl->phys_sections[new_section]; > subsections_init(psection); > phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section); > } else { > - new_section = PHYS_SECTION_ID(psection); > + new_section = PHYS_SECTION_ID(psection, next_pgtbl); > } > > - new_subsection = phys_section_add(section); > + new_subsection = phys_section_add(section, next_pgtbl); > > /* phys_section_add invalidates psection, reload it */ > - psection = &phys_sections[new_section]; > + psection = &next_pgtbl->phys_sections[new_section]; > start = section->offset_within_address_space & ~TARGET_PAGE_MASK; > end = start + section->size - 1; > subsection_register(psection, start, end, new_subsection); > @@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d, > MemoryRegionSection *sec > hwaddr start_addr = section->offset_within_address_space; > ram_addr_t size = section->size; > hwaddr addr; > - uint16_t section_index = phys_section_add(section); > + uint16_t section_index = phys_section_add(section, next_pgtbl); > > assert(size); > > @@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection) > { > psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE); > subsection_register(psection, 0, TARGET_PAGE_SIZE-1, > - phys_section_unassigned); > + next_pgtbl->phys_section_unassigned); > } > > static uint16_t dummy_section(MemoryRegion *mr) > @@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr) > .size = UINT64_MAX, > }; > > - return phys_section_add(§ion); > + return phys_section_add(§ion, next_pgtbl); > } > > MemoryRegion *iotlb_to_region(hwaddr index) > { > - return phys_sections[index & ~TARGET_PAGE_MASK].section.mr; > + return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr; > } > > static void io_mem_init(void) > @@ -1685,21 +1674,40 @@ static void io_mem_init(void) > "watch", UINT64_MAX); > } > > +void global_begin(void) > +{ > + next_pgtbl = g_new0(PhysPageTable, 1); > + next_pgtbl->ref = 1; > + next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned); > + next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty); > + next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom); > + next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch); > +} > + > +/* other listeners finished */ > +void global_commit(void) > +{ > + PhysPageTable *t = cur_pgtbl; > + > + cur_pgtbl = next_pgtbl; > + /* Fix me, currently, we rely on each address space listener agaist its > + * reader. So when we come here, no readers will touch old > phys_map_node. > + * After rcu, should changed to call_rcu() > + */ > + if (__sync_sub_and_fetch(&t->ref, 1) == 0) { > + destroy_pagetable(t); > + } global_commit should not be needed, and global_begin should be simply the new core_begin. It should unref next_pgtbl and reallocate a new one. > +} > + > static void mem_begin(MemoryListener *listener) > { > AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, > listener); > > - destroy_all_mappings(d); > d->phys_map.ptr = PHYS_MAP_NODE_NIL; > } > > static void core_begin(MemoryListener *listener) > { > - phys_sections_clear(); > - phys_section_unassigned = dummy_section(&io_mem_unassigned); > - phys_section_notdirty = dummy_section(&io_mem_notdirty); > - phys_section_rom = dummy_section(&io_mem_rom); > - phys_section_watch = dummy_section(&io_mem_watch); > } > > static void tcg_commit(MemoryListener *listener) > @@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as) > AddressSpaceDispatch *d = as->dispatch; > > memory_listener_unregister(&d->listener); > - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1); > g_free(d); > as->dispatch = NULL; > } > @@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val) > > if (!memory_region_is_ram(section->mr) || section->readonly) { > if (memory_region_is_ram(section->mr)) { > - section = &phys_sections[phys_section_rom].section; > + section = > &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section; > } > io_mem_write(section->mr, addr, val, 4); > } else { > @@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val) > > if (!memory_region_is_ram(section->mr) || section->readonly) { > if (memory_region_is_ram(section->mr)) { > - section = &phys_sections[phys_section_rom].section; > + section = > &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section; > } > #ifdef TARGET_WORDS_BIGENDIAN > io_mem_write(section->mr, addr, val >> 32, 4); > @@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr, > uint32_t val, > > if (!memory_region_is_ram(section->mr) || section->readonly) { > if (memory_region_is_ram(section->mr)) { > - section = &phys_sections[phys_section_rom].section; > + section = > &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section; > } > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > @@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr, > uint32_t val, > > if (!memory_region_is_ram(section->mr) || section->readonly) { > if (memory_region_is_ram(section->mr)) { > - section = &phys_sections[phys_section_rom].section; > + section = > &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section; > } > #if defined(TARGET_WORDS_BIGENDIAN) > if (endian == DEVICE_LITTLE_ENDIAN) { > diff --git a/include/exec/memory.h b/include/exec/memory.h > index b97ace7..cc654fa 100644 > --- a/include/exec/memory.h > +++ b/include/exec/memory.h > @@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr, > void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, > int is_write, hwaddr access_len); > > +void global_begin(void); > +void global_commit(void); > > #endif > > diff --git a/memory.c b/memory.c > index 1a86607..da06dfd 100644 > --- a/memory.c > +++ b/memory.c > @@ -805,6 +805,7 @@ void memory_region_transaction_commit(void) > --memory_region_transaction_depth; > if (!memory_region_transaction_depth && memory_region_update_pending) { > memory_region_update_pending = false; > + global_begin(); > MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); > > QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { > @@ -812,6 +813,7 @@ void memory_region_transaction_commit(void) > } > > MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); > + global_commit(); > } > } > >