On 26/05/2013 15:02, liu ping fan wrote:
> [...]
>>>>> +static PhysPageTable *cur_pgtbl;
>>>>> +static PhysPageTable *next_pgtbl;
>>>>
>>>> You shouldn't need cur_pgtbl.  Instead, each AddressSpaceDispatch should
>>>> have a pointer to its own cur_pgtbl.  In the commit hook you can then
>>>> take a lock, unref the old table, assign cur_pgtbl = next_pgtbl and ref
>>>> the new one.
>>>>
>>> Here is a trick. All AddressSpaceDispatch instances will update their
>>> radix trees at the same time, so the reclaimer can come after all of
>>> them have committed and drop the old phys_map_node[], which holds all
>>> the radix trees, at once. Is that OK? Or do you still prefer that each
>>> AddressSpaceDispatch has its own space for its radix tree?
>>
>> I'm not sure... I prefer to have more stuff in AddressSpaceDispatch, to
>> make things as streamlined as possible on the read side.
>>
> When I try to make each AddressSpaceDispatch own a separate radix
> tree,
Just to check we are on the same page: the radix tree will usually be
the same for all AddressSpaceDispatch instances, except if the commit
hook is running concurrently.  Is this right?

> I found that in subpage_read/write() we need to retrieve
> "phys_sections" for this subpage, i.e.,
>     section = &phys_sections[mmio->sub_section[idx]];
> will be changed to
>     section = &subpage->as_dispatch->phys_sections[mmio->sub_section[idx]];
> It will sacrifice performance. Is that acceptable?

It is acceptable, but it is also not necessary anymore. :)  subpage_read
is now doing simply:

static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *subpage = opaque;
    uint8_t buf[4];

#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
           subpage, len, addr);
#endif
    address_space_read(subpage->as, addr + subpage->base, buf, len);
    switch (len) {
    case 1:
        return ldub_p(buf);
    case 2:
        return lduw_p(buf);
    case 4:
        return ldl_p(buf);
    default:
        abort();
    }
}

You can base your work on the latest version of the code, which is in
my github rcu branch.  It is not very well tested, but it should work.

Paolo

> Regards,
> Pingfan
>
>>>> The lock must also be taken in address_space_translate, together with a
>>>> relatively expensive ref/unref pair (two atomic operations).  We
>>>> probably need real RCU so that we can skip the ref/unref.
>>>>
>>> Currently, without RCU, do we rely on as->lock? So there is no need to
>>> ref/unref cur_pgtbl? With RCU, I think we need ref/unref on mr, since
>>> mr->ops->read/write can block and should be excluded from the RCU
>>> section.
>>
>> Yes, we need ref/unref on the region, but in many cases that will be a
>> no-op (most importantly, for access to system RAM).  And if we can do
>> the dispatch in the RCU section, similar to what I proposed in reviewing
>> your patch 2, we have no ref/unref in the common case of accessing RAM.
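
For illustration only, here is a minimal sketch of the per-dispatch
commit hook described above.  The d->lock and d->cur_pgtbl fields and
the pgtbl_ref()/pgtbl_unref() helpers are hypothetical names, not part
of the posted patch:

/* Sketch: each AddressSpaceDispatch keeps a reference to the page
 * table its readers use.  The commit hook swaps in the table built
 * during the update, under a per-dispatch lock, and drops the old
 * reference afterwards.
 */
static void mem_commit(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch,
                                           listener);
    PhysPageTable *old;

    qemu_mutex_lock(&d->lock);      /* hypothetical per-dispatch lock */
    old = d->cur_pgtbl;             /* hypothetical per-dispatch field */
    d->cur_pgtbl = next_pgtbl;
    pgtbl_ref(d->cur_pgtbl);        /* hypothetical refcount helpers */
    qemu_mutex_unlock(&d->lock);

    if (old) {
        pgtbl_unref(old);           /* may destroy the old table */
    }
}

Readers would then take d->lock (or, later, an RCU read-side critical
section) around the lookup, so that the table cannot be destroyed while
a translation is in progress.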
>>
>>>>>  #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>>>>>
>>>>> @@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch;
>>>>>
>>>>>  static void phys_map_node_reserve(unsigned nodes)
>>>>>  {
>>>>> -    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
>>>>> +    if (next_pgtbl->phys_map_nodes_nb + nodes > next_pgtbl->phys_map_nodes_nb_alloc) {
>>>>>          typedef PhysPageEntry Node[L2_SIZE];
>>>>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
>>>>> -        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
>>>>> -                                      phys_map_nodes_nb + nodes);
>>>>> -        phys_map_nodes = g_renew(Node, phys_map_nodes,
>>>>> -                                 phys_map_nodes_nb_alloc);
>>>>> +        next_pgtbl->phys_map_nodes_nb_alloc = MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16);
>>>>> +        next_pgtbl->phys_map_nodes_nb_alloc = MAX(next_pgtbl->phys_map_nodes_nb_alloc,
>>>>> +                                                  next_pgtbl->phys_map_nodes_nb + nodes);
>>>>> +        next_pgtbl->phys_map_nodes = g_renew(Node, next_pgtbl->phys_map_nodes,
>>>>> +                                             next_pgtbl->phys_map_nodes_nb_alloc);
>>>>>      }
>>>>>  }
>>>>>
>>>>> @@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void)
>>>>>      unsigned i;
>>>>>      uint16_t ret;
>>>>>
>>>>> -    ret = phys_map_nodes_nb++;
>>>>> +    ret = next_pgtbl->phys_map_nodes_nb++;
>>>>>      assert(ret != PHYS_MAP_NODE_NIL);
>>>>> -    assert(ret != phys_map_nodes_nb_alloc);
>>>>> +    assert(ret != next_pgtbl->phys_map_nodes_nb_alloc);
>>>>>      for (i = 0; i < L2_SIZE; ++i) {
>>>>> -        phys_map_nodes[ret][i].is_leaf = 0;
>>>>> -        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>>>>> +        next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0;
>>>>> +        next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
>>>>>      }
>>>>>      return ret;
>>>>>  }
>>>>>
>>>>> -static void phys_map_nodes_reset(void)
>>>>> -{
>>>>> -    phys_map_nodes_nb = 0;
>>>>> -}
>>>>> -
>>>>> -
>>>>>  static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>>>>>                                  hwaddr *nb, uint16_t leaf,
>>>>>                                  int level)
>>>>> @@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
>>>>>
>>>>>      if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
>>>>>          lp->ptr = phys_map_node_alloc();
>>>>> -        p = phys_map_nodes[lp->ptr];
>>>>> +        p = next_pgtbl->phys_map_nodes[lp->ptr];
>>>>>          if (level == 0) {
>>>>>              for (i = 0; i < L2_SIZE; i++) {
>>>>>                  p[i].is_leaf = 1;
>>>>> -                p[i].ptr = phys_section_unassigned;
>>>>> +                p[i].ptr = next_pgtbl->phys_section_unassigned;
>>>>>              }
>>>>>          }
>>>>>      } else {
>>>>> -        p = phys_map_nodes[lp->ptr];
>>>>> +        p = next_pgtbl->phys_map_nodes[lp->ptr];
>>>>>      }
>>>>>      lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>>>>>
>>>>> @@ -192,11 +197,13 @@ static PhysSection *phys_section_find(AddressSpaceDispatch *d,
>>>>>  {
>>>>>      PhysPageEntry lp = d->phys_map;
>>>>>      PhysPageEntry *p;
>>>>> +    PhysSection *phys_sections = cur_pgtbl->phys_sections;
>>>>> +    Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
>>>>>      int i;
>>>>>
>>>>>      for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>>>>>          if (lp.ptr == PHYS_MAP_NODE_NIL) {
>>>>> -            return &phys_sections[phys_section_unassigned];
>>>>> +            return &phys_sections[cur_pgtbl->phys_section_unassigned];
>>>>>          }
>>>>>          p = phys_map_nodes[lp.ptr];
>>>>>          lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>>>>> @@ -209,6 +216,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
>>>>>  {
>>>>>      PhysSection *psection;
>>>>>      uint16_t idx;
>>>>> +    PhysSection *phys_sections = cur_pgtbl->phys_sections;
>>>>>
>>>>>      psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS);
>>>>>      if (psection->sub_section) {
>>>>> @@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
>>>>>                                     | (addr & iotlb.addr_mask));
>>>>>          len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>>>>>          if (!iotlb.perm[is_write]) {
>>>>> -            section = &phys_sections[phys_section_unassigned].section;
>>>>> +            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
>>>>>              break;
>>>>>          }
>>>>>
>>>>> @@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
>>>>>          iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
>>>>>              + xlat;
>>>>>          if (!section->readonly) {
>>>>> -            iotlb |= phys_section_notdirty;
>>>>> +            iotlb |= cur_pgtbl->phys_section_notdirty;
>>>>>          } else {
>>>>> -            iotlb |= phys_section_rom;
>>>>> +            iotlb |= cur_pgtbl->phys_section_rom;
>>>>>          }
>>>>>      } else {
>>>>> -        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section));
>>>>> +        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section), cur_pgtbl);
>>>>>          iotlb += xlat;
>>>>>      }
>>>>>
>>>>> @@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
>>>>>          if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
>>>>>              /* Avoid trapping reads of pages with a write breakpoint. */
>>>>>              if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
>>>>> -                iotlb = phys_section_watch + paddr;
>>>>> +                iotlb = cur_pgtbl->phys_section_watch + paddr;
>>>>>                  *address |= TLB_MMIO;
>>>>>                  break;
>>>>>              }
>>>>> @@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection, uint32_t start,
>>>>>                                 uint32_t end, uint16_t section);
>>>>>  static void subsections_init(PhysSection *psection);
>>>>>
>>>>> -static void destroy_page_desc(uint16_t section_index)
>>>>> +/* Call after all listeners have committed.
>>>>> + * We do not walk over the tree, just drop it.
>>>>> + */
>>>>> +static void destroy_pagetable(PhysPageTable *pgtbl)
>>>>>  {
>>>>> -    g_free(phys_sections[section_index].sub_section);
>>>>> -}
>>>>> -
>>>>> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
>>>>> -{
>>>>> -    unsigned i;
>>>>> -    PhysPageEntry *p;
>>>>> +    int i;
>>>>>
>>>>> -    if (lp->ptr == PHYS_MAP_NODE_NIL) {
>>>>> -        return;
>>>>> -    }
>>>>> +    g_free(pgtbl->phys_map_nodes);
>>>>>
>>>>> -    p = phys_map_nodes[lp->ptr];
>>>>> -    for (i = 0; i < L2_SIZE; ++i) {
>>>>> -        if (!p[i].is_leaf) {
>>>>> -            destroy_l2_mapping(&p[i], level - 1);
>>>>> +    for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) {
>>>>> +        if (pgtbl->phys_sections[i].sub_section) {
>>>>> +            g_free(pgtbl->phys_sections[i].sub_section);
>>>>>          } else {
>>>>> -            destroy_page_desc(p[i].ptr);
>>>>> +            memory_region_unref(pgtbl->phys_sections[i].section.mr);
>>>>>          }
>>>>>      }
>>>>> -    lp->is_leaf = 0;
>>>>> -    lp->ptr = PHYS_MAP_NODE_NIL;
>>>>> -}
>>>>> +    g_free(pgtbl->phys_sections);
>>>>>
>>>>> -static void destroy_all_mappings(AddressSpaceDispatch *d)
>>>>> -{
>>>>> -    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
>>>>> -    phys_map_nodes_reset();
>>>>> +    g_free(pgtbl);
>>>>>  }
>>>>
>>>> This should be a separate patch.
>>>>
>>> Sorry, this is the part of the reclaimer that destroys the prev pgtbl.
>>> Why separate it?
>>
>> You are changing the algorithm by which the old pagetable is destroyed.
>> This applies just as well to the current code.
>>
>> BTW, your patch includes Jan's subpage patch, which was broken.  I'll
>> drop it from the branch next time I push.
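
Purely as an illustration of how this flat destroy pairs with
refcounting: the helpers below mirror the __sync_sub_and_fetch() call
in the patch's global_commit; pgtbl_ref() and pgtbl_unref() are
hypothetical names, not code from the posted patch.

/* Sketch of hypothetical refcount helpers around destroy_pagetable().
 * Because every node and section lives in the flat
 * pgtbl->phys_map_nodes/phys_sections arrays, the last unref can free
 * the whole table without walking the radix tree.
 */
static void pgtbl_ref(PhysPageTable *pgtbl)
{
    __sync_fetch_and_add(&pgtbl->ref, 1);
}

static void pgtbl_unref(PhysPageTable *pgtbl)
{
    if (__sync_sub_and_fetch(&pgtbl->ref, 1) == 0) {
        destroy_pagetable(pgtbl);
    }
}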
>>
>> Paolo
>>
>>>>> -static uint16_t phys_section_add(MemoryRegionSection *section)
>>>>> +static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable *pgtbl)
>>>>
>>>> phys_section_add will always add to next_pgtbl.
>>>>
>>> Ok, will drop @pgtbl.
>>>>>  {
>>>>> -    assert(phys_sections_nb < TARGET_PAGE_SIZE);
>>>>> +    assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE);
>>>>>
>>>>> -    if (phys_sections_nb == phys_sections_nb_alloc) {
>>>>> -        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
>>>>> -        phys_sections = g_renew(PhysSection, phys_sections,
>>>>> -                                phys_sections_nb_alloc);
>>>>> +    if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) {
>>>>> +        pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc * 2, 16);
>>>>> +        pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections,
>>>>> +                                       pgtbl->phys_sections_nb_alloc);
>>>>>      }
>>>>> -    phys_sections[phys_sections_nb].section = *section;
>>>>> -    phys_sections[phys_sections_nb].sub_section = NULL;
>>>>> +    pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section;
>>>>> +    pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL;
>>>>>      memory_region_ref(section->mr);
>>>>> -    return phys_sections_nb++;
>>>>> -}
>>>>> -
>>>>> -static void phys_sections_clear(void)
>>>>> -{
>>>>> -    while (phys_sections_nb > 0) {
>>>>> -        PhysSection *phys_section = &phys_sections[--phys_sections_nb];
>>>>> -        memory_region_unref(phys_section->section.mr);
>>>>> -    }
>>>>> +    return pgtbl->phys_sections_nb++;
>>>>>  }
>>>>>
>>>>>  static void register_subsection(AddressSpaceDispatch *d,
>>>>> @@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d,
>>>>>             psection->section.mr == &io_mem_unassigned);
>>>>>
>>>>>      if (!psection->sub_section) {
>>>>> -        new_section = phys_section_add(&subsection);
>>>>> -        psection = &phys_sections[new_section];
>>>>> +        new_section = phys_section_add(&subsection, next_pgtbl);
>>>>> +        psection = &next_pgtbl->phys_sections[new_section];
>>>>>          subsections_init(psection);
>>>>>          phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section);
>>>>>      } else {
>>>>> -        new_section = PHYS_SECTION_ID(psection);
>>>>> +        new_section = PHYS_SECTION_ID(psection, next_pgtbl);
>>>>>      }
>>>>>
>>>>> -    new_subsection = phys_section_add(section);
>>>>> +    new_subsection = phys_section_add(section, next_pgtbl);
>>>>>
>>>>>      /* phys_section_add invalidates psection, reload it */
>>>>> -    psection = &phys_sections[new_section];
>>>>> +    psection = &next_pgtbl->phys_sections[new_section];
>>>>>      start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
>>>>>      end = start + section->size - 1;
>>>>>      subsection_register(psection, start, end, new_subsection);
>>>>> @@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
>>>>>      hwaddr start_addr = section->offset_within_address_space;
>>>>>      ram_addr_t size = section->size;
>>>>>      hwaddr addr;
>>>>> -    uint16_t section_index = phys_section_add(section);
>>>>> +    uint16_t section_index = phys_section_add(section, next_pgtbl);
>>>>>
>>>>>      assert(size);
>>>>>
>>>>> @@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection)
>>>>>  {
>>>>>      psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE);
>>>>>      subsection_register(psection, 0, TARGET_PAGE_SIZE-1,
>>>>> -                        phys_section_unassigned);
>>>>> +                        next_pgtbl->phys_section_unassigned);
>>>>>  }
>>>>>
>>>>>  static uint16_t dummy_section(MemoryRegion *mr)
>>>>> @@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr)
>>>>>          .size = UINT64_MAX,
>>>>>      };
>>>>>
>>>>> -    return phys_section_add(&section);
>>>>> +    return phys_section_add(&section, next_pgtbl);
>>>>>  }
>>>>>
>>>>>  MemoryRegion *iotlb_to_region(hwaddr index)
>>>>>  {
>>>>> -    return phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
>>>>> +    return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
>>>>>  }
>>>>>
>>>>>  static void io_mem_init(void)
>>>>> @@ -1685,21 +1674,40 @@ static void io_mem_init(void)
>>>>>                            "watch", UINT64_MAX);
>>>>>  }
>>>>>
>>>>> +void global_begin(void)
>>>>> +{
>>>>> +    next_pgtbl = g_new0(PhysPageTable, 1);
>>>>> +    next_pgtbl->ref = 1;
>>>>> +    next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
>>>>> +    next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
>>>>> +    next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
>>>>> +    next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
>>>>> +}
>>>>> +
>>>>> +/* other listeners finished */
>>>>> +void global_commit(void)
>>>>> +{
>>>>> +    PhysPageTable *t = cur_pgtbl;
>>>>> +
>>>>> +    cur_pgtbl = next_pgtbl;
>>>>> +    /* FIXME: currently we rely on each address space listener being
>>>>> +     * serialized against its readers, so when we get here no readers
>>>>> +     * will touch the old phys_map_node.  With RCU, this should become
>>>>> +     * a call_rcu() call.
>>>>> +     */
>>>>> +    if (__sync_sub_and_fetch(&t->ref, 1) == 0) {
>>>>> +        destroy_pagetable(t);
>>>>> +    }
>>>>
>>>> global_commit should not be needed, and global_begin should be simply
>>>> the new core_begin.  It should unref next_pgtbl and reallocate a new
>>>> one.
>>>>
>>> Good idea, thanks.
>>>
>>> Regards,
>>> Pingfan
>>>>
>>>>> +}
>>>>> +
>>>>>  static void mem_begin(MemoryListener *listener)
>>>>>  {
>>>>>      AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
>>>>>
>>>>> -    destroy_all_mappings(d);
>>>>>      d->phys_map.ptr = PHYS_MAP_NODE_NIL;
>>>>>  }
>>>>>
>>>>>  static void core_begin(MemoryListener *listener)
>>>>>  {
>>>>> -    phys_sections_clear();
>>>>> -    phys_section_unassigned = dummy_section(&io_mem_unassigned);
>>>>> -    phys_section_notdirty = dummy_section(&io_mem_notdirty);
>>>>> -    phys_section_rom = dummy_section(&io_mem_rom);
>>>>> -    phys_section_watch = dummy_section(&io_mem_watch);
>>>>>  }
>>>>>
>>>>>  static void tcg_commit(MemoryListener *listener)
>>>>> @@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as)
>>>>>      AddressSpaceDispatch *d = as->dispatch;
>>>>>
>>>>>      memory_listener_unregister(&d->listener);
>>>>> -    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
>>>>>      g_free(d);
>>>>>      as->dispatch = NULL;
>>>>>  }
>>>>> @@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
>>>>>
>>>>>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>>>>>          if (memory_region_is_ram(section->mr)) {
>>>>> -            section = &phys_sections[phys_section_rom].section;
>>>>> +            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>>>>>          }
>>>>>          io_mem_write(section->mr, addr, val, 4);
>>>>>      } else {
>>>>> @@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
>>>>>
>>>>>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>>>>>          if (memory_region_is_ram(section->mr)) {
>>>>> -            section = &phys_sections[phys_section_rom].section;
>>>>> +            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>>>>>          }
>>>>> #ifdef TARGET_WORDS_BIGENDIAN
>>>>>          io_mem_write(section->mr, addr, val >> 32, 4);
>>>>> @@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
>>>>>
>>>>>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>>>>>          if (memory_region_is_ram(section->mr)) {
>>>>> -            section = &phys_sections[phys_section_rom].section;
>>>>> +            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>>>>>          }
>>>>> #if defined(TARGET_WORDS_BIGENDIAN)
>>>>>          if (endian == DEVICE_LITTLE_ENDIAN) {
>>>>> @@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
>>>>>
>>>>>      if (!memory_region_is_ram(section->mr) || section->readonly) {
>>>>>          if (memory_region_is_ram(section->mr)) {
>>>>> -            section = &phys_sections[phys_section_rom].section;
>>>>> +            section = &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
>>>>>          }
>>>>> #if defined(TARGET_WORDS_BIGENDIAN)
>>>>>          if (endian == DEVICE_LITTLE_ENDIAN) {
>>>>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>>>>> index b97ace7..cc654fa 100644
>>>>> --- a/include/exec/memory.h
>>>>> +++ b/include/exec/memory.h
>>>>> @@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
>>>>>  void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
>>>>>                           int is_write, hwaddr access_len);
>>>>>
>>>>> +void global_begin(void);
>>>>> +void global_commit(void);
>>>>>
>>>>>  #endif
>>>>>
>>>>> diff --git a/memory.c b/memory.c
>>>>> index 1a86607..da06dfd 100644
>>>>> --- a/memory.c
>>>>> +++ b/memory.c
>>>>> @@ -805,6 +805,7 @@ void memory_region_transaction_commit(void)
>>>>>      --memory_region_transaction_depth;
>>>>>      if (!memory_region_transaction_depth && memory_region_update_pending) {
>>>>>          memory_region_update_pending = false;
>>>>> +        global_begin();
>>>>>          MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
>>>>>
>>>>>          QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
>>>>> @@ -812,6 +813,7 @@ void memory_region_transaction_commit(void)
>>>>>          }
>>>>>
>>>>>          MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
>>>>> +        global_commit();
>>>>>      }
>>>>>  }
>>>>>
>>>>>
>>>>
>>>
>>>
>>
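
To illustrate the suggestion made earlier in the thread (folding
global_begin into core_begin so that global_commit disappears), here is
a minimal sketch under the same assumptions; pgtbl_unref() is a
hypothetical helper, and none of this is code from the posted patch:

/* Sketch: core_begin takes over global_begin's job.  It drops the
 * build-time reference on the previously built table and starts a
 * fresh one; each AddressSpaceDispatch keeps the reference it took in
 * its own commit hook, so the old table survives until the last
 * reader-side unref.
 */
static void core_begin(MemoryListener *listener)
{
    if (next_pgtbl) {
        pgtbl_unref(next_pgtbl);
    }
    next_pgtbl = g_new0(PhysPageTable, 1);
    next_pgtbl->ref = 1;
    next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
    next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
    next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
    next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
}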