* Singh, Brijesh (brijesh.si...@amd.com) wrote:
> 
> 
> On 7/11/19 2:05 PM, Dr. David Alan Gilbert wrote:
> > * Singh, Brijesh (brijesh.si...@amd.com) wrote:
> >> SEV VMs have a concept of private and shared memory. The private
> >> memory is encrypted with a guest-specific key, while shared memory
> >> may be encrypted with the hypervisor key. The KVM_GET_PAGE_ENC_BITMAP
> >> ioctl can be used to get a bitmap indicating whether a guest page is
> >> private or shared. A private page must be transmitted using the SEV
> >> migration commands.
> >>
> >> Add a cpu_physical_memory_sync_encrypted_bitmap() which can be used
> >> to sync the page encryption bitmap for a given memory region.
> >>
> >> Signed-off-by: Brijesh Singh <brijesh.si...@amd.com>
> >> ---
> >>  accel/kvm/kvm-all.c     |  38 ++++++++++
> >>  include/exec/ram_addr.h | 161 ++++++++++++++++++++++++++++++++++++++--
> >>  include/exec/ramlist.h  |   3 +-
> >>  migration/ram.c         |  28 ++++++-
> >>  4 files changed, 222 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> >> index 162a2d5085..c935e9366c 100644
> >> --- a/accel/kvm/kvm-all.c
> >> +++ b/accel/kvm/kvm-all.c
> >> @@ -504,6 +504,37 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
> >>
> >>  #define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
> >>
> >> +/* sync page_enc bitmap */
> >> +static int kvm_sync_page_enc_bitmap(KVMMemoryListener *kml,
> >> +                                    MemoryRegionSection *section,
> >> +                                    KVMSlot *mem)
> >
> > How AMD/SEV specific is this?  i.e. should this be in a target/
> > specific place?
> >
> 
> For now this is implemented in the AMD/SEV-specific kernel module,
> but the interface exposed to userspace is generic and can be used by
> other vendors' memory encryption features. Because of this I have
> added the syncing logic in the generic KVM code.
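> 
> As a rough sketch, the helper is a thin wrapper that pulls the bitmap
> out of the kernel and hands it to the generic bitmap code below. (The
> struct layout here follows the currently proposed kernel series and
> may still change; error handling is trimmed for brevity.)
> 
>     static int kvm_sync_page_enc_bitmap(KVMMemoryListener *kml,
>                                         MemoryRegionSection *section,
>                                         KVMSlot *mem)
>     {
>         ram_addr_t start = section->offset_within_region +
>                            memory_region_get_ram_addr(section->mr);
>         ram_addr_t pages = mem->memory_size >> TARGET_PAGE_BITS;
>         unsigned long *bitmap = g_malloc0(BITS_TO_LONGS(pages) *
>                                           sizeof(unsigned long));
>         struct kvm_page_enc_bitmap e = {
>             .start_gfn  = mem->start_addr >> TARGET_PAGE_BITS,
>             .num_pages  = pages,
>             .enc_bitmap = bitmap,
>         };
>         int ret = kvm_vm_ioctl(kvm_state, KVM_GET_PAGE_ENC_BITMAP, &e);
> 
>         if (ret == 0) {
>             /* copy the encryption state bits into the dirty_memory
>              * bitmap machinery, see ram_addr.h changes below */
>             cpu_physical_memory_set_encrypted_lebitmap(bitmap, start,
>                                                        pages);
>         }
>         g_free(bitmap);
>         return ret;
>     }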
Ok, I'm not sure if anyone else will have quite the same bitmap
semantics; but we'll see.

<snip>

> >> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> >> index f96777bb99..6fc6864194 100644
> >> --- a/include/exec/ram_addr.h
> >> +++ b/include/exec/ram_addr.h
> >> @@ -51,6 +51,8 @@ struct RAMBlock {
> >>      unsigned long *unsentmap;
> >>      /* bitmap of already received pages in postcopy */
> >>      unsigned long *receivedmap;
> >> +    /* bitmap of page encryption state for an encrypted guest */
> >> +    unsigned long *encbmap;
> >>  };
> >>
> >>  static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
> >> @@ -314,9 +316,41 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
> >>  }
> >>
> >>  #if !defined(_WIN32)
> >> -static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
> >> +
> >> +static inline void cpu_physical_memory_set_encrypted_range(ram_addr_t start,
> >> +                                                           ram_addr_t length,
> >> +                                                           unsigned long val)
> >> +{
> >> +    unsigned long end, page;
> >> +    unsigned long * const *src;
> >> +
> >> +    if (length == 0) {
> >> +        return;
> >> +    }
> >> +
> >> +    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
> >> +    page = start >> TARGET_PAGE_BITS;
> >> +
> >> +    rcu_read_lock();
> >> +
> >> +    src = atomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_ENCRYPTED])->blocks;
> >> +
> >> +    while (page < end) {
> >> +        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
> >> +        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
> >> +        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
> >> +
> >> +        atomic_xchg(&src[idx][BIT_WORD(offset)], val);
> >> +        page += num;
> >> +    }
> >> +
> >> +    rcu_read_unlock();
> >> +}
> >> +
> >> +static inline void cpu_physical_memory_set_dirty_enc_lebitmap(unsigned long *bitmap,
> >>                                                            ram_addr_t start,
> >> -                                                          ram_addr_t pages)
> >> +                                                          ram_addr_t pages,
> >> +                                                          bool enc_map)
> >>  {
> >>      unsigned long i, j;
> >>      unsigned long page_number, c;
> >> @@ -349,10 +383,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
> >>          if (bitmap[k]) {
> >>              unsigned long temp = leul_to_cpu(bitmap[k]);
> >>
> >> -            atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp);
> >> -            atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
> >> -            if (tcg_enabled()) {
> >> -                atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
> >> +            if (enc_map) {
> >> +                atomic_xchg(&blocks[DIRTY_MEMORY_ENCRYPTED][idx][offset], temp);
> >
> > It makes me nervous that this is almost but not exactly like the other
> > bitmaps; I *think* you're saying the bits here are purely a matter of
> > state about whether the page is encrypted and not a matter of actual
> > dirtiness; in particular a page that is encrypted and then becomes
> > dirty doesn't reset or clear this flag.
> >
> 
> Yes, the bits here are the state of the page and they don't get reset
> or cleared with this flag. I agree it's not exactly the same. Initially
> I did go down the path of querying the bitmap outside the dirty
> tracking infrastructure, and it proved to be a lot of work. This is
> mainly because the migration code works with RAM offsets while the
> kernel tracks the gfn, so we do need to map from memslot to offset.
> The dirty bitmap tracking infrastructure has that mapping logic in
> place, so I ended up simply reusing it.

Hmm OK; it could be too confusing - just make sure you add a comment;
e.g. 'Note: not actually dirty flags, see ...' where appropriate. You
may end up renaming/cloning a few functions for clarity.
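i.e. something along the lines of (exact wording up to you):

    /*
     * Note: DIRTY_MEMORY_ENCRYPTED is not actually a set of dirty
     * flags; a set bit just means 'this page is encrypted'.  The bits
     * are only ever rewritten by a bitmap sync and are never cleared
     * by the dirty-tracking code when a page is written to.
     */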
> >> +            } else {
> >> +                atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset], temp);
> >> +                atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
> >> +                if (tcg_enabled()) {
> >> +                    atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset], temp);
> >> +                }
> >>              }
> >>          }
> >>
> >> @@ -372,6 +410,17 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
> >>       * especially when most of the memory is not dirty.
> >>       */
> >>      for (i = 0; i < len; i++) {
> >> +
> >> +        /* If its encrypted bitmap update, then we need to copy the bitmap
> >> +         * value as-is to the destination.
> >> +         */
> >> +        if (enc_map) {
> >> +            cpu_physical_memory_set_encrypted_range(start + i * TARGET_PAGE_SIZE,
> >> +                                                    TARGET_PAGE_SIZE * hpratio,
> >> +                                                    leul_to_cpu(bitmap[i]));
> >> +            continue;
> >> +        }
> >> +
> >>          if (bitmap[i] != 0) {
> >>              c = leul_to_cpu(bitmap[i]);
> >>              do {
> >> @@ -387,6 +436,21 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
> >>          }
> >>      }
> >>  }
> >> +
> >> +static inline void cpu_physical_memory_set_encrypted_lebitmap(unsigned long *bitmap,
> >> +                                                              ram_addr_t start,
> >> +                                                              ram_addr_t pages)
> >> +{
> >> +    return cpu_physical_memory_set_dirty_enc_lebitmap(bitmap, start, pages, true);
> >> +}
> >> +
> >> +static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
> >> +                                                          ram_addr_t start,
> >> +                                                          ram_addr_t pages)
> >> +{
> >> +    return cpu_physical_memory_set_dirty_enc_lebitmap(bitmap, start, pages, false);
> >> +}
> >> +
> >>  #endif /* not _WIN32 */
> >>
> >>  bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
> >> @@ -406,6 +470,7 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
> >>      cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
> >>      cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
> >>      cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
> >> +    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_ENCRYPTED);
> >
> > What are the ordering/consistency rules associated with this data?
> > Specifically:
> >
> > Consider a page that transitions from being shared to encrypted
> > (does that happen?) - but we've just done the syncs so we know the
> > page is dirty, but we don't know it's encrypted; so we transmit the
> > page as unencrypted; what happens?
> >
> 
> When a page is transitioned from private to shared, one (or two) of
> the following actions will be taken by the guest OS:
> 
> a) update the pgtable memory
> 
> and
> 
> b) update the contents of the page
> 
> #a is a must, #b is optional. #a will dirty the pgtable memory, so
> it's safe to assume that the pgtable will be synced with the correct
> attribute. Similarly, if #b is performed then the page address will be
> dirty and it will be re-transmitted with the updated data. But if #b
> is not performed by the guest then it's okay to send the page through
> the encryption path, because the content of that page is still
> encrypted.
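> 
> (For a Linux guest, both of those come out of the set_memory_decrypted()
> path - sketching the usual pattern, not code from this series:
> 
>     /* guest side: flip a page from private to shared */
>     unsigned long va = (unsigned long)page_address(page);
> 
>     set_memory_decrypted(va, 1);       /* #a: rewrites the pte, clears
>                                         *     the C-bit, flushes TLB   */
>     memset((void *)va, 0, PAGE_SIZE);  /* #b: optional - reinitialise
>                                         *     the now-shared contents  */
> 
> so #a always writes pgtable memory, which is what guarantees the
> pgtable pages show up as dirty.)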
What's the relationship between updating the pgtable memory and this
bitmap you're syncing?

> >
> > I *think* that means we should always sync the encrypted bitmap
> > before the dirty bitmap, so that if it flips we're guaranteed the
> > dirty bitmap gets flipped again after the flip has happened; but my
> > brain is starting to hurt....
> >
> > But, even if we're guaranteed to have a dirty for the next time
> > around, I think we're always at risk of transmitting a page that
> > has just flipped; so we'll be sure to transmit it again correctly,
> > but we might transmit an encrypted page to a non-encrypted dest or
> > the reverse - is that OK?
> >
> 
> I don't think the order matters much. If the page was marked as shared
> in the pagetable but nobody has touched its contents, then that page
> will still contain encrypted data, so I think it's OK to send it as
> encrypted.

So are we really saying that the transfer of the contents of guest RAM
doesn't matter if it's encrypted or not - you could transfer all pages
as if they were encrypted even if they're actually shared - as long as
the bitmap is right at the end?

Dave

> >>  }
> >>
> >>
> >> @@ -474,5 +539,89 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
> >>
> >>      return num_dirty;
> >>  }
> >> +
> >> +static inline bool cpu_physical_memory_test_encrypted(ram_addr_t start,
> >> +                                                      ram_addr_t length)
> >> +{
> >> +    unsigned long end, page;
> >> +    bool enc = false;
> >> +    unsigned long * const *src;
> >> +
> >> +    if (length == 0) {
> >> +        return enc;
> >> +    }
> >> +
> >> +    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
> >> +    page = start >> TARGET_PAGE_BITS;
> >> +
> >> +    rcu_read_lock();
> >> +
> >> +    src = atomic_rcu_read(&ram_list.dirty_memory[DIRTY_MEMORY_ENCRYPTED])->blocks;
> >> +
> >> +    while (page < end) {
> >> +        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
> >> +        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
> >> +        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
> >> +
> >> +        enc |= atomic_read(&src[idx][BIT_WORD(offset)]);
> >
> > I'm confused; I thought what I was about to get there was a long*
> > and you were going to mask out a bit or set of bits.
> >
> 
> Ah, good catch, thanks. It's a bug: currently, if any one of the bits
> is set in the long* then I am saying the page is encrypted, which is
> wrong. I should be masking out a bit and checking the specific
> position.
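> 
> i.e. the accumulation should only look at the bits covering the
> requested range, something like (untested, walking page by page for
> simplicity rather than a word at a time):
> 
>     while (page < end) {
>         unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
>         unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
>         unsigned long bits = atomic_read(&src[idx][BIT_WORD(offset)]);
> 
>         /* test only this page's bit, not the whole word */
>         enc |= bits & (1UL << (offset % BITS_PER_LONG));
>         page++;
>     }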
> >
> >> +        page += num;
> >> +    }
> >> +
> >> +    rcu_read_unlock();
> >> +
> >> +    return enc;
> >> +}
> >> +
> >> +static inline
> >> +void cpu_physical_memory_sync_encrypted_bitmap(RAMBlock *rb,
> >> +                                               ram_addr_t start,
> >> +                                               ram_addr_t length)
> >> +{
> >> +    ram_addr_t addr;
> >> +    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
> >> +    unsigned long *dest = rb->encbmap;
> >> +
> >> +    /* start address and length is aligned at the start of a word? */
> >> +    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
> >> +         (start + rb->offset) &&
> >> +        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
> >> +        int k;
> >> +        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
> >
> > Feels odd it's an int.
> >
> >> +        unsigned long * const *src;
> >> +        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
> >> +        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
> >> +                                        DIRTY_MEMORY_BLOCK_SIZE);
> >> +        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
> >> +
> >> +        rcu_read_lock();
> >> +
> >> +        src = atomic_rcu_read(
> >> +                &ram_list.dirty_memory[DIRTY_MEMORY_ENCRYPTED])->blocks;
> >> +
> >> +        for (k = page; k < page + nr; k++) {
> >> +            unsigned long bits = atomic_read(&src[idx][offset]);
> >> +            dest[k] = bits;
> >> +
> >> +            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
> >> +                offset = 0;
> >> +                idx++;
> >> +            }
> >> +        }
> >> +
> >> +        rcu_read_unlock();
> >> +    } else {
> >> +        ram_addr_t offset = rb->offset;
> >> +
> >> +        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
> >> +            long k = (start + addr) >> TARGET_PAGE_BITS;
> >> +            if (cpu_physical_memory_test_encrypted(start + addr + offset,
> >> +                                                   TARGET_PAGE_SIZE)) {
> >> +                set_bit(k, dest);
> >> +            } else {
> >> +                clear_bit(k, dest);
> >> +            }
> >> +        }
> >> +    }
> >> +}
> >>  #endif
> >>  #endif
> >> diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
> >> index bc4faa1b00..2a5eab8b11 100644
> >> --- a/include/exec/ramlist.h
> >> +++ b/include/exec/ramlist.h
> >> @@ -11,7 +11,8 @@ typedef struct RAMBlockNotifier RAMBlockNotifier;
> >>  #define DIRTY_MEMORY_VGA       0
> >>  #define DIRTY_MEMORY_CODE      1
> >>  #define DIRTY_MEMORY_MIGRATION 2
> >> -#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
> >> +#define DIRTY_MEMORY_ENCRYPTED 3
> >> +#define DIRTY_MEMORY_NUM       4        /* num of dirty bits */
> >>
> >>  /* The dirty memory bitmap is split into fixed-size blocks to allow growth
> >>   * under RCU.  The bitmap for a block can be accessed as follows:
> >> diff --git a/migration/ram.c b/migration/ram.c
> >> index 3c8977d508..d179867e1b 100644
> >> --- a/migration/ram.c
> >> +++ b/migration/ram.c
> >> @@ -1680,6 +1680,9 @@ static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
> >>      rs->migration_dirty_pages +=
> >>          cpu_physical_memory_sync_dirty_bitmap(rb, 0, length,
> >>                                                &rs->num_dirty_pages_period);
> >> +    if (kvm_memcrypt_enabled()) {
> >> +        cpu_physical_memory_sync_encrypted_bitmap(rb, 0, length);
> >> +    }
> >>  }
> >>
> >>  /**
> >> @@ -2465,6 +2468,22 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
> >>      return false;
> >>  }
> >>
> >> +/**
> >> + * encrypted_test_bitmap: check if the page is encrypted
> >> + *
> >> + * Returns a bool indicating whether the page is encrypted.
> >> + */
> >> +static bool encrypted_test_bitmap(RAMState *rs, RAMBlock *block,
> >> +                                  unsigned long page)
> >> +{
> >> +    /* ROM devices contains the unencrypted data */
> >> +    if (memory_region_is_rom(block->mr)) {
> >> +        return false;
> >> +    }
> >> +
> >> +    return test_bit(page, block->encbmap);
> >> +}
> >> +
> >>  /**
> >>   * ram_save_target_page: save one target page
> >>   *
> >> @@ -2491,7 +2510,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
> >>       * will take care of accessing the guest memory and re-encrypt it
> >>       * for the transport purposes.
> >>       */
> >> -    if (kvm_memcrypt_enabled()) {
> >> +    if (kvm_memcrypt_enabled() &&
> >> +        encrypted_test_bitmap(rs, pss->block, pss->page)) {
> >>          return ram_save_encrypted_page(rs, pss, last_stage);
> >>      }
> >>
> >> @@ -2724,6 +2744,8 @@ static void ram_save_cleanup(void *opaque)
> >>          block->bmap = NULL;
> >>          g_free(block->unsentmap);
> >>          block->unsentmap = NULL;
> >> +        g_free(block->encbmap);
> >> +        block->encbmap = NULL;
> >>      }
> >>
> >>      xbzrle_cleanup();
> >> @@ -3251,6 +3273,10 @@ static void ram_list_init_bitmaps(void)
> >>              block->unsentmap = bitmap_new(pages);
> >>              bitmap_set(block->unsentmap, 0, pages);
> >>          }
> >> +        if (kvm_memcrypt_enabled()) {
> >> +            block->encbmap = bitmap_new(pages);
> >> +            bitmap_set(block->encbmap, 0, pages);
> >> +        }
> >>      }
> >>  }
> >>  }
> >> --
> >> 2.17.1
> >>
> > --
> > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK