On Wed Aug 21, 2024 at 1:02 PM EEST, Dmitrii Kuvaiskii wrote:
> Imagine an mmap()'d file. Two threads touch the same address at the same
> time and fault. Both allocate a physical page and race to install a PTE
> for that page. Only one will win the race. The loser frees its page, but
> still continues handling the fault as a success and returns
> VM_FAULT_NOPAGE from the fault handler.
>
> The same race can happen with SGX. But there's a bug: the loser in the
> SGX steers into a failure path. The loser EREMOVE's the winner's EPC
> page, then returns SIGBUS, likely killing the app.
>
> Fix the SGX loser's behavior. Check whether another thread already
> allocated the page and if yes, return with VM_FAULT_NOPAGE.
>
> The race can be illustrated as follows:
>
> /*                                  /*
>  * Fault on CPU1                     * Fault on CPU2
>  * on enclave page X                 * on enclave page X
>  */                                  */
> sgx_vma_fault() {                   sgx_vma_fault() {
>
>   xa_load(&encl->page_array)          xa_load(&encl->page_array)
>       == NULL -->                         == NULL -->
>
>   sgx_encl_eaug_page() {              sgx_encl_eaug_page() {
>
>     ...                                 ...
>
>     /*                                  /*
>      * alloc encl_page                   * alloc encl_page
>      */                                  */
>                                         mutex_lock(&encl->lock);
>                                         /*
>                                          * alloc EPC page
>                                          */
>                                         epc_page = sgx_alloc_epc_page(...);
>                                         /*
>                                          * add page to enclave's xarray
>                                          */
>                                         xa_insert(&encl->page_array, ...);
>                                         /*
>                                          * add page to enclave via EAUG
>                                          * (page is in pending state)
>                                          */
>                                         /*
>                                          * add PTE entry
>                                          */
>                                         vmf_insert_pfn(...);
>
>                                         mutex_unlock(&encl->lock);
>                                         return VM_FAULT_NOPAGE;
>                                       }
>                                     }
>                                     /*
>                                      * All good up to here: enclave page
>                                      * successfully added to enclave,
>                                      * ready for EACCEPT from user space
>                                      */
>     mutex_lock(&encl->lock);
>     /*
>      * alloc EPC page
>      */
>     epc_page = sgx_alloc_epc_page(...);
>     /*
>      * add page to enclave's xarray,
>      * this fails with -EBUSY as this
>      * page was already added by CPU2
>      */
>     xa_insert(&encl->page_array, ...);
>
>   err_out_shrink:
>     sgx_encl_free_epc_page(epc_page) {
>       /*
>        * remove page via EREMOVE
>        *
>        * *BUG*: page added by CPU2 is
>        * yanked from enclave while it
>        * remains accessible from OS
>        * perspective (PTE installed)
>        */
>       /*
>        * free EPC page
>        */
>       sgx_free_epc_page(epc_page);
>     }
>
>     mutex_unlock(&encl->lock);
>     /*
>      * *BUG*: SIGBUS is returned
>      * for a valid enclave page
>      */
>     return VM_FAULT_SIGBUS;
>   }
> }
>
> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: sta...@vger.kernel.org
> Reported-by: Marcelina Kościelnicka <m...@invisiblethingslab.com>
> Suggested-by: Kai Huang <kai.hu...@intel.com>
> Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvais...@intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
>  1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index c0a3c00284c8..2aa7ced0e4a0 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>          if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
>                  return VM_FAULT_SIGBUS;
>
> +        mutex_lock(&encl->lock);
> +
> +        /*
> +         * Multiple threads may try to fault on the same page concurrently.
> +         * Re-check if another thread has already done that.
> +         */
> +        encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
> +        if (encl_page)
> +                goto done;
> +
>          /*
>           * Ignore internal permission checking for dynamically added pages.
>           * They matter only for data added during the pre-initialization
> @@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>           */
>          secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
>          encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
> -        if (IS_ERR(encl_page))
> -                return VM_FAULT_OOM;
> -
> -        mutex_lock(&encl->lock);
> +        if (IS_ERR(encl_page)) {
> +                vmret = VM_FAULT_OOM;
> +                goto err_out_unlock;
> +        }
>
>          epc_page = sgx_encl_load_secs(encl);
>          if (IS_ERR(epc_page)) {
>                  if (PTR_ERR(epc_page) == -EBUSY)
>                          vmret = VM_FAULT_NOPAGE;
> -                goto err_out_unlock;
> +                goto err_out_encl;
>          }
>
>          epc_page = sgx_alloc_epc_page(encl_page, false);
>          if (IS_ERR(epc_page)) {
>                  if (PTR_ERR(epc_page) == -EBUSY)
>                          vmret = VM_FAULT_NOPAGE;
> -                goto err_out_unlock;
> +                goto err_out_encl;
>          }
>
>          va_page = sgx_encl_grow(encl, false);
> @@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
>          ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
>                          encl_page, GFP_KERNEL);
> -        /*
> -         * If ret == -EBUSY then page was created in another flow while
> -         * running without encl->lock
> -         */
>          if (ret)
>                  goto err_out_shrink;
>
> @@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>
>          ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
>          if (ret)
> -                goto err_out;
> +                goto err_out_eaug;
>
>          encl_page->encl = encl;
>          encl_page->epc_page = epc_page;
> @@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>                  mutex_unlock(&encl->lock);
>                  return VM_FAULT_SIGBUS;
>          }
> +done:
>          mutex_unlock(&encl->lock);
>          return VM_FAULT_NOPAGE;
>
> -err_out:
> +err_out_eaug:
>          xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
> -
>  err_out_shrink:
>          sgx_encl_shrink(encl, va_page);
>  err_out_epc:
>          sgx_encl_free_epc_page(epc_page);
> +err_out_encl:
> +        kfree(encl_page);
>  err_out_unlock:
>          mutex_unlock(&encl->lock);
> -        kfree(encl_page);
> -
>          return vmret;
>  }
>
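
The core of the fix is the re-check under encl->lock: take the lock first, look up
the xarray, and treat an already-present page as success rather than an error. For
illustration only, here is a minimal userspace sketch of that pattern (not kernel
code; names such as fake_fault() and page_slot are invented for the example):

/*
 * Userspace sketch of the re-check-under-lock pattern applied by the patch:
 * two threads "fault" on the same page; each takes the lock, re-checks
 * whether the other thread already installed the page, and if so returns
 * success instead of an error.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE 4096

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *page_slot;         /* analogue of the enclave's page_array slot */

/* Analogue of sgx_encl_eaug_page(): 0 means "fault handled" (NOPAGE). */
static int fake_fault(long cpu)
{
        int ret = 0;

        pthread_mutex_lock(&lock);

        /* Re-check: another thread may have installed the page already. */
        if (page_slot) {
                printf("cpu%ld: lost the race, page already present -> success\n", cpu);
                goto done;
        }

        page_slot = calloc(1, FAKE_PAGE_SIZE);  /* "alloc EPC page" + install */
        if (!page_slot) {
                ret = -1;                       /* analogue of the SIGBUS path */
                goto done;
        }
        printf("cpu%ld: won the race, installed the page\n", cpu);
done:
        pthread_mutex_unlock(&lock);
        return ret;
}

static void *thread_fn(void *arg)
{
        fake_fault((long)arg);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, thread_fn, (void *)1L);
        pthread_create(&t2, NULL, thread_fn, (void *)2L);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);

        free(page_slot);
        return 0;
}

Both threads report success and exactly one page ends up installed, which is the
behavior the patch restores for the SGX EAUG path.
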
Reviewed-by: Jarkko Sakkinen <jar...@kernel.org>

BR, Jarkko