On Wed Aug 21, 2024 at 1:02 PM EEST, Dmitrii Kuvaiskii wrote:
> Imagine an mmap()'d file. Two threads touch the same address at the
> same time and fault. Both allocate a physical page and race to
> install a PTE for that page. Only one will win the race. The loser
> frees its page but still treats the fault as handled successfully and
> returns VM_FAULT_NOPAGE from its fault handler.
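>
> For illustration only, here is a minimal user-space sketch of that
> generic scenario (the file path and names are arbitrary; this is not
> part of the patch): two threads read the same not-yet-faulted-in page
> of an mmap()'d file at the same time, both may fault concurrently,
> and the kernel resolves the race transparently. Build with
> cc -pthread demo.c.
>
>   #include <fcntl.h>
>   #include <pthread.h>
>   #include <stdio.h>
>   #include <sys/mman.h>
>   #include <unistd.h>
>
>   static volatile char *mapping;
>
>   static void *touch(void *arg)
>   {
>           (void)arg;
>           /* The first access to the page triggers the fault. */
>           return (void *)(unsigned long)mapping[0];
>   }
>
>   int main(void)
>   {
>           pthread_t t1, t2;
>           int fd = open("/tmp/fault-race-demo", O_RDWR | O_CREAT, 0600);
>
>           if (fd < 0 || ftruncate(fd, 4096))
>                   return 1;
>           mapping = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
>           if (mapping == MAP_FAILED)
>                   return 1;
>
>           pthread_create(&t1, NULL, touch, NULL);
>           pthread_create(&t2, NULL, touch, NULL);
>           pthread_join(t1, NULL);
>           pthread_join(t2, NULL);
>
>           puts("both faults handled, no SIGBUS");
>           return 0;
>   }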
>
> The same race can happen with SGX, but there's a bug: the loser in
> the SGX case steers into a failure path. The loser EREMOVEs the
> winner's EPC page and then returns SIGBUS, likely killing the
> application.
>
> Fix the SGX loser's behavior: under encl->lock, check whether another
> thread has already added the enclave page and, if so, return
> VM_FAULT_NOPAGE (a condensed sketch of the fixed flow follows the
> illustration below).
>
> The race can be illustrated as follows:
>
> /*                             /*
>  * Fault on CPU1                * Fault on CPU2
>  * on enclave page X            * on enclave page X
>  */                             */
> sgx_vma_fault() {              sgx_vma_fault() {
>
>   xa_load(&encl->page_array)     xa_load(&encl->page_array)
>       == NULL -->                    == NULL -->
>
>   sgx_encl_eaug_page() {         sgx_encl_eaug_page() {
>
>     ...                            ...
>
>     /*                             /*
>      * alloc encl_page              * alloc encl_page
>      */                             */
>                                    mutex_lock(&encl->lock);
>                                    /*
>                                     * alloc EPC page
>                                     */
>                                    epc_page = sgx_alloc_epc_page(...);
>                                    /*
>                                     * add page to enclave's xarray
>                                     */
>                                    xa_insert(&encl->page_array, ...);
>                                    /*
>                                     * add page to enclave via EAUG
>                                     * (page is in pending state)
>                                     */
>                                    /*
>                                     * add PTE entry
>                                     */
>                                    vmf_insert_pfn(...);
>
>                                    mutex_unlock(&encl->lock);
>                                    return VM_FAULT_NOPAGE;
>                                  }
>                                }
>                                /*
>                                 * All good up to here: enclave page
>                                 * successfully added to enclave,
>                                 * ready for EACCEPT from user space
>                                 */
>     mutex_lock(&encl->lock);
>     /*
>      * alloc EPC page
>      */
>     epc_page = sgx_alloc_epc_page(...);
>     /*
>      * add page to enclave's xarray,
>      * this fails with -EBUSY as this
>      * page was already added by CPU2
>      */
>     xa_insert(&encl->page_array, ...);
>
>   err_out_shrink:
>     sgx_encl_free_epc_page(epc_page) {
>       /*
>        * remove page via EREMOVE
>        *
>        * *BUG*: page added by CPU2 is
>        * yanked from enclave while it
>        * remains accessible from OS
>        * perspective (PTE installed)
>        */
>       /*
>        * free EPC page
>        */
>       sgx_free_epc_page(epc_page);
>     }
>
>     mutex_unlock(&encl->lock);
>     /*
>      * *BUG*: SIGBUS is returned
>      * for a valid enclave page
>      */
>     return VM_FAULT_SIGBUS;
>   }
> }
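>
> With the fix applied, the loser's path condenses to roughly the
> following (a simplified sketch reconstructed from the diff below;
> SECS loading, VA-page growth and the error unwinding are omitted):
>
>   static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>                                        struct sgx_encl *encl,
>                                        unsigned long addr)
>   {
>           mutex_lock(&encl->lock);
>
>           /*
>            * Multiple threads may fault on the same page concurrently.
>            * If another thread already added the page (and installed
>            * the PTE), back off and report success.
>            */
>           if (xa_load(&encl->page_array, PFN_DOWN(addr))) {
>                   mutex_unlock(&encl->lock);
>                   return VM_FAULT_NOPAGE;
>           }
>
>           /* ... alloc encl_page and EPC page, xa_insert(), EAUG, PTE ... */
>
>           mutex_unlock(&encl->lock);
>           return VM_FAULT_NOPAGE;
>   }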
>
> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: sta...@vger.kernel.org
> Reported-by: Marcelina Koƛcielnicka <m...@invisiblethingslab.com>
> Suggested-by: Kai Huang <kai.hu...@intel.com>
> Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvais...@intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
>  1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index c0a3c00284c8..2aa7ced0e4a0 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>       if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
>               return VM_FAULT_SIGBUS;
>  
> +     mutex_lock(&encl->lock);
> +
> +     /*
> +      * Multiple threads may try to fault on the same page concurrently.
> +      * Re-check if another thread has already done that.
> +      */
> +     encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
> +     if (encl_page)
> +             goto done;
> +
>       /*
>        * Ignore internal permission checking for dynamically added pages.
>        * They matter only for data added during the pre-initialization
> @@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>        */
>       secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
>       encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
> -     if (IS_ERR(encl_page))
> -             return VM_FAULT_OOM;
> -
> -     mutex_lock(&encl->lock);
> +     if (IS_ERR(encl_page)) {
> +             vmret = VM_FAULT_OOM;
> +             goto err_out_unlock;
> +     }
>  
>       epc_page = sgx_encl_load_secs(encl);
>       if (IS_ERR(epc_page)) {
>               if (PTR_ERR(epc_page) == -EBUSY)
>                       vmret = VM_FAULT_NOPAGE;
> -             goto err_out_unlock;
> +             goto err_out_encl;
>       }
>  
>       epc_page = sgx_alloc_epc_page(encl_page, false);
>       if (IS_ERR(epc_page)) {
>               if (PTR_ERR(epc_page) == -EBUSY)
>                       vmret =  VM_FAULT_NOPAGE;
> -             goto err_out_unlock;
> +             goto err_out_encl;
>       }
>  
>       va_page = sgx_encl_grow(encl, false);
> @@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  
>       ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
>                       encl_page, GFP_KERNEL);
> -     /*
> -      * If ret == -EBUSY then page was created in another flow while
> -      * running without encl->lock
> -      */
>       if (ret)
>               goto err_out_shrink;
>  
> @@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  
>       ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
>       if (ret)
> -             goto err_out;
> +             goto err_out_eaug;
>  
>       encl_page->encl = encl;
>       encl_page->epc_page = epc_page;
> @@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>               mutex_unlock(&encl->lock);
>               return VM_FAULT_SIGBUS;
>       }
> +done:
>       mutex_unlock(&encl->lock);
>       return VM_FAULT_NOPAGE;
>  
> -err_out:
> +err_out_eaug:
>       xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
> -
>  err_out_shrink:
>       sgx_encl_shrink(encl, va_page);
>  err_out_epc:
>       sgx_encl_free_epc_page(epc_page);
> +err_out_encl:
> +     kfree(encl_page);
>  err_out_unlock:
>       mutex_unlock(&encl->lock);
> -     kfree(encl_page);
> -
>       return vmret;
>  }
>  
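> For reference, the error-unwind ladder at the end of
> sgx_encl_eaug_page() after this patch, reconstructed from the hunks
> above (a sketch of the resulting state, not a verbatim copy of the
> file):
>
>   err_out_eaug:
>           xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
>   err_out_shrink:
>           sgx_encl_shrink(encl, va_page);
>   err_out_epc:
>           sgx_encl_free_epc_page(epc_page);
>   err_out_encl:
>           kfree(encl_page);
>   err_out_unlock:
>           mutex_unlock(&encl->lock);
>           return vmret;
>
> Each failure jumps to the label that releases everything allocated
> before it; moving kfree(encl_page) from after the unlock to the new
> err_out_encl label lets the early SECS/EPC allocation failures free
> encl_page, while the VM_FAULT_OOM path (where encl_page is an error
> pointer) skips the kfree entirely.
>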

Reviewed-by: Jarkko Sakkinen <jar...@kernel.org>

BR, Jarkko
