From: Kristen Carlson Accardi <kris...@linux.intel.com>

Previous patches have implemented all infrastructure needed for
per-cgroup EPC page tracking and reclaiming. But all reclaimable EPC
pages are still tracked in the global LRU as sgx_epc_page_lru() always
returns reference to the global LRU.

Change sgx_epc_page_lru() to return the LRU of the cgroup in which the
given EPC page is allocated.

This makes all EPC pages tracked in per-cgroup LRUs and the global
reclaimer (ksgxd) will not be able to reclaim any pages from the global
LRU. However, in cases of over-committing, i.e., the sum of cgroup
limits greater than the total capacity, cgroups may never reclaim but
the total usage can still be near the capacity. Therefore a global
reclamation is still needed in those cases and it should be performed
from the root cgroup.

Modify sgx_reclaim_pages_global(), to reclaim from the root EPC cgroup
when cgroup is enabled, otherwise from the global LRU. Export
sgx_cgroup_reclaim_pages() in the header file so it can be reused for
this purpose.

Similarly, modify sgx_can_reclaim_global(), to check emptiness of LRUs
of all cgroups when EPC cgroup is enabled, otherwise only check the
global LRU.  Export sgx_cgroup_lru_empty() so it can be reused for this
purpose.

Finally, change sgx_reclaim_direct(), to check and ensure there are free
pages at cgroup level so forward progress can be made by the caller.
Export sgx_cgroup_should_reclaim() for reuse.

With these changes, the global reclamation and per-cgroup reclamation
both work properly with all pages tracked in per-cgroup LRUs.

Co-developed-by: Sean Christopherson <sean.j.christopher...@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopher...@intel.com>
Signed-off-by: Kristen Carlson Accardi <kris...@linux.intel.com>
Co-developed-by: Haitao Huang <haitao.hu...@linux.intel.com>
Signed-off-by: Haitao Huang <haitao.hu...@linux.intel.com>
Tested-by: Mikko Ylinen <mikko.yli...@linux.intel.com>
Tested-by: Jarkko Sakkinen <jar...@kernel.org>
---
V13:
- Use IS_ENABLED(CONFIG_CGROUP_MISC) in sgx_can_reclaim_global(). (Kai)

V12:
- Remove CONFIG_CGROUP_SGX_EPC, conditional compile SGX Cgroup for
CONFIGCONFIG_CGROUPMISC. (Jarkko)

V11:
- Reword the comments for global reclamation for allocation failure
after passing cgroup charging. (Kai)
- Add stub functions to remove ifdefs in c file (Kai)
- Add more detailed comments to clarify each page belongs to one cgroup, or the
root. (Kai)

V10:
- Add comment to clarify each page belongs to one cgroup, or the root by
default. (Kai)
- Merge the changes that expose sgx_cgroup_* functions to this patch.
- Add changes for sgx_reclaim_direct() that was missed previously.

V7:
- Split this out from the big patch, #10 in V6. (Dave, Kai)
---
 arch/x86/kernel/cpu/sgx/epc_cgroup.c |  6 ++--
 arch/x86/kernel/cpu/sgx/epc_cgroup.h | 27 ++++++++++++++++
 arch/x86/kernel/cpu/sgx/main.c       | 46 ++++++++++++++++++++++++++--
 3 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.c 
b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
index c237bc3330ee..fe2fd6034023 100644
--- a/arch/x86/kernel/cpu/sgx/epc_cgroup.c
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.c
@@ -67,7 +67,7 @@ static inline u64 sgx_cgroup_max_pages_to_root(struct 
sgx_cgroup *sgx_cg)
  *
  * Return: %true if all cgroups under the specified root have empty LRU lists.
  */
-static bool sgx_cgroup_lru_empty(struct misc_cg *root)
+bool sgx_cgroup_lru_empty(struct misc_cg *root)
 {
        struct cgroup_subsys_state *css_root;
        struct cgroup_subsys_state *pos;
@@ -115,7 +115,7 @@ static bool sgx_cgroup_lru_empty(struct misc_cg *root)
  * the LRUs are recently accessed, i.e., considered "too young" to reclaim, no
  * page will actually be reclaimed after walking the whole tree.
  */
-static void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct 
*charge_mm)
+void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct mm_struct 
*charge_mm)
 {
        struct cgroup_subsys_state *css_root;
        struct cgroup_subsys_state *pos;
@@ -156,7 +156,7 @@ static void sgx_cgroup_reclaim_pages(struct misc_cg *root, 
struct mm_struct *cha
  * threshold (%SGX_CG_MIN_FREE_PAGE) and there are reclaimable pages within the
  * cgroup.
  */
-static bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg)
+bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg)
 {
        u64 cur, max;
 
diff --git a/arch/x86/kernel/cpu/sgx/epc_cgroup.h 
b/arch/x86/kernel/cpu/sgx/epc_cgroup.h
index 2044e0d64076..9d69608eadf6 100644
--- a/arch/x86/kernel/cpu/sgx/epc_cgroup.h
+++ b/arch/x86/kernel/cpu/sgx/epc_cgroup.h
@@ -13,6 +13,11 @@
 #define MISC_CG_RES_SGX_EPC MISC_CG_RES_TYPES
 struct sgx_cgroup;
 
+static inline struct misc_cg *misc_from_sgx(struct sgx_cgroup *sgx_cg)
+{
+       return NULL;
+}
+
 static inline struct sgx_cgroup *sgx_get_current_cg(void)
 {
        return NULL;
@@ -27,8 +32,22 @@ static inline int sgx_cgroup_try_charge(struct sgx_cgroup 
*sgx_cg, enum sgx_recl
 
 static inline void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg) { }
 
+static inline bool sgx_cgroup_lru_empty(struct misc_cg *root)
+{
+       return true;
+}
+
+static inline bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg)
+{
+       return false;
+}
+
 static inline void sgx_cgroup_init(void) { }
 
+static inline void sgx_cgroup_reclaim_pages(struct misc_cg *root, struct 
mm_struct *charge_mm)
+{
+}
+
 #else /* CONFIG_CGROUP_MISC */
 
 struct sgx_cgroup {
@@ -37,6 +56,11 @@ struct sgx_cgroup {
        struct work_struct reclaim_work;
 };
 
+static inline struct misc_cg *misc_from_sgx(struct sgx_cgroup *sgx_cg)
+{
+       return sgx_cg->cg;
+}
+
 static inline struct sgx_cgroup *sgx_cgroup_from_misc_cg(struct misc_cg *cg)
 {
        return (struct sgx_cgroup *)(cg->res[MISC_CG_RES_SGX_EPC].priv);
@@ -67,6 +91,9 @@ static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg)
 
 int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim);
 void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg);
+bool sgx_cgroup_lru_empty(struct misc_cg *root);
+bool sgx_cgroup_should_reclaim(struct sgx_cgroup *sgx_cg);
+void sgx_cgroup_reclaim_pages(struct misc_cg *root,  struct mm_struct 
*charge_mm);
 void sgx_cgroup_init(void);
 
 #endif /* CONFIG_CGROUP_MISC */
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 92bd3151a589..8748358fd00f 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -31,9 +31,30 @@ static DEFINE_XARRAY(sgx_epc_address_space);
  */
 static struct sgx_epc_lru_list sgx_global_lru;
 
+/*
+ * Get the per-cgroup or global LRU list that tracks the given reclaimable 
page.
+ */
 static inline struct sgx_epc_lru_list *sgx_epc_page_lru(struct sgx_epc_page 
*epc_page)
 {
+#ifdef CONFIG_CGROUP_MISC
+       /*
+        * epc_page->sgx_cg here is never NULL during a reclaimable epc_page's
+        * life between sgx_alloc_epc_page() and sgx_free_epc_page():
+        *
+        * In sgx_alloc_epc_page(), epc_page->sgx_cg is set to the return from
+        * sgx_get_current_cg() which is the misc cgroup of the current task, or
+        * the root by default even if the misc cgroup is disabled by kernel
+        * command line.
+        *
+        * epc_page->sgx_cg is only unset by sgx_free_epc_page().
+        *
+        * This function is never used before sgx_alloc_epc_page() or after
+        * sgx_free_epc_page().
+        */
+       return &epc_page->sgx_cg->lru;
+#else
        return &sgx_global_lru;
+#endif
 }
 
 /*
@@ -41,7 +62,10 @@ static inline struct sgx_epc_lru_list 
*sgx_epc_page_lru(struct sgx_epc_page *epc
  */
 static inline bool sgx_can_reclaim_global(void)
 {
-       return !list_empty(&sgx_global_lru.reclaimable);
+       if (IS_ENABLED(CONFIG_CGROUP_MISC))
+               return !sgx_cgroup_lru_empty(misc_cg_root());
+       else
+               return !list_empty(&sgx_global_lru.reclaimable);
 }
 
 static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0);
@@ -403,7 +427,10 @@ static bool sgx_should_reclaim_global(unsigned long 
watermark)
 
 static void sgx_reclaim_pages_global(struct mm_struct *charge_mm)
 {
-       sgx_reclaim_pages(&sgx_global_lru, charge_mm);
+       if (IS_ENABLED(CONFIG_CGROUP_MISC))
+               sgx_cgroup_reclaim_pages(misc_cg_root(), charge_mm);
+       else
+               sgx_reclaim_pages(&sgx_global_lru, charge_mm);
 }
 
 /*
@@ -413,6 +440,15 @@ static void sgx_reclaim_pages_global(struct mm_struct 
*charge_mm)
  */
 void sgx_reclaim_direct(void)
 {
+       struct sgx_cgroup *sgx_cg = sgx_get_current_cg();
+
+       /* Make sure there are some free pages at cgroup level */
+       if (sgx_cg && sgx_cgroup_should_reclaim(sgx_cg)) {
+               sgx_cgroup_reclaim_pages(misc_from_sgx(sgx_cg), current->mm);
+               sgx_put_cg(sgx_cg);
+       }
+
+       /* Make sure there are some free pages at global level */
        if (sgx_should_reclaim_global(SGX_NR_LOW_PAGES))
                sgx_reclaim_pages_global(current->mm);
 }
@@ -615,6 +651,12 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum 
sgx_reclaim reclaim)
                        break;
                }
 
+               /*
+                * At this point, the usage within this cgroup is under its
+                * limit but there is no physical page left for allocation.
+                * Perform a global reclaim to get some pages released from any
+                * cgroup with reclaimable pages.
+                */
                sgx_reclaim_pages_global(current->mm);
                cond_resched();
        }
-- 
2.25.1


Reply via email to