On 4/7/25 2:12 PM, Li Wang wrote:
In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
This can cause test flakiness when immediately asserting usage after
deleting a child cgroup. To address this, add a helper function
`assert_with_retry()` that checks usage values with a timeout-based retry.
This improves test stability without relying on fixed sleep delays.

Also bump up the tolerance size to 7MB.

To avoid False Positives:
   ...
   # Assert memory charged correctly for child only use.
   # actual a = 11 MB
   # expected a = 0 MB
   # fail
   # cleanup
   # [FAIL]
   not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
   # 0
   # SUMMARY: PASS=10 SKIP=0 FAIL=1


I was also seeing this failure. I have tested this patch on my powerPC
setup and it is passing now.

./hugetlb_reparenting_test.sh -cgroup-v2
cleanup

Test charge, rmdir, uncharge
mkdir
write
Writing to this path: /mnt/huge/test
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

rmdir
uncharge
cleanup
done


Test child only hugetlb usage
setup
write
Writing to this path: /mnt/huge/test2
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

Assert memory charged correctly for child only use.
actual = 10 MB
expected = 0 MB
cleanup


Feel free to add
Tested-by: Donet Tom <donet...@linux.ibm.com>



Signed-off-by: Li Wang <liw...@redhat.com>
Cc: Waiman Long <long...@redhat.com>
Cc: Anshuman Khandual <anshuman.khand...@arm.com>
Cc: Dev Jain <dev.j...@arm.com>
Cc: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Cc: Shuah Khan <sh...@kernel.org>
---
  .../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
  1 file changed, 41 insertions(+), 55 deletions(-)

diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..1c172c6999f4 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
      do_umount=1
    fi
  fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
    hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -60,6 +60,41 @@ function cleanup() {
    set -e
  }
+function assert_with_retry() {
+  local actual_path="$1"
+  local expected="$2"
+  local tolerance=$((7 * 1024 * 1024))
+  local timeout=20
+  local interval=1
+  local start_time
+  local now
+  local elapsed
+  local actual
+
+  start_time=$(date +%s)
+
+  while true; do
+    actual="$(cat "$actual_path")"
+
+    if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+        [[ $actual -le $(($expected + $tolerance)) ]]; then
+      return 0
+    fi
+
+    now=$(date +%s)
+    elapsed=$((now - start_time))
+
+    if [[ $elapsed -ge $timeout ]]; then
+      echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+      echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+      cleanup
+      exit 1
+    fi
+
+    sleep $interval
+  done
+}
+
  function assert_state() {
    local expected_a="$1"
    local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
      expected_b="$3"
      expected_b_hugetlb="$4"
    fi
-  local tolerance=$((5 * 1024 * 1024))
-
-  local actual_a
-  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
-  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
-    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
-    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
-    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  local actual_a_hugetlb
-  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
-    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
-    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
-    return
-  fi
-
-  local actual_b
-  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
-  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
-    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
-    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
-    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
- local actual_b_hugetlb
-  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
-    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
-    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
+  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
-    exit 1
+  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+    assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
    fi
  }
@@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
 
  cleanup
 
-echo
  echo
  echo Test charge, rmdir, uncharge
  setup
@@ -195,7 +184,6 @@ cleanup
echo done
  echo
-echo
  if [[ ! $cgroup2 ]]; then
    echo "Test parent and child hugetlb usage"
    setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
    assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
-  sleep 5
    echo Assert memory reparent correctly.
    assert_state 0 $(($size * 2))
@@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
    cleanup
  fi
-echo
  echo
  echo "Test child only hugetlb usage"
  echo setup

Reply via email to