Hi everyone,

The following is a preparation for running the OVN CI on
custom hosted runners. It currently focuses only on "Build and Test" and
not on other workflows.

The problem with normal GitHub Actions is that the CI job is run
directly on the CI runner. Public runners are spawned and afterwards
destroyed by GitHub itself.
For custom CI runners that makes things quite complicated, as there is
no nice implementation of such a spawn-and-destroy lifecycle.

However, GitHub Actions also supports running the actions within
containers. This brings the benefit that we finally have a well-known
system to start from and that the build host will not be filled
with trash.
It also seems that custom runners natively support this, which would
make custom runners significantly easier to use.

If you want to try this out, you can just push it to a branch on your
fork of the GitHub repo. Note that this job will temporarily create an
"ovn-ci-tmp" package/registry entry for your user.

Note that I have not yet tried this out with a custom runner. However,
since I will be unavailable for the next two weeks, I wanted to share
my current state.

Thanks a lot,
Felix

Signed-off-by: Felix Huettner <[email protected]>
---
v1->v2:
  * natively use a container in the "build-linux" job
  * ensure an init system is available to stop processes correctly
  * add a cleanup job to remove the created container images
  * modified restart_ovsdb_controller_updates: this is needed as we run
      within a docker container which creates iptables nat rules. nft
      therefore prints a warning, which the check now ignores
  * modified dhcpd commands for tests: it seems that the dhcpd process
      in there does not search for the leases file in the same directory
      as the config by default.

 .ci/ci.sh                  |   6 +-
 .ci/linux-build.sh         |   4 +-
 .ci/linux-util.sh          |  10 +--
 .github/workflows/test.yml | 139 ++++++++++++++++++++++++++-----------
 tests/ovn-macros.at        |   2 +-
 tests/system-ovn.at        |   4 +-
 6 files changed, 109 insertions(+), 56 deletions(-)

diff --git a/.ci/ci.sh b/.ci/ci.sh
index 76c364868..55cb75ca2 100755
--- a/.ci/ci.sh
+++ b/.ci/ci.sh
@@ -174,11 +174,13 @@ fi
 CONTAINER_ID="$($CONTAINER_CMD run --privileged -d \
     --pids-limit=-1 \
     --security-opt apparmor=unconfined \
+    --cgroupns=host \
+    --cgroups=no-conmon \
     --env ASAN_OPTIONS=$ASAN_OPTIONS \
-    -v /lib/modules/$(uname -r):/lib/modules/$(uname -r):ro \
+    -v /host/lib/modules/$(uname -r):/lib/modules/$(uname -r):ro \
     -v $OVN_PATH:$CONTAINER_WORKSPACE/ovn:Z \
     -v $OVS_PATH:$CONTAINER_WORKSPACE/ovs:Z \
-    $IMAGE_NAME)"
+    $IMAGE_NAME tail -f /dev/null)"
 trap remove_container EXIT
 
 copy_sources_to_workdir
diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index d9b49b7b6..0b272d275 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -19,7 +19,7 @@ TIMEOUT=${TIMEOUT:-"0"}
 
 function install_dpdk()
 {
-    local DPDK_INSTALL_DIR="$(pwd)/dpdk-dir"
+    local DPDK_INSTALL_DIR="/workspace/dpdk-dir"
     local VERSION_FILE="${DPDK_INSTALL_DIR}/cached-version"
     local DPDK_PC=$(find $DPDK_INSTALL_DIR -type f -name libdpdk-libs.pc)
 
@@ -168,7 +168,7 @@ function execute_tests()
     fi
 
     if [ "$UNSTABLE" ]; then
-        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable" RECHECK=yes \
+        if ! SKIP_UNSTABLE=no TEST_RANGE="-k unstable -v" RECHECK=yes \
                 run_tests; then
             unstable_rc=1
         fi
diff --git a/.ci/linux-util.sh b/.ci/linux-util.sh
index b5bd1f8c9..e4f5da377 100755
--- a/.ci/linux-util.sh
+++ b/.ci/linux-util.sh
@@ -36,16 +36,8 @@ function fix_etc_hosts()
     cp /etc/hosts ./hosts.bak
     sed -E -n \
       '/^[[:space:]]*(#.*|[0-9a-fA-F:.]+([[:space:]]+[a-zA-Z0-9.-]+)+|)$/p' \
-      ./hosts.bak | sudo tee /etc/hosts
+      ./hosts.bak | tee /etc/hosts
 
     diff -u ./hosts.bak /etc/hosts || true
 }
 
-# Workaround until https://github.com/actions/runner-images/issues/10015
-# is resolved in some way.
-function disable_apparmor()
-{
-    # https://bugs.launchpad.net/ubuntu/+source/apparmor/+bug/2093797
-    sudo aa-teardown || true
-    sudo systemctl disable --now apparmor.service
-}
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 64073b228..55c79069e 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -7,6 +7,9 @@ on:
     # Run Sunday at midnight
     - cron: '0 0 * * 0'
 
+env:
+  CI_IMAGE: &ci_image ghcr.io/${{ github.repository_owner }}/ovn-ci-tmp:${{ 
github.sha }}
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || 
github.run_id }}
   cancel-in-progress: true
@@ -22,30 +25,34 @@ jobs:
     # +-------+-------------------+-------------------+
     # | !main |  Builds - Ubuntu  | xxxxxxxxxxxxxxxxx |
     # +-------+-------------------+-------------------+
+    defaults:
+      run:
+        shell: bash
     env:
-      DEPENDENCIES: podman
+      DEPENDENCIES: podman make
     name: Prepare container
     if: github.repository_owner == 'ovn-org' || github.event_name != 'schedule'
     runs-on: ubuntu-24.04
+    container:
+      image: ubuntu:24.04
+      options: --privileged
 
     steps:
       - uses: actions/checkout@v6
 
       - name: Update APT cache
-        run: sudo apt update
+        run: apt update
 
       - name: Install dependencies
-        run: sudo apt install -y ${{ env.DEPENDENCIES }}
+        run: apt install -y ${{ env.DEPENDENCIES }}
 
       - name: Fix /etc/hosts file
         run: |
           . .ci/linux-util.sh
           fix_etc_hosts
 
-      - name: Disable apparmor
-        run: |
-          . .ci/linux-util.sh
-          disable_apparmor
+      - name: Log in container registry
+        run: echo "${{ secrets.GITHUB_TOKEN }}" | podman login ghcr.io -u ${{ 
github.actor }} --password-stdin
 
       - name: Choose image distro
         if: github.event_name == 'push' || github.event_name == 'pull_request'
@@ -67,20 +74,16 @@ jobs:
         run: podman pull ghcr.io/ovn-org/ovn-tests:${{ env.IMAGE_DISTRO }}
 
       - name: Tag image
-        run: podman tag ovn-org/ovn-tests:${{ env.IMAGE_DISTRO }} 
ovn-org/ovn-tests
+        run: podman tag ovn-org/ovn-tests:${{ env.IMAGE_DISTRO }} ${{ 
env.CI_IMAGE }}
 
       - name: Export image
-        run: podman save -o /tmp/image.tar --format oci-archive 
ovn-org/ovn-tests
-
-      - name: Cache image
-        id: image_cache
-        uses: actions/cache@v5
-        with:
-          path: /tmp/image.tar
-          key: ${{ github.sha }}/${{ github.event_name }}
+        run: podman push ${{ env.CI_IMAGE }}
 
   build-linux:
     needs: [prepare-container]
+    defaults:
+      run:
+        shell: bash
     env:
       ARCH:        ${{ matrix.cfg.arch }}
       CC:          ${{ matrix.cfg.compiler }}
@@ -91,9 +94,13 @@ jobs:
       TEST_RANGE:  ${{ matrix.cfg.test_range }}
       SANITIZERS:  ${{ matrix.cfg.sanitizers }}
       UNSTABLE:    ${{ matrix.cfg.unstable }}
+      DEPENDENCIES: build-essential git podman
 
     name: linux ${{ join(matrix.cfg.*, ' ') }}
     runs-on: ubuntu-24.04
+    container:
+      image: *ci_image
+      options: --privileged --init
 
     strategy:
       fail-fast: false
@@ -126,11 +133,17 @@ jobs:
         - { arch: x86, compiler: gcc, opts: --disable-ssl }
 
     steps:
+    - name: Update APT cache
+      run: apt update
+
+    - name: Install dependencies
+      run: apt install -y ${{ env.DEPENDENCIES }}
+
     - name: system-level-dependencies
       if: ${{ startsWith(matrix.cfg.testsuite, 'system-test') }}
       run: |
-        sudo apt update
-        sudo apt -y install linux-modules-extra-$(uname -r)
+        apt update
+        apt -y install linux-modules-extra-$(uname -r)
 
     - name: checkout
       if: github.event_name == 'push' || github.event_name == 'pull_request'
@@ -166,38 +179,84 @@ jobs:
         . .ci/linux-util.sh
         fix_etc_hosts
 
-    - name: Disable apparmor
+    - name: Trust git repo
+      run: git config --global --add safe.directory $(pwd)
+
+    - name: Setup hugepages
+      if: matrix.cfg.dpdk == 'dpdk'
       run: |
-        . .ci/linux-util.sh
-        disable_apparmor
+        echo 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+        mkdir /dev/hugepages
+        mount -t hugetlbfs none /dev/hugepages
 
-    - name: image cache
-      id: image_cache
-      uses: actions/cache@v5
-      with:
-        path: /tmp/image.tar
-        key: ${{ github.sha }}/${{ github.event_name }}
-
-    - name: load image
+    - name: build and test
       run: |
-        sudo podman load -i /tmp/image.tar
-        podman load -i /tmp/image.tar
-        rm -rf /tmp/image.tar
+        ARCH=$ARCH CC=$CC LIBS=$LIBS OPTS=$OPTS TESTSUITE=$TESTSUITE \
+           TEST_RANGE=$TEST_RANGE SANITIZERS=$SANITIZERS DPDK=$DPDK \
+           RECHECK=$RECHECK UNSTABLE=$UNSTABLE TIMEOUT=2h \
+           BASE_VERSION=$BASE_VERSION ./.ci/linux-build.sh
 
-    - name: build
-      if: ${{ startsWith(matrix.cfg.testsuite, 'system-test') }}
-      run: sudo -E ./.ci/ci.sh --archive-logs --timeout=2h
-
-    - name: build
-      if: ${{ !startsWith(matrix.cfg.testsuite, 'system-test') }}
-      run: ./.ci/ci.sh --archive-logs --timeout=2h
+    - name: collect logs on failure
+      if: failure() || cancelled()
+      run: |
+        mkdir -p /tmp/logcollector
+        cp config.log /tmp/logcollector
+        cp -r tests/testsuite.* /tmp/logcollector || true
+        cp -r tests/system-*-testsuite.* /tmp/logcollector || true
+        cp -r tests/upgrade-testsuite.* /tmp/logcollector || true
+        tar -czf /tmp/logs.tgz /tmp/logcollector
 
     - name: upload logs on failure
       if: failure() || cancelled()
       uses: actions/upload-artifact@v7
       with:
         name: logs-linux-${{ join(matrix.cfg.*, '-') }}
-        path: logs.tgz
+        path: /tmp/logs.tgz
+
+  cleanup:
+    needs: [build-linux]
+    name: Cleanup
+    if: always()
+    runs-on: ubuntu-24.04
+    container:
+      image: ubuntu:24.04
+    permissions:
+      packages: write
+
+    steps:
+      - name: Update APT cache
+        run: apt update
+
+      - name: Install dependencies
+        run: apt install -y curl jq
+
+      - name: delete temporary image
+        run: |
+          if [ "${{ github.event.repository.owner.type }}" = "Organization" ]; 
then
+            TYPE="orgs"
+          else
+            TYPE="users"
+          fi
+          PACKAGE_URL="https://api.github.com/${TYPE}/${{ 
github.repository_owner }}/packages/container/ovn-ci-tmp"
+          PACKAGE_VERSION_URL="${PACKAGE_URL}/versions"
+          VERSION_ID=$(curl -s \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            -H "Accept: application/vnd.github+json" \
+            "$PACKAGE_VERSION_URL" | jq -r '.[] | 
select(.metadata.container.tags[] == "${{ github.sha }}") | .id')
+          if [ -n "$VERSION_ID" ]; then
+            if curl --fail-with-body -s -X DELETE \
+              -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+              -H "Accept: application/vnd.github+json" \
+              "$PACKAGE_VERSION_URL/$VERSION_ID"; then
+                echo "Package version deleted successfully"
+            else
+                echo "Package version seems to be the last one. Attempting to 
delete the whole package"
+                curl --fail-with-body -s -X DELETE \
+                  -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+                  -H "Accept: application/vnd.github+json" \
+                  "$PACKAGE_URL"
+            fi
+          fi
 
   build-osx:
     env:
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index aeb4149d5..601c116ec 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -1438,7 +1438,7 @@ stop_ovsdb_controller_updates() {
 restart_ovsdb_controller_updates() {
   TCP_PORT=$1
   echo Restarting updates from ovn-controller to ovsdb
-  AT_CHECK([nft list ruleset | grep $TCP_PORT], [0], [ignore])
+  AT_CHECK([nft list ruleset | grep $TCP_PORT], [0], [ignore], [ignore])
   AT_CHECK([nft delete table ip ovn-test])
 }
 
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 8d1f21609..1b11cab94 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -12746,7 +12746,7 @@ chmod 775 $DHCP_TEST_DIR
 chmod 664 $DHCP_TEST_DIR/dhcpd.leases
 
 
-NETNS_DAEMONIZE([server], [dhcpd -4 -f -cf $DHCP_TEST_DIR/dhcpd.conf s1 > 
dhcpd.log 2>&1], [dhcpd.pid])
+NETNS_DAEMONIZE([server], [dhcpd -4 -f -cf $DHCP_TEST_DIR/dhcpd.conf -lf 
$DHCP_TEST_DIR/dhcpd.leases s1 > dhcpd.log 2>&1], [dhcpd.pid])
 
 NS_CHECK_EXEC([server], [tcpdump -l -nvv -i s1  udp > pkt.pcap 2>tcpdump_err 
&])
 OVS_WAIT_UNTIL([grep "listening" tcpdump_err])
@@ -21370,7 +21370,7 @@ chmod 775 $DHCP_TEST_DIR
 chmod 664 $DHCP_TEST_DIR/dhcpd.leases
 
 # Start dhcpd as DHCP server in the server namespace.
-NETNS_DAEMONIZE([server], [dhcpd -4 -f -cf $DHCP_TEST_DIR/dhcpd.conf server > 
$DHCP_TEST_DIR/dhcpd.log 2>&1], [dhcpd.pid])
+NETNS_DAEMONIZE([server], [dhcpd -4 -f -cf $DHCP_TEST_DIR/dhcpd.conf -lf 
$DHCP_TEST_DIR/dhcpd.leases server > $DHCP_TEST_DIR/dhcpd.log 2>&1], 
[dhcpd.pid])
 
 # Give dhcpd time to start.
 sleep 1

base-commit: b4d2c0369f92f5d57d850802934ac05feb2979d9
-- 
2.43.0


   
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to