This is an automated email from the ASF dual-hosted git repository.

DImuthuUpe pushed a commit to branch slurm-integration-test
in repository https://gitbox.apache.org/repos/asf/airavata-custos.git

commit 3eb24bbdd39af5017e8b7acb0d8ece20e477417d
Author: DImuthuUpe <[email protected]>
AuthorDate: Thu May 21 08:38:45 2026 -0400

    Setting up a local slurm cluster and running tests against it
---
 .../operations/accounts_integration_test.go        | 131 ++++++++++++++++++
 dev-ops/local-slurm/Makefile                       |  33 +++++
 dev-ops/local-slurm/compose.yaml                   | 150 +++++++++++++++++++++
 dev-ops/local-slurm/docker/Dockerfile.base         |  33 +++++
 dev-ops/local-slurm/docker/Dockerfile.login        |  11 ++
 dev-ops/local-slurm/docker/Dockerfile.slurmctld    |   8 ++
 dev-ops/local-slurm/docker/Dockerfile.slurmd       |   6 +
 dev-ops/local-slurm/docker/Dockerfile.slurmdbd     |   7 +
 dev-ops/local-slurm/docker/Dockerfile.slurmrestd   |   8 ++
 .../local-slurm/scripts/bootstrap-accounting.sh    |  30 +++++
 dev-ops/local-slurm/scripts/entrypoint-ctld.sh     |  19 +++
 dev-ops/local-slurm/scripts/entrypoint-dbd.sh      |  24 ++++
 dev-ops/local-slurm/scripts/entrypoint-login.sh    |  17 +++
 dev-ops/local-slurm/scripts/entrypoint-restd.sh    |  24 ++++
 dev-ops/local-slurm/scripts/entrypoint-slurmd.sh   |  24 ++++
 dev-ops/local-slurm/scripts/init-keys.sh           |  30 +++++
 dev-ops/local-slurm/slurm/cgroup.conf              |   1 +
 dev-ops/local-slurm/slurm/gres.conf                |   2 +
 dev-ops/local-slurm/slurm/slurm.conf               |  38 ++++++
 dev-ops/local-slurm/slurm/slurmdbd.conf            |  18 +++
 20 files changed, 614 insertions(+)

diff --git 
a/connectors/SLURM/Association-Mapper/internal/operations/accounts_integration_test.go
 
b/connectors/SLURM/Association-Mapper/internal/operations/accounts_integration_test.go
new file mode 100644
index 000000000..fc86c33b3
--- /dev/null
+++ 
b/connectors/SLURM/Association-Mapper/internal/operations/accounts_integration_test.go
@@ -0,0 +1,131 @@
+package operations
+
+import (
+       "os"
+       "testing"
+)
+
+// crearteAndValidateAccount creates the account "test_account" (passing
+// "artisan" as the second CreateAccount argument, presumably the cluster
+// name) and then verifies that the account shows up in client.ListAccounts.
+//
+// Any failure aborts the calling test: a CreateAccount or ListAccounts error,
+// an empty listing, or a listing that does not contain "test_account".
+func crearteAndValidateAccount(t *testing.T, client *Client) {
+       // Report failures at the caller's line rather than inside this helper.
+       t.Helper()
+
+       err := client.CreateAccount(Account{
+               Name:         "test_account",
+               Description:  "Test account for integration testing",
+               Organization: "Test Organization",
+       }, "artisan")
+       if err != nil {
+               t.Fatalf("Failed to create account: %v", err)
+       }
+
+       accounts, err := client.ListAccounts()
+       if err != nil {
+               t.Fatalf("Failed to list accounts: %v", err)
+       }
+
+       if len(accounts) == 0 {
+               t.Fatal("No accounts found after creation")
+       }
+
+       for _, account := range accounts {
+               if account.Name == "test_account" {
+                       t.Logf("Successfully created account: %+v\n", account)
+                       return
+               }
+       }
+
+       // The listing is non-empty but does not contain the new account. Without
+       // this failure the helper would return normally and the test would pass
+       // even though the account was never observed.
+       t.Fatalf("Account %q not found in account listing after creation", "test_account")
+}
+
+// isLocalSlurmConfigAvailable reports whether all four environment variables
+// read by the integration tests (TEST_SLURM_API, TEST_SLURM_USER,
+// TEST_SLURM_TOKEN, TEST_SLURM_API_VERSION) are set to non-empty values.
+func isLocalSlurmConfigAvailable() bool {
+       required := [...]string{
+               "TEST_SLURM_API",
+               "TEST_SLURM_USER",
+               "TEST_SLURM_TOKEN",
+               "TEST_SLURM_API_VERSION",
+       }
+       for _, name := range required {
+               if os.Getenv(name) == "" {
+                       return false
+               }
+       }
+       return true
+}
+
+// TestAccountCreatiion_Integration creates "test_account" through a client
+// built from the TEST_SLURM_* environment variables and checks that the
+// account appears in the account listing. The test is skipped when that
+// configuration is not available.
+func TestAccountCreatiion_Integration(t *testing.T) {
+       if !isLocalSlurmConfigAvailable() {
+               t.Skip("Skipping integration test for account creation because local SLURM config is not available")
+       }
+
+       client := New(
+               os.Getenv("TEST_SLURM_API"),
+               os.Getenv("TEST_SLURM_USER"),
+               os.Getenv("TEST_SLURM_TOKEN"),
+               os.Getenv("TEST_SLURM_API_VERSION"),
+       )
+
+       const accountName = "test_account"
+
+       // Best-effort removal of an account left over from an earlier failed
+       // run; the result is deliberately not checked.
+       client.DeleteAccount(accountName)
+       // Remove the account again when the test finishes.
+       defer client.DeleteAccount(accountName)
+
+       crearteAndValidateAccount(t, client)
+}
+
+func TestAccountDeletion_Integration(t *testing.T) {
+
+       if !isLocalSlurmConfigAvailable() {
+               t.Skip("Skipping integration test for account deletion because 
local SLURM config is not available")
+       }
+
+       apiUrl := os.Getenv("TEST_SLURM_API")
+       user := os.Getenv("TEST_SLURM_USER")
+       token := os.Getenv("TEST_SLURM_TOKEN")
+       apiVersion := os.Getenv("TEST_SLURM_API_VERSION")
+
+       client := New(apiUrl, user, token, apiVersion)
+
+       crearteAndValidateAccount(t, client)
+
+       err := client.DeleteAccount("test_account")
+       if err != nil {
+               t.Fatalf("Failed to delete account: %v", err)
+       }
+
+       accounts, err := client.ListAccounts()
+       if err != nil {
+               t.Fatalf("Failed to list accounts: %v", err)
+       }
+
+       for _, account := range accounts {
+               if account.Name == "test_account" {
+                       t.Fatalf("Account was not deleted: %+v\n", account)
+               }
+       }
+
+       t.Logf("Successfully deleted account. Remaining accounts: %+v\n", 
accounts)
+}
+
+// TestGetAccount_Integration creates "test_account", fetches it back with
+// GetAccount, and checks that its name, description and organization match
+// the values used at creation. The test is skipped when the TEST_SLURM_*
+// configuration is not available.
+func TestGetAccount_Integration(t *testing.T) {
+       if !isLocalSlurmConfigAvailable() {
+               t.Skip("Skipping integration test for get account because local SLURM config is not available")
+       }
+
+       client := New(
+               os.Getenv("TEST_SLURM_API"),
+               os.Getenv("TEST_SLURM_USER"),
+               os.Getenv("TEST_SLURM_TOKEN"),
+               os.Getenv("TEST_SLURM_API_VERSION"),
+       )
+
+       const accountName = "test_account"
+
+       // Best-effort removal of an account left over from an earlier failed
+       // run; the result is deliberately not checked.
+       client.DeleteAccount(accountName)
+       // Remove the account again when the test finishes.
+       defer client.DeleteAccount(accountName)
+
+       crearteAndValidateAccount(t, client)
+
+       got, err := client.GetAccount(accountName)
+       if err != nil {
+               t.Fatalf("Failed to get account: %v", err)
+       }
+
+       // Each case aborts the test, so only the first mismatch is reported.
+       switch {
+       case got.Name != accountName:
+               t.Fatalf("Expected account name 'test_account', got '%s'", got.Name)
+       case got.Description != "Test account for integration testing":
+               t.Fatalf("Expected account description 'Test account for integration testing', got '%s'", got.Description)
+       case got.Organization != "Test Organization":
+               t.Fatalf("Expected account organization 'Test Organization', got '%s'", got.Organization)
+       }
+
+       t.Logf("Successfully retrieved account: %+v\n", got)
+}
diff --git a/dev-ops/local-slurm/Makefile b/dev-ops/local-slurm/Makefile
new file mode 100644
index 000000000..e7ac5f228
--- /dev/null
+++ b/dev-ops/local-slurm/Makefile
@@ -0,0 +1,33 @@
+# Makefile — helper targets for the local Slurm docker compose cluster.
+SHELL := /bin/bash
+# Every target here is a command, not a file; `token` is listed as well so a
+# stray file named "token" can never make that target look up to date.
+.PHONY: base up down build cli test test-integration smoke lint keys logs token
+
+# Build the base image that the per-service Dockerfiles start FROM.
+base:
+       docker build -f docker/Dockerfile.base -t slurmrest/base:24.05 .
+
+# Build all service images and start the cluster in the background.
+up: base
+       docker compose up -d --build
+
+# Stop the cluster and delete its named volumes (-v).
+down:
+       docker compose down -v
+
+# Build the service images without starting anything.
+build: base
+       docker compose build
+
+# Submit a trivial job from the login container, then show recent accounting
+# records and the job's output file.
+smoke:
+       docker compose exec login sbatch --wrap 'hostname' -o /tmp/out.txt
+       sleep 5
+       docker compose exec login sacct -n -o JobID,State --starttime now-5minutes | tail -n 5
+       docker compose exec login cat /tmp/out.txt
+
+lint:
+       cd cli && go vet ./...
+
+# Run only the one-shot init-keys service.
+keys:
+       docker compose up init-keys
+
+# Follow the logs of all services, starting from the last 100 lines.
+logs:
+       docker compose logs -f --tail=100
+
+# Run `scontrol token` inside the login container.
+token:
+       docker compose exec login scontrol token
\ No newline at end of file
diff --git a/dev-ops/local-slurm/compose.yaml b/dev-ops/local-slurm/compose.yaml
new file mode 100644
index 000000000..f86beb03b
--- /dev/null
+++ b/dev-ops/local-slurm/compose.yaml
@@ -0,0 +1,150 @@
+# compose.yaml
+name: slurmrest
+
+x-slurm-env: &slurm-env
+  MARIADB_PASSWORD: ${MARIADB_PASSWORD:-slurm}
+
+services:
+  init-keys:
+    image: slurmrest/base:24.05
+    command: ["/usr/local/bin/init-keys.sh"]
+    volumes:
+      - munge-key:/etc/munge
+      - jwt-key:/keys
+      - ./scripts/init-keys.sh:/usr/local/bin/init-keys.sh:ro
+    restart: "no"
+
+  mariadb:
+    image: mariadb:11
+    environment:
+      MARIADB_ROOT_PASSWORD: ${MARIADB_ROOT_PASSWORD:-rootpass}
+      MARIADB_DATABASE: slurm_acct_db
+      MARIADB_USER: slurm
+      MARIADB_PASSWORD: ${MARIADB_PASSWORD:-slurm}
+    volumes:
+      - mariadb-data:/var/lib/mysql
+    healthcheck:
+      test: ["CMD", "mariadb-admin", "ping", "-uslurm", 
"-p${MARIADB_PASSWORD:-slurm}"]
+      interval: 5s
+      timeout: 3s
+      retries: 20
+
+  slurmdbd:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.slurmdbd
+    hostname: slurmdbd
+    environment: *slurm-env
+    depends_on:
+      init-keys:
+        condition: service_completed_successfully
+      mariadb:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+    healthcheck:
+      test: ["CMD-SHELL", "pgrep -x slurmdbd >/dev/null"]
+      interval: 5s
+      timeout: 3s
+      retries: 20
+
+  slurmctld:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.slurmctld
+    hostname: slurmctld
+    environment:
+      CLUSTER_NAME: ${CLUSTER_NAME:-artisan}
+    depends_on:
+      slurmdbd:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+      - slurmctld-state:/var/spool/slurm
+    healthcheck:
+      test: ["CMD-SHELL", "scontrol ping >/dev/null 2>&1"]
+      interval: 5s
+      timeout: 3s
+      retries: 30
+
+  c1:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.slurmd
+    hostname: c1
+    environment:
+      SLURMD_NODENAME: c1
+    depends_on:
+      slurmctld:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+    healthcheck:
+      test: ["CMD-SHELL", "pgrep -x slurmd >/dev/null"]
+      interval: 5s
+      timeout: 3s
+      retries: 20
+
+  c2:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.slurmd
+    hostname: c2
+    environment:
+      SLURMD_NODENAME: c2
+    depends_on:
+      slurmctld:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+    healthcheck:
+      test: ["CMD-SHELL", "pgrep -x slurmd >/dev/null"]
+      interval: 5s
+      timeout: 3s
+      retries: 20
+
+  login:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.login
+    hostname: login
+    ports:
+      - "${LOGIN_SSH_PORT:-2222}:22"
+    depends_on:
+      slurmctld:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+
+  slurmrestd:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.slurmrestd
+    hostname: slurmrestd
+    ports:
+      - "${REST_PORT:-6820}:6820"
+    depends_on:
+      slurmctld:
+        condition: service_healthy
+      slurmdbd:
+        condition: service_healthy
+    volumes:
+      - munge-key:/etc/munge:ro
+      - jwt-key:/keys:ro
+      - ./slurm:/etc/slurm.readonly:ro
+
+volumes:
+  munge-key:
+  jwt-key:
+  mariadb-data:
+  slurmctld-state:
diff --git a/dev-ops/local-slurm/docker/Dockerfile.base 
b/dev-ops/local-slurm/docker/Dockerfile.base
new file mode 100644
index 000000000..f252d76d5
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.base
@@ -0,0 +1,33 @@
+# docker/Dockerfile.base
+FROM rockylinux:9
+
+ARG SLURM_VERSION=24.05.5
+
+RUN dnf -y install epel-release \
+ && dnf -y install dnf-plugins-core \
+ && dnf config-manager --set-enabled crb \
+ && dnf -y install \
+      munge munge-libs munge-devel \
+      mariadb-connector-c mariadb-connector-c-devel \
+      http-parser-devel json-c-devel libyaml-devel libjwt-devel \
+      dbus-devel \
+      pam-devel readline-devel perl perl-Switch \
+      gcc make wget which procps-ng iproute bzip2 \
+      openssh-server openssh-clients \
+      python3 python3-pip \
+ && dnf clean all
+
+RUN useradd -r -u 995 -g 0 -s /sbin/nologin slurm \
+ && install -d -o slurm -g 0 -m 0755 /var/spool/slurm /var/log/slurm 
/var/run/slurm /etc/slurm
+
+RUN wget -q https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 \
+ && tar -xjf slurm-${SLURM_VERSION}.tar.bz2 \
+ && cd slurm-${SLURM_VERSION} \
+ && ./configure --prefix=/usr --sysconfdir=/etc/slurm \
+      --enable-slurmrestd \
+ && make -j"$(nproc)" && make install \
+ && cd .. && rm -rf slurm-${SLURM_VERSION}*
+
+RUN ssh-keygen -A
+
+CMD ["/bin/bash"]
diff --git a/dev-ops/local-slurm/docker/Dockerfile.login 
b/dev-ops/local-slurm/docker/Dockerfile.login
new file mode 100644
index 000000000..204834631
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.login
@@ -0,0 +1,11 @@
+FROM slurmrest/base:24.05
+
+RUN echo "PermitRootLogin yes"            >> /etc/ssh/sshd_config \
+ && echo "PasswordAuthentication yes"     >> /etc/ssh/sshd_config \
+ && echo "root:rootpass" | chpasswd
+
+COPY scripts/entrypoint-login.sh /usr/local/bin/entrypoint-login.sh
+RUN chmod +x /usr/local/bin/entrypoint-login.sh
+
+EXPOSE 22
+ENTRYPOINT ["/usr/local/bin/entrypoint-login.sh"]
diff --git a/dev-ops/local-slurm/docker/Dockerfile.slurmctld 
b/dev-ops/local-slurm/docker/Dockerfile.slurmctld
new file mode 100644
index 000000000..27aa426ee
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.slurmctld
@@ -0,0 +1,8 @@
+# docker/Dockerfile.slurmctld
+FROM slurmrest/base:24.05
+
+COPY scripts/entrypoint-ctld.sh /usr/local/bin/entrypoint-ctld.sh
+COPY scripts/bootstrap-accounting.sh /usr/local/bin/bootstrap-accounting.sh
+RUN chmod +x /usr/local/bin/entrypoint-ctld.sh 
/usr/local/bin/bootstrap-accounting.sh
+
+ENTRYPOINT ["/usr/local/bin/entrypoint-ctld.sh"]
diff --git a/dev-ops/local-slurm/docker/Dockerfile.slurmd 
b/dev-ops/local-slurm/docker/Dockerfile.slurmd
new file mode 100644
index 000000000..97ab1c857
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.slurmd
@@ -0,0 +1,6 @@
+FROM slurmrest/base:24.05
+
+COPY scripts/entrypoint-slurmd.sh /usr/local/bin/entrypoint-slurmd.sh
+RUN chmod +x /usr/local/bin/entrypoint-slurmd.sh
+
+ENTRYPOINT ["/usr/local/bin/entrypoint-slurmd.sh"]
diff --git a/dev-ops/local-slurm/docker/Dockerfile.slurmdbd 
b/dev-ops/local-slurm/docker/Dockerfile.slurmdbd
new file mode 100644
index 000000000..9b3060658
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.slurmdbd
@@ -0,0 +1,7 @@
+# docker/Dockerfile.slurmdbd
+FROM slurmrest/base:24.05
+
+COPY scripts/entrypoint-dbd.sh /usr/local/bin/entrypoint-dbd.sh
+RUN chmod +x /usr/local/bin/entrypoint-dbd.sh
+
+ENTRYPOINT ["/usr/local/bin/entrypoint-dbd.sh"]
diff --git a/dev-ops/local-slurm/docker/Dockerfile.slurmrestd 
b/dev-ops/local-slurm/docker/Dockerfile.slurmrestd
new file mode 100644
index 000000000..70bf9915e
--- /dev/null
+++ b/dev-ops/local-slurm/docker/Dockerfile.slurmrestd
@@ -0,0 +1,8 @@
+# docker/Dockerfile.slurmrestd
+FROM slurmrest/base:24.05
+
+COPY scripts/entrypoint-restd.sh /usr/local/bin/entrypoint-restd.sh
+RUN chmod +x /usr/local/bin/entrypoint-restd.sh
+
+EXPOSE 6820
+ENTRYPOINT ["/usr/local/bin/entrypoint-restd.sh"]
diff --git a/dev-ops/local-slurm/scripts/bootstrap-accounting.sh 
b/dev-ops/local-slurm/scripts/bootstrap-accounting.sh
new file mode 100755
index 000000000..a8494393c
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/bootstrap-accounting.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Idempotent: creates the demo cluster, root account, root admin user.
+# A sentinel file under the slurmctld state directory records completion so
+# later container starts skip the whole script.
+set -euo pipefail
+
+SENTINEL=/var/spool/slurm/ctld/.bootstrap-done
+
+if [[ -f "$SENTINEL" ]]; then
+  echo "[bootstrap] sentinel present, skipping"
+  exit 0
+fi
+
+# has_entity KIND NAME
+# Succeeds when `sacctmgr show KIND` lists NAME as a whole word.
+# The listing is captured first instead of being piped into `grep -q`: under
+# `pipefail`, grep exiting on its first match can leave sacctmgr writing to a
+# closed pipe, and the resulting SIGPIPE status would make an existing entity
+# look absent.
+has_entity() {
+  local listing
+  listing="$(sacctmgr -in show "$1" format="$1")"
+  grep -qw -- "$2" <<<"$listing"
+}
+
+# Wait until slurmdbd answers
+until sacctmgr -i show cluster >/dev/null 2>&1; do
+  echo "[bootstrap] waiting for slurmdbd..."
+  sleep 2
+done
+
+CLUSTER="${CLUSTER_NAME:-artisan}"
+if ! has_entity cluster "$CLUSTER"; then
+  sacctmgr -i add cluster "$CLUSTER"
+fi
+if ! has_entity account "root"; then
+  sacctmgr -i add account root Description="root account" Organization="$CLUSTER"
+fi
+if ! has_entity user "root"; then
+  sacctmgr -i add user root Account=root AdminLevel=Administrator
+fi
+
+touch "$SENTINEL"
+echo "[bootstrap] done"
diff --git a/dev-ops/local-slurm/scripts/entrypoint-ctld.sh 
b/dev-ops/local-slurm/scripts/entrypoint-ctld.sh
new file mode 100755
index 000000000..f4cec4ca4
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/entrypoint-ctld.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+install -m 0644 /etc/slurm.readonly/slurm.conf /etc/slurm/slurm.conf
+install -m 0644 /etc/slurm.readonly/cgroup.conf /etc/slurm/cgroup.conf
+ln -sf /keys/jwt.hs256.key /etc/slurm/jwt.hs256.key
+
+# Ensure StateSaveLocation exists (the slurmctld-state named volume is empty
+# on first boot; /var/spool/slurm itself is created by the base image).
+install -d -m 0755 -o slurm -g 0 /var/spool/slurm/ctld
+
+install -d -m 0755 -o munge -g munge /var/run/munge
+install -d -m 0700 -o munge -g munge /var/log/munge /var/lib/munge
+runuser -u munge -- /usr/sbin/munged --force
+
+# Bootstrap accounting in the background after slurmctld comes up
+( sleep 5; /usr/local/bin/bootstrap-accounting.sh ) &
+
+exec slurmctld -D -vvv
diff --git a/dev-ops/local-slurm/scripts/entrypoint-dbd.sh 
b/dev-ops/local-slurm/scripts/entrypoint-dbd.sh
new file mode 100755
index 000000000..40a7becd8
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/entrypoint-dbd.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Entrypoint for the slurmdbd container: installs slurmdbd.conf, starts munged,
+# waits for MariaDB to accept TCP connections, then runs slurmdbd in the
+# foreground.
+set -euo pipefail
+
+# Copy slurmdbd.conf with the 0600 perms that slurmdbd requires
+# (slurm user's primary group is root/gid 0 — there is no 'slurm' group)
+install -m 0600 -o slurm -g 0 /etc/slurm.readonly/slurmdbd.conf /etc/slurm/slurmdbd.conf
+
+# Keep StoragePass in step with the database password. compose.yaml hands this
+# container the same MARIADB_PASSWORD it gives the mariadb service, while the
+# checked-in slurmdbd.conf hard-codes the default ("slurm"); without this
+# rewrite, overriding MARIADB_PASSWORD would leave slurmdbd unable to log in.
+if [[ -n "${MARIADB_PASSWORD:-}" ]]; then
+  conf_tmp="$(mktemp)"
+  grep -v '^StoragePass=' /etc/slurm/slurmdbd.conf > "$conf_tmp"
+  printf 'StoragePass=%s\n' "$MARIADB_PASSWORD" >> "$conf_tmp"
+  install -m 0600 -o slurm -g 0 "$conf_tmp" /etc/slurm/slurmdbd.conf
+  rm -f "$conf_tmp"
+fi
+
+ln -sf /keys/jwt.hs256.key /etc/slurm/jwt.hs256.key
+
+# Start munge. /var/run/munge must be world-readable (0755) so non-munge
+# users (slurm) can open the munge socket; /var/log and /var/lib stay 0700.
+install -d -m 0755 -o munge -g munge /var/run/munge
+install -d -m 0700 -o munge -g munge /var/log/munge /var/lib/munge
+runuser -u munge -- /usr/sbin/munged --force
+
+# Wait for MariaDB to accept TCP connections. The compose healthcheck on the
+# mariadb service already gates startup via depends_on, but this adds a
+# belt-and-suspenders TCP probe (base image has no mariadb client binary).
+# The probe opens fd 3 inside a subshell, so the descriptor is closed when the
+# subshell exits and nothing needs closing in this shell afterwards.
+until (exec 3<>/dev/tcp/mariadb/3306) 2>/dev/null; do
+  echo "[slurmdbd] waiting for mariadb..."
+  sleep 2
+done
+
+exec slurmdbd -D -vvv
diff --git a/dev-ops/local-slurm/scripts/entrypoint-login.sh 
b/dev-ops/local-slurm/scripts/entrypoint-login.sh
new file mode 100755
index 000000000..4f77e0ca5
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/entrypoint-login.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+install -m 0644 /etc/slurm.readonly/slurm.conf /etc/slurm/slurm.conf
+ln -sf /keys/jwt.hs256.key /etc/slurm/jwt.hs256.key
+
+install -d -m 0755 -o munge -g munge /var/run/munge
+install -d -m 0700 -o munge -g munge /var/log/munge /var/lib/munge
+runuser -u munge -- /usr/sbin/munged --force
+
+# Ensure the default test user exists
+id -u testuser >/dev/null 2>&1 || useradd -m -s /bin/bash testuser
+id -u testuser2 >/dev/null 2>&1 || useradd -m -s /bin/bash testuser2
+id -u testuser3 >/dev/null 2>&1 || useradd -m -s /bin/bash testuser3
+
+# Start sshd in the foreground
+exec /usr/sbin/sshd -D -e
diff --git a/dev-ops/local-slurm/scripts/entrypoint-restd.sh 
b/dev-ops/local-slurm/scripts/entrypoint-restd.sh
new file mode 100755
index 000000000..2060025b7
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/entrypoint-restd.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+install -m 0644 /etc/slurm.readonly/slurm.conf /etc/slurm/slurm.conf
+ln -sf /keys/jwt.hs256.key /etc/slurm/jwt.hs256.key
+
+install -d -m 0755 -o munge -g munge /var/run/munge
+install -d -m 0700 -o munge -g munge /var/log/munge /var/lib/munge
+runuser -u munge -- /usr/sbin/munged --force
+
+# slurmrestd must not run as root.
+# SLURM_JWT=daemon makes slurmrestd trust its own internal JWT for 
daemon-to-daemon calls;
+# external requests still require X-SLURM-USER-TOKEN.
+# SLURMRESTD_SECURITY flags:
+#   disable_unshare_sysv/files: Docker denies CLONE_NEWIPC without 
CAP_SYS_ADMIN,
+#     which we don't want to grant; skip those hardening steps.
+#   disable_user_check: the base image's 'slurm' user is slurm:0 (no slurm 
group
+#     exists), matching how slurmdbd/slurmctld already run. slurmrestd's 
default
+#     check rejects root primary group; we opt out since the daemon itself is 
not
+#     running as uid 0.
+exec runuser -u slurm -- env \
+  SLURM_JWT=daemon \
+  
SLURMRESTD_SECURITY=disable_unshare_sysv,disable_unshare_files,disable_user_check
 \
+  slurmrestd -f /etc/slurm/slurm.conf -a rest_auth/jwt 0.0.0.0:6820 -vvv
diff --git a/dev-ops/local-slurm/scripts/entrypoint-slurmd.sh 
b/dev-ops/local-slurm/scripts/entrypoint-slurmd.sh
new file mode 100755
index 000000000..15166d449
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/entrypoint-slurmd.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+install -m 0644 /etc/slurm.readonly/slurm.conf /etc/slurm/slurm.conf
+install -m 0644 /etc/slurm.readonly/cgroup.conf /etc/slurm/cgroup.conf
+install -m 0644 /etc/slurm.readonly/gres.conf /etc/slurm/gres.conf
+ln -sf /keys/jwt.hs256.key /etc/slurm/jwt.hs256.key
+
+# Ensure SlurmdSpoolDir exists (slurm.conf sets it to /var/spool/slurm/d;
+# /var/spool/slurm itself is created by the base image but the subdir is not).
+install -d -m 0755 -o slurm -g 0 /var/spool/slurm/d
+
+# Create two fake GPU device files so slurmd can register Gres=gpu:2 against
+# distinct File= entries. These are just /dev/null-style sinks — there are no
+# real GPUs. gres.conf references /dev/nullgpu0 and /dev/nullgpu1.
+for i in 0 1; do
+  [ -e "/dev/nullgpu${i}" ] || mknod -m 0666 "/dev/nullgpu${i}" c 1 3
+done
+
+install -d -m 0755 -o munge -g munge /var/run/munge
+install -d -m 0700 -o munge -g munge /var/log/munge /var/lib/munge
+runuser -u munge -- /usr/sbin/munged --force
+
+exec slurmd -D -N "${SLURMD_NODENAME:-$(hostname)}" -vvv
diff --git a/dev-ops/local-slurm/scripts/init-keys.sh 
b/dev-ops/local-slurm/scripts/init-keys.sh
new file mode 100755
index 000000000..5bfdac8c5
--- /dev/null
+++ b/dev-ops/local-slurm/scripts/init-keys.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# scripts/init-keys.sh — generate munge + JWT keys into shared volumes if 
missing.
+set -euo pipefail
+
+MUNGE_KEY=/etc/munge/munge.key
+# jwt-key volume is mounted at /keys; each service symlinks into /etc/slurm/
+JWT_KEY=/keys/jwt.hs256.key
+
+if [[ ! -s "$MUNGE_KEY" ]]; then
+  echo "[init-keys] generating $MUNGE_KEY"
+  install -d -m 0700 -o munge -g munge /etc/munge
+  dd if=/dev/urandom of="$MUNGE_KEY" bs=1 count=1024 status=none
+  chown munge:munge "$MUNGE_KEY"
+  chmod 0400 "$MUNGE_KEY"
+else
+  echo "[init-keys] $MUNGE_KEY already present"
+fi
+
+if [[ ! -s "$JWT_KEY" ]]; then
+  echo "[init-keys] generating $JWT_KEY"
+  install -d -m 0755 /keys
+  openssl rand -base64 32 | tr -d '\n' > "$JWT_KEY"
+  # slurm user's primary group is root (gid 0) in the base image; chown 
accordingly
+  chown slurm:0 "$JWT_KEY"
+  chmod 0400 "$JWT_KEY"
+else
+  echo "[init-keys] $JWT_KEY already present"
+fi
+
+echo "[init-keys] done"
diff --git a/dev-ops/local-slurm/slurm/cgroup.conf 
b/dev-ops/local-slurm/slurm/cgroup.conf
new file mode 100644
index 000000000..e59e9aeea
--- /dev/null
+++ b/dev-ops/local-slurm/slurm/cgroup.conf
@@ -0,0 +1 @@
+CgroupPlugin=cgroup/v1
diff --git a/dev-ops/local-slurm/slurm/gres.conf 
b/dev-ops/local-slurm/slurm/gres.conf
new file mode 100644
index 000000000..52e0c7e41
--- /dev/null
+++ b/dev-ops/local-slurm/slurm/gres.conf
@@ -0,0 +1,2 @@
+Name=gpu File=/dev/nullgpu0
+Name=gpu File=/dev/nullgpu1
diff --git a/dev-ops/local-slurm/slurm/slurm.conf 
b/dev-ops/local-slurm/slurm/slurm.conf
new file mode 100644
index 000000000..49bd34e35
--- /dev/null
+++ b/dev-ops/local-slurm/slurm/slurm.conf
@@ -0,0 +1,38 @@
+# slurm/slurm.conf
+ClusterName=artisan
+SlurmctldHost=slurmctld
+
+AuthType=auth/munge
+AuthAltTypes=auth/jwt
+AuthAltParameters=jwt_key=/etc/slurm/jwt.hs256.key
+CredType=cred/munge
+
+SlurmUser=slurm
+SlurmctldPort=6817
+SlurmdPort=6818
+StateSaveLocation=/var/spool/slurm/ctld
+SlurmdSpoolDir=/var/spool/slurm/d
+SwitchType=switch/none
+MpiDefault=none
+ProctrackType=proctrack/linuxproc
+TaskPlugin=task/none
+ReturnToService=2
+SlurmdParameters=config_overrides
+
+SlurmctldLogFile=/var/log/slurm/slurmctld.log
+SlurmdLogFile=/var/log/slurm/slurmd.log
+
+SelectType=select/cons_tres
+SelectTypeParameters=CR_CPU_Memory
+GresTypes=gpu
+
+AccountingStorageType=accounting_storage/slurmdbd
+AccountingStorageHost=slurmdbd
+AccountingStorageTRES=gres/gpu
+AccountingStorageEnforce=associations,limits,qos,safe
+AccountingStoreFlags=job_comment
+JobAcctGatherType=jobacct_gather/linux
+JobAcctGatherFrequency=30
+
+NodeName=c[1-2] CPUs=4 RealMemory=8000 Gres=gpu:2 State=UNKNOWN
+PartitionName=compute Nodes=c[1-2] Default=YES MaxTime=INFINITE State=UP
diff --git a/dev-ops/local-slurm/slurm/slurmdbd.conf 
b/dev-ops/local-slurm/slurm/slurmdbd.conf
new file mode 100644
index 000000000..f34ea5429
--- /dev/null
+++ b/dev-ops/local-slurm/slurm/slurmdbd.conf
@@ -0,0 +1,18 @@
+# slurm/slurmdbd.conf — file mode must be 0600 when mounted
+AuthType=auth/munge
+AuthAltTypes=auth/jwt
+AuthAltParameters=jwt_key=/etc/slurm/jwt.hs256.key
+
+DbdHost=slurmdbd
+DbdPort=6819
+SlurmUser=slurm
+
+StorageType=accounting_storage/mysql
+StorageHost=mariadb
+StoragePort=3306
+StorageUser=slurm
+StoragePass=slurm
+StorageLoc=slurm_acct_db
+
+LogFile=/var/log/slurm/slurmdbd.log
+PidFile=/var/run/slurm/slurmdbd.pid

Reply via email to