This is an automated email from the ASF dual-hosted git repository.

ssulav pushed a commit to branch HDDS-14668
in repository https://gitbox.apache.org/repos/asf/ozone-installer.git


The following commit(s) were added to refs/heads/HDDS-14668 by this push:
     new 0367ef8  HDDS-14668. Add support for master and worker node options
0367ef8 is described below

commit 0367ef804a78f834c01ddf977de9f5928622886d
Author: Soumitra Sulav <[email protected]>
AuthorDate: Thu Feb 19 22:21:20 2026 +0530

    HDDS-14668. Add support for master and worker node options
---
 README.md          |  22 +++++--
 hosts.txt.example  |  24 +++++---
 ozone_installer.py | 177 +++++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 165 insertions(+), 58 deletions(-)

diff --git a/README.md b/README.md
index 1905887..fb2dac7 100644
--- a/README.md
+++ b/README.md
@@ -116,7 +116,7 @@ python3 ozone_installer.py -H host1.domain -v 2.0.0
 # HA upstream (3+ hosts) - mode auto-detected
 python3 ozone_installer.py -H "host{1..3}.domain" -v 2.0.0
 
-# Using host file instead of CLI (one host per line, supports user@host:port format)
+# Host file with [masters] and [workers] sections (masters=SCM,OM,Recon; workers=Datanode,S3G)
 python3 ozone_installer.py -F hosts.txt -v 2.0.0
 
 # Local snapshot build
@@ -160,7 +160,21 @@ Add `ansible_python_interpreter=/usr/bin/python3.9` to each host line in your in
 
 ### Host file format
 
-When using `-F/--host-file`, create a text file with one host per line. See `hosts.txt.example` for a complete example.
+When using `-F/--host-file`, two formats are supported:
+
+**1) Master/worker split** – use `[masters]` and `[workers]` sections (INI-style). Masters run SCM, OM, Recon; workers run Datanode, S3G:
+```
+[masters]
+master1.domain
+master2.domain
+master3.domain
+
+[workers]
+worker1.domain
+worker2.domain
+```
+
+**2) All-in-one** – Plain list, one host per line. All hosts run the Datanode role; OM/SCM run on the first host (non-HA) or on the first three hosts (HA). Supports `user@host:port` format.
 
 
 ### Interactive prompts and version selection
@@ -261,8 +275,8 @@ ANSIBLE_CONFIG=ansible.cfg ansible-playbook -i inventories/dev/hosts.ini playboo
 ## Components and config mapping
 
 - Components (per the Ozone docs): Ozone Manager (OM), Storage Container Manager (SCM), Datanodes (DN), and Recon. The installer maps:
-  - Non‑HA: first host runs OM+SCM+Recon; all hosts are DNs.
-  - HA: first three hosts serve as OM and SCM sets; all hosts are DNs; first host is Recon.
+  - **Master/worker mode** (host file with `[masters]` and `[workers]` sections): Masters run SCM, OM, Recon; workers run Datanode, S3G. HA requires 3+ masters.
+  - **Legacy mode** (`-H`/`-F`): Non‑HA: first host runs OM+SCM+Recon; all hosts are DNs. HA: first three hosts serve as OM and SCM sets; all hosts are DNs; first host is Recon.
 - `ozone-site.xml` is rendered from templates based on inventory groups:
   - `ozone.scm.names`, `ozone.scm.client.address`, `ozone.om.address` or HA service IDs
   - `ozone.metadata.dirs`, `hdds.datanode.dir`, and related paths map to `data_base` (comma-separated dirs are expanded per property)
diff --git a/hosts.txt.example b/hosts.txt.example
index dcd13a8..f05a264 100644
--- a/hosts.txt.example
+++ b/hosts.txt.example
@@ -16,13 +16,23 @@
 # Example host file for ozone_installer.py
 # Usage: python3 ozone_installer.py -F hosts.txt.example -v 2.0.0
 #
-# Format: One host per line
-# Supports: user@host:port
-# Comments and empty lines are ignored
-
-# Simple hostname
-# host1.example.com
-
+# Two formats supported:
+#
+# 1) Master/worker: [masters] and [workers] sections (masters=SCM,OM,Recon; workers=Datanode,S3G)
+# 2) Plain list, one host per line (all hosts run datanodes and SCM/OM runs on first three hosts)
+#
+# Supports: user@host:port. Comments (#) and empty lines are ignored.
+#
+# --- Master/worker format example ---
+# [masters]
+# master1.example.com
+# master2.example.com
+# master3.example.com
+#
+# [workers]
+# worker1.example.com
+# worker2.example.com
+#
 # With SSH user
 # [email protected]
 
diff --git a/ozone_installer.py b/ozone_installer.py
index c044ac9..c3eaa19 100755
--- a/ozone_installer.py
+++ b/ozone_installer.py
@@ -79,7 +79,7 @@ def parse_args(argv: List[str]) -> argparse.Namespace:
         description="Ozone Ansible Installer (Python trigger) - mirrors bash 
installer flags"
     )
     p.add_argument("-H", "--host", help="Target host(s). Non-HA: host. HA: 
comma-separated or brace expansion host{1..n}")
-    p.add_argument("-F", "--host-file", help="File containing target hosts 
(one per line, supports @, : for user/port)")
+    p.add_argument("-F", "--host-file", help="Host file. Plain list = 
all-in-one. Use [masters] and [workers] sections for master/worker split")
     p.add_argument("-m", "--auth-method", choices=["password", "key"], 
default=None)
     p.add_argument("-p", "--password", help="SSH password (for 
--auth-method=password)")
     p.add_argument("-k", "--keyfile", help="SSH private key file (for 
--auth-method=key)")
@@ -310,57 +310,97 @@ def parse_hosts(hosts_raw: Optional[str]) -> List[dict]:
             out.append({"host": host, "user": user, "port": port})
     return out
 
-def read_hosts_from_file(filepath: str) -> Optional[str]:
+def read_hosts_from_file(filepath: str) -> Tuple[Optional[str], Optional[str]]:
     """
-    Reads hosts from a file (one host per line).
-    Lines starting with # are treated as comments and ignored.
-    Empty lines are ignored.
-    Supports same format as CLI: user@host:port
-    Returns comma-separated host string suitable for parse_hosts().
+    Reads hosts from a file.
+
+    Two formats supported:
+    1) Master/worker: [masters] and [workers] sections (INI-style). Returns 
(masters_csv, workers_csv).
+    2) Legacy: plain list, one host per line. Returns (hosts_csv, None).
+
+    Lines starting with # are comments. Empty lines ignored. Supports 
user@host:port.
     """
     logger = get_logger()
     try:
         path = Path(filepath)
         if not path.exists():
             logger.error(f"Host file not found: {filepath}")
-            return None
-        hosts = []
+            return (None, None)
+        masters: List[str] = []
+        workers: List[str] = []
+        flat: List[str] = []
+        current_section: Optional[str] = None
         with path.open('r') as f:
             for line in f:
                 line = line.strip()
-                # Skip empty lines and comments
                 if not line or line.startswith('#'):
                     continue
-                hosts.append(line)
-        if hosts:
-            logger.info(f"Read {len(hosts)} host(s) from {filepath}")
-            return ','.join(hosts)
-        else:
-            logger.error(f"No valid hosts found in {filepath}")
-            return None
+                if line.startswith('[') and line.endswith(']'):
+                    current_section = line[1:-1].lower()
+                    continue
+                if current_section == "masters":
+                    masters.append(line)
+                elif current_section == "workers":
+                    workers.append(line)
+                elif current_section is None:
+                    flat.append(line)
+        if masters and workers:
+            logger.info(f"Read {len(masters)} master(s) and {len(workers)} 
worker(s) from {filepath}")
+            return (','.join(masters), ','.join(workers))
+        if flat:
+            logger.info(f"Read {len(flat)} host(s) from {filepath}")
+            return (','.join(flat), None)
+        logger.error(f"No valid hosts found in {filepath}")
+        return (None, None)
     except Exception as e:
         logger.error(f"Error reading host file {filepath}: {e}")
-        return None
+        return (None, None)
 
-def auto_cluster_mode(hosts: List[dict], forced: Optional[str] = None) -> str:
+def auto_cluster_mode(hosts: List[dict], forced: Optional[str] = None, 
master_count: Optional[int] = None) -> str:
     if forced in ("non-ha", "ha"):
         return forced
-    return "ha" if len(hosts) >= 3 else "non-ha"
-
-def build_inventory(hosts: List[dict], ssh_user: Optional[str] = None, 
keyfile: Optional[str] = None, password: Optional[str] = None, cluster_mode: 
str = "non-ha", python_interpreter: Optional[str] = None) -> str:
+    n = master_count if master_count is not None else len(hosts)
+    return "ha" if n >= 3 else "non-ha"
+
+def build_inventory(
+    hosts: Optional[List[dict]] = None,
+    master_hosts: Optional[List[dict]] = None,
+    worker_hosts: Optional[List[dict]] = None,
+    ssh_user: Optional[str] = None,
+    keyfile: Optional[str] = None,
+    password: Optional[str] = None,
+    cluster_mode: str = "non-ha",
+    python_interpreter: Optional[str] = None,
+) -> str:
     """
     Returns INI inventory text for our groups: [om], [scm], [datanodes], 
[recon], [s3g]
+
+    Either (hosts) for all-in-one, or (master_hosts, worker_hosts) for 
master/worker split.
+    Masters run SCM, OM, Recon. Workers run Datanode, S3G.
     """
+    use_master_worker = master_hosts is not None and worker_hosts is not None
+    if use_master_worker:
+        if not master_hosts or not worker_hosts:
+            return ""
+        # Master/worker: masters -> OM, SCM, Recon; workers -> Datanodes, S3G
+        om = master_hosts[:3] if cluster_mode == "ha" and len(master_hosts) >= 
3 else master_hosts[:1]
+        scm = master_hosts[:3] if cluster_mode == "ha" and len(master_hosts) 
>= 3 else master_hosts[:1]
+        recon = [master_hosts[0]]
+        dn = worker_hosts
+        s3g = [worker_hosts[0]] if worker_hosts else []
+        return _render_inv_groups(
+            om=om, scm=scm, dn=dn, recon=recon, s3g=s3g,
+            ssh_user=ssh_user, keyfile=keyfile, password=password, 
python_interpreter=python_interpreter
+        )
+    # Legacy: single host list, all roles derived from it
     if not hosts:
         return ""
-    # Non-HA mapping: OM/SCM on first host; all hosts as datanodes; recon on 
first
     if cluster_mode == "non-ha":
         h = hosts[0]
         return _render_inv_groups(
             om=[h], scm=[h], dn=hosts, recon=[h], s3g=[h],
             ssh_user=ssh_user, keyfile=keyfile, password=password, 
python_interpreter=python_interpreter
         )
-    # HA: first 3 go to OM and SCM; all to datanodes; recon is first if present
     om = hosts[:3] if len(hosts) >= 3 else hosts
     scm = hosts[:3] if len(hosts) >= 3 else hosts
     dn = hosts
@@ -449,22 +489,41 @@ def main(argv: List[str]) -> int:
         except Exception:
             last_cfg = None
 
-    # Gather inputs interactively where missing
-    hosts_raw_default = (last_cfg.get("hosts_raw") if last_cfg else None)
-    # Check if hosts are provided via file first, then CLI, then default/prompt
-    if args.host_file:
-        hosts_raw = read_hosts_from_file(args.host_file)
-        if not hosts_raw:
+    # Gather inputs: from host file ([masters]/[workers] sections) or -H 
(legacy)
+    masters_raw = None
+    workers_raw = None
+    hosts_raw = None
+    master_hosts: List[dict] = []
+    worker_hosts: List[dict] = []
+    hosts: List[dict] = []
+    host_file_path = args.host_file or (last_cfg.get("host_file") if last_cfg 
else None)
+
+    if host_file_path:
+        file_masters, file_workers = read_hosts_from_file(host_file_path)
+        if file_masters is None and file_workers is None:
             logger = get_logger()
-            logger.error(f"Error: Could not read hosts from file: 
{args.host_file}")
+            logger.error(f"Error: Could not read hosts from file: 
{host_file_path}")
             return 2
+        if file_workers is not None:
+            # File has [masters] and [workers] sections
+            masters_raw = file_masters
+            workers_raw = file_workers
+            master_hosts = parse_hosts(masters_raw) if masters_raw else []
+            worker_hosts = parse_hosts(workers_raw) if workers_raw else []
+        else:
+            # Legacy: plain host list
+            hosts_raw = file_masters
+            hosts = parse_hosts(hosts_raw) if hosts_raw else []
     else:
+        hosts_raw_default = (last_cfg.get("hosts_raw") if last_cfg else None)
         hosts_raw = args.host or hosts_raw_default or prompt("Target host(s) 
[non-ha: host | HA: h1,h2,h3 or brace expansion]", default="", yes_mode=yes)
-    hosts = parse_hosts(hosts_raw) if hosts_raw else []
-    # Initialize per-run logger as soon as we have hosts_raw
+        hosts = parse_hosts(hosts_raw) if hosts_raw else []
+
+    use_master_worker = bool(masters_raw is not None and workers_raw is not 
None)
+    # Initialize per-run logger as soon as we have host info
     try:
         ts = datetime.now().strftime("%Y%m%d-%H%M%S")
-        raw_hosts_for_name = (hosts_raw or "").strip()
+        raw_hosts_for_name = (hosts_raw or masters_raw or workers_raw or 
"").strip()
         safe_hosts = re.sub(r"[^A-Za-z0-9_.-]+", "-", raw_hosts_for_name)[:80] 
or "hosts"
         run_log_path = LOGS_DIR / f"ansible-{ts}-{safe_hosts}.log"
         logger = get_logger(run_log_path)
@@ -474,23 +533,29 @@ def main(argv: List[str]) -> int:
         logger = get_logger(run_log_path)
         logger.info(f"Logging to: {run_log_path} (fallback)")
 
-    if not hosts:
-        logger.error("Error: No hosts provided (-H/--host or -F/--host-file).")
-        return 2
-    # Decide HA vs Non-HA with user input; default depends on host count
+    if use_master_worker:
+        if not master_hosts or not worker_hosts:
+            logger.error("Error: Host file must have both [masters] and 
[workers] sections with at least one host each.")
+            return 2
+    else:
+        if not hosts:
+            logger.error("Error: No hosts provided (-H/--host or 
-F/--host-file).")
+            return 2
+    # Decide HA vs Non-HA with user input; default depends on master count
+    master_count = len(master_hosts) if use_master_worker else len(hosts)
     resume_cluster_mode = (last_cfg.get("cluster_mode") if last_cfg else None)
     if args.cluster_mode:
         cluster_mode = args.cluster_mode
     elif resume_cluster_mode:
         cluster_mode = resume_cluster_mode
     else:
-        default_mode = "ha" if len(hosts) >= 3 else "non-ha"
+        default_mode = auto_cluster_mode(hosts or [], 
master_count=master_count)
         selected = prompt("Deployment type (option: ha or non-ha)", 
default=default_mode, yes_mode=yes)
         cluster_mode = (selected or default_mode).strip().lower()
         if cluster_mode not in ("ha", "non-ha"):
             cluster_mode = default_mode
-    if cluster_mode == "ha" and len(hosts) < 3:
-        logger.error("Error: HA requires at least 3 hosts (to map 3 OMs and 3 
SCMs).")
+    if cluster_mode == "ha" and master_count < 3:
+        logger.error("Error: HA requires at least 3 master hosts (to map 3 OMs 
and 3 SCMs).")
         return 2
 
     # Resolve download base early for version selection
@@ -583,7 +648,11 @@ def main(argv: List[str]) -> int:
         local_path = str(candidate)
 
     # Build a human-friendly summary table of inputs before continuing
-    host_list_display = str(hosts_raw or "")
+    host_list_display = (
+        f"Masters: {masters_raw or ''} | Workers: {workers_raw or ''}"
+        if use_master_worker
+        else str(hosts_raw or "")
+    )
     summary_rows: List[Tuple[str, str]] = [
         ("Hosts", host_list_display),
         ("Cluster mode", cluster_mode),
@@ -614,8 +683,17 @@ def main(argv: List[str]) -> int:
         logger.info("Python interpreter will be auto-detected by playbook")
     
     # Prepare dynamic inventory and extra-vars
-    inventory_text = build_inventory(hosts, ssh_user=ssh_user, 
keyfile=keyfile, password=password,
-                                     cluster_mode=cluster_mode, 
python_interpreter=python_interpreter)
+    if use_master_worker:
+        inventory_text = build_inventory(
+            master_hosts=master_hosts, worker_hosts=worker_hosts,
+            ssh_user=ssh_user, keyfile=keyfile, password=password,
+            cluster_mode=cluster_mode, python_interpreter=python_interpreter
+        )
+    else:
+        inventory_text = build_inventory(
+            hosts=hosts, ssh_user=ssh_user, keyfile=keyfile, password=password,
+            cluster_mode=cluster_mode, python_interpreter=python_interpreter
+        )
     # Decide cleanup behavior up-front (so we can pass it into the unified 
play)
     do_cleanup = False
     if args.clean:
@@ -670,7 +748,8 @@ def main(argv: List[str]) -> int:
             inv_path = persisted_inv
             ev_path = persisted_ev
             # Save effective simple config for future resume
-            LAST_RUN_FILE.write_text(json.dumps({
+            last_run = {
+                "host_file": host_file_path if host_file_path else None,
                 "hosts_raw": hosts_raw,
                 "cluster_mode": cluster_mode,
                 "ozone_version": ozone_version,
@@ -689,7 +768,11 @@ def main(argv: List[str]) -> int:
                 "local_shared_path": local_shared_path or "",
                 "local_ozone_dirname": local_oz_dir or "",
                 "python_interpreter": python_interpreter or "",
-            }, indent=2), encoding="utf-8")
+            }
+            if use_master_worker:
+                last_run["masters_raw"] = masters_raw
+                last_run["workers_raw"] = workers_raw
+            LAST_RUN_FILE.write_text(json.dumps(last_run, indent=2), 
encoding="utf-8")
         except Exception:
             # Fall back to temp files if persisting fails
             pass
@@ -731,7 +814,7 @@ def main(argv: List[str]) -> int:
             pass
 
         try:
-            example_host = hosts[0]["host"] if hosts else "HOSTNAME"
+            example_host = (master_hosts[0]["host"] if use_master_worker and 
master_hosts else hosts[0]["host"] if hosts else "HOSTNAME")
             logger.info(f"To view process logs: ssh to the node and read 
{install_base}/current/logs/ozone-{service_user}-<process>-<host>.log "
                         f"(e.g., 
{install_base}/current/logs/ozone-{service_user}-recon-{example_host}.log)")
         except Exception:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to