This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/couchdb-infra-cm.git
The following commit(s) were added to refs/heads/main by this push:
new b6c0fc1 Switch to having agents dial into Jenkins
b6c0fc1 is described below
commit b6c0fc120d3b39aa7ad2152cdb49d56c8978d1ad
Author: Paul J. Davis <[email protected]>
AuthorDate: Tue Jan 7 15:41:55 2020 -0600
Switch to having agents dial into Jenkins
This changes the configuration of Jenkins agents to dial into the
Jenkins master instead of having Jenkins SSH to each node. This allows
us to expand our private cloud worker pool much further.
---
README.md | 50 ++++++++---
ansible.cfg | 4 +
bastions.yml | 4 +
group_vars/{ci_agents.yaml => bastions.yml} | 0
group_vars/ci_agents.yml | 2 +
host_vars/10.240.0.10.yml | 2 +
host_vars/10.240.0.14.yml | 1 +
host_vars/10.240.0.15.yml | 1 +
host_vars/10.240.0.16.yml | 1 +
host_vars/10.240.0.4.yml | 1 +
host_vars/10.240.0.5.yml | 1 +
host_vars/10.240.0.6.yml | 1 +
host_vars/10.240.0.9.yml | 2 +
production | 134 ++++++++++++++++++++++------
roles/ci_agent/files/runit-logs | 2 +
roles/ci_agent/tasks/main.yml | 41 +++++++--
roles/ci_agent/templates/runit-main.j2 | 6 ++
ssh.cfg | 13 +++
tools/gen-inventory | 51 ++++++++++-
19 files changed, 269 insertions(+), 48 deletions(-)
diff --git a/README.md b/README.md
index 5094d0b..c1321b1 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,27 @@ Setup
$ source venv/bin/activate
$ pip install -r requirements.txt
+Provisioning VMs
+---
+
+Our main workhorse is the cx2-4x8 instance type. There are also a few
+ppc64le nodes for doing full builds as well. Whoever provisions a VM should
+make sure to generate a new inventory as well as perform the first Ansible
+run against the new node so that other CouchDB infra members will have access.
+
+
+Bastion VMs
+---
+
+There should be a single bastion VM set up for each subnet. We just use the
+cheapest cx2-2x4 instance for these nodes so that we can jump to the other
+hosts.
+
+If the bastion changes public IP addresses we have to update `group_vars/ci_agents.yml`
+and set the `ansible_ssh_common_args` to use the new public IP for contacting
+servers. We should also update `ssh.cfg` in this repository to make it easier
+for contacting servers manually.
+
Generating Inventory Listings
---
@@ -24,23 +45,28 @@ inventory file:
$ ./tools/gen-inventory > production
-Setting up CI workers for Jenkins
+
+Configuring Jenkins
---
-Once the a new VM has been added into the `production` inventory whoever
-provisioned the VM will need to execute the first Ansible run so that
-the CouchDB infra group has access (where infra group is defined as
-the list of GitHub users in `roles/common/tasks/main.yml`).
+Once a CI worker has been provisioned we must also configure Jenkins to have
+the JAR url and secret ready. The easiest approach here is to just copy the
+existing configuration from one of the existing nodes. When viewing the
+configuration page we then dump the secret value into an encrypted vault
+file in the `host_vars` directory.
- $ ansible-playbook -i production ci_agents.yml
-Once this playbook finishes the new VM should be configured to be usable
-as a Jenkins agent.
+Running Ansible
+---
+
+ $ ansible-playbook -i production ci_agents.yml
-Configuring Jenkins
+Useful Commands:
---
-Once Ansible has run against a new VM configuring it as an agent in
-Jenkins is fairly straightforward. You can just copy an existing node's
-configuration and update the SSH host IP address.
\ No newline at end of file
+If you want to ssh directly to a node, you can do:
+
+```bash
+$ ssh -F ssh.cfg $private_ip
+```
diff --git a/ansible.cfg b/ansible.cfg
index 472e641..d5122a1 100644
--- a/ansible.cfg
+++ b/ansible.cfg
@@ -1,3 +1,7 @@
[defaults]
inventory = ./production
vault_password_file = ~/.couchdb-ansible-vault
+
+[ssh_connection]
+ssh_args = -F ./ssh.cfg
+control_path = ~/.ssh/ansible-%%r@%%h:%%p
diff --git a/bastions.yml b/bastions.yml
new file mode 100644
index 0000000..dbaed03
--- /dev/null
+++ b/bastions.yml
@@ -0,0 +1,4 @@
+---
+- hosts: bastions
+ roles:
+ - common
diff --git a/group_vars/ci_agents.yaml b/group_vars/bastions.yml
similarity index 100%
rename from group_vars/ci_agents.yaml
rename to group_vars/bastions.yml
diff --git a/group_vars/ci_agents.yml b/group_vars/ci_agents.yml
new file mode 100644
index 0000000..f77d855
--- /dev/null
+++ b/group_vars/ci_agents.yml
@@ -0,0 +1,2 @@
+ansible_ssh_user: root
+ansible_ssh_common_args: -o StrictHostKeyChecking=no -o ProxyCommand="ssh -W
%h:%p -q [email protected]"
\ No newline at end of file
diff --git a/host_vars/10.240.0.10.yml b/host_vars/10.240.0.10.yml
new file mode 100644
index 0000000..7940cd6
--- /dev/null
+++ b/host_vars/10.240.0.10.yml
@@ -0,0 +1,2 @@
+jenkins_node_id: couchdb-ci-worker-dal-1-5
+jenkins_secret:
6cbf9efc3299c72612a283bea390032cd4ad04d11b8251d4ef1f954e2bf8937f
\ No newline at end of file
diff --git a/host_vars/10.240.0.14.yml b/host_vars/10.240.0.14.yml
new file mode 100644
index 0000000..f884c26
--- /dev/null
+++ b/host_vars/10.240.0.14.yml
@@ -0,0 +1 @@
+jenkins_secret:
9914f48c219a7c37197946f66786ab803e994548bd77d28e624a2230f236dfb5
\ No newline at end of file
diff --git a/host_vars/10.240.0.15.yml b/host_vars/10.240.0.15.yml
new file mode 100644
index 0000000..20eab39
--- /dev/null
+++ b/host_vars/10.240.0.15.yml
@@ -0,0 +1 @@
+jenkins_secret:
b50a842ef90be46a0fb85054d663e06890a31cb9b3a39d6379aeaca2a0589160
\ No newline at end of file
diff --git a/host_vars/10.240.0.16.yml b/host_vars/10.240.0.16.yml
new file mode 100644
index 0000000..43fd2f2
--- /dev/null
+++ b/host_vars/10.240.0.16.yml
@@ -0,0 +1 @@
+jenkins_secret:
f617d42eab78f0b40bf52263755114e665a387796cad1e0581e1e4ca24158d45
\ No newline at end of file
diff --git a/host_vars/10.240.0.4.yml b/host_vars/10.240.0.4.yml
new file mode 100644
index 0000000..11bb995
--- /dev/null
+++ b/host_vars/10.240.0.4.yml
@@ -0,0 +1 @@
+jenkins_secret:
f069d36923c9ae8feee7a21e58dca22610ee71b9128a09d5b36438a8eaaef8a4
\ No newline at end of file
diff --git a/host_vars/10.240.0.5.yml b/host_vars/10.240.0.5.yml
new file mode 100644
index 0000000..27724fa
--- /dev/null
+++ b/host_vars/10.240.0.5.yml
@@ -0,0 +1 @@
+jenkins_secret:
4c794498ddea4aa59e99cab39c358e5353a3eff358773ed9cb315c426bf94d81
\ No newline at end of file
diff --git a/host_vars/10.240.0.6.yml b/host_vars/10.240.0.6.yml
new file mode 100644
index 0000000..7a624cc
--- /dev/null
+++ b/host_vars/10.240.0.6.yml
@@ -0,0 +1 @@
+jenkins_secret:
d3cb5c0c4065cebb5f14073cb2748a2aec138e812bb7f4f2bdd6e4f3cf77b7a5
\ No newline at end of file
diff --git a/host_vars/10.240.0.9.yml b/host_vars/10.240.0.9.yml
new file mode 100644
index 0000000..33511b7
--- /dev/null
+++ b/host_vars/10.240.0.9.yml
@@ -0,0 +1,2 @@
+jenkins_node_id: couchdb-ci-worker-dal-1-4
+jenkins_secret:
0beff7b7ee96a7d2106b5d9ef921cf1e71cd6eff5c924edd49b0141ab702a0b7
\ No newline at end of file
diff --git a/production b/production
index 31efd3c..6134f11 100644
--- a/production
+++ b/production
@@ -1,27 +1,105 @@
all:
children:
+ bastions:
+ hosts:
+ 169.48.153.153:
+ boot_volume:
+ device: 0717-24ba0f68-404a-4f68-82c8-0e885fc3e759-nx629
+ name: couchdb-ci-bastion-dal-1-1-boot
+ instance:
+ created_at: '2020-01-07T18:38:33Z'
+ id: 0717_5ecb1169-95ac-465b-a505-d172093972d1
+ name: couchdb-ci-bastion-dal-1-1
+ profile: cx2-2x4
+ subnet: couchdb-ci-farm-dal-1
+ vpc: couchdb-ci-farm-vpc
+ zone: us-south-1
+ ip_addrs:
+ private: 10.240.0.11
+ public: 169.48.153.153
+ system:
+ arch: amd64
+ num_cpus: 2
+ ram: 4
ci_agents:
hosts:
- 169.48.153.210:
+ 10.240.0.10:
boot_volume:
- device: 0717_5afac964-7ec6-4dad-a84d-b09b4d992949-vgqqr
- name: couchdb-ci-worker-dal-1-2-boot
+ device: 0717-3de36e3f-40ab-49f6-b757-181f07e0ebf2-2mg2b
+ name: couchdb-ci-worker-dal-1-5-boot
instance:
- created_at: '2019-12-11T16:51:02Z'
- id: 0717_d97c67df-1f04-41f8-9461-9b1d5721e408
- name: couchdb-ci-worker-dal-1-2
+ created_at: '2020-01-07T17:53:40Z'
+ id: 0717_37a9351f-99a9-484d-aec5-c0da940c2e29
+ name: couchdb-ci-worker-dal-1-5
profile: cx2-4x8
subnet: couchdb-ci-farm-dal-1
vpc: couchdb-ci-farm-vpc
zone: us-south-1
ip_addrs:
- private: 10.240.0.5
- public: 169.48.153.210
+ private: 10.240.0.10
+ public: null
+ system:
+ arch: amd64
+ num_cpus: 4
+ ram: 8
+ 10.240.0.14:
+ boot_volume:
+ device: 0717-2f6e67ea-d065-4ea0-92cb-5abc75070994-x9ntk
+ name: couchdb-ci-worker-dal-1-6-boot
+ instance:
+ created_at: '2020-01-07T21:03:39Z'
+ id: 0717_001ae386-bf78-4d1b-bde5-9bddd5de9089
+ name: couchdb-ci-worker-dal-1-6
+ profile: cx2-4x8
+ subnet: couchdb-ci-farm-dal-1
+ vpc: couchdb-ci-farm-vpc
+ zone: us-south-1
+ ip_addrs:
+ private: 10.240.0.14
+ public: null
+ system:
+ arch: amd64
+ num_cpus: 4
+ ram: 8
+ 10.240.0.15:
+ boot_volume:
+ device: 0717-87fed9c8-4f01-4ef3-92fb-67e7b9751a9f-zjjms
+ name: couchdb-ci-worker-dal-1-7-boot
+ instance:
+ created_at: '2020-01-07T21:04:06Z'
+ id: 0717_8455adf5-78bc-466f-ad18-44ce6988576d
+ name: couchdb-ci-worker-dal-1-7
+ profile: cx2-4x8
+ subnet: couchdb-ci-farm-dal-1
+ vpc: couchdb-ci-farm-vpc
+ zone: us-south-1
+ ip_addrs:
+ private: 10.240.0.15
+ public: null
system:
arch: amd64
num_cpus: 4
ram: 8
- 169.48.153.7:
+ 10.240.0.16:
+ boot_volume:
+ device: 0717-1bde8488-3508-4824-9526-6c2e48c193b0-tfszz
+ name: couchdb-ci-worker-dal-1-8-boot
+ instance:
+ created_at: '2020-01-07T21:04:49Z'
+ id: 0717_e00b3214-e4f7-426e-b644-b40ae1c3fa79
+ name: couchdb-ci-worker-dal-1-8
+ profile: cx2-4x8
+ subnet: couchdb-ci-farm-dal-1
+ vpc: couchdb-ci-farm-vpc
+ zone: us-south-1
+ ip_addrs:
+ private: 10.240.0.16
+ public: null
+ system:
+ arch: amd64
+ num_cpus: 4
+ ram: 8
+ 10.240.0.4:
boot_volume:
device: 0717_72564344-27ce-4e79-91d8-aacfaba35421-vv2gd
name: couchdb-ci-worker-dal-1-1-boot
@@ -35,31 +113,31 @@ all:
zone: us-south-1
ip_addrs:
private: 10.240.0.4
- public: 169.48.153.7
+ public: null
system:
arch: amd64
num_cpus: 4
ram: 8
- 169.48.154.118:
+ 10.240.0.5:
boot_volume:
- device: 0717_4abf905c-b565-4537-a4f3-b9e365d945ed-tbfg5
- name: couchdb-ci-worker-dal-1-4-boot
+ device: 0717_5afac964-7ec6-4dad-a84d-b09b4d992949-vgqqr
+ name: couchdb-ci-worker-dal-1-2-boot
instance:
- created_at: '2019-12-11T16:51:39Z'
- id: 0717_c4b21ff3-96e9-45a5-a77c-a90d6ac723dc
- name: couchdb-ci-worker-dal-1-4
+ created_at: '2019-12-11T16:51:02Z'
+ id: 0717_d97c67df-1f04-41f8-9461-9b1d5721e408
+ name: couchdb-ci-worker-dal-1-2
profile: cx2-4x8
subnet: couchdb-ci-farm-dal-1
vpc: couchdb-ci-farm-vpc
zone: us-south-1
ip_addrs:
- private: 10.240.0.7
- public: 169.48.154.118
+ private: 10.240.0.5
+ public: null
system:
arch: amd64
num_cpus: 4
ram: 8
- 169.48.154.14:
+ 10.240.0.6:
boot_volume:
device: 0717_f51ebb9c-5081-47f0-bbf9-07a1b1ba5e73-nwzzg
name: couchdb-ci-worker-dal-1-3-boot
@@ -73,26 +151,26 @@ all:
zone: us-south-1
ip_addrs:
private: 10.240.0.6
- public: 169.48.154.14
+ public: null
system:
arch: amd64
num_cpus: 4
ram: 8
- 169.48.154.35:
+ 10.240.0.9:
boot_volume:
- device: 0717_1a5c43f9-a22a-4258-9514-13703dfc5fb0-wkn8z
- name: couchdb-ci-worker-dal-1-5-boot
+ device: 0717-cd555806-1455-4329-8f77-d2bbccaa2352-s2zmh
+ name: couchdb-ci-worker-dal-1-4-boot
instance:
- created_at: '2019-12-11T16:51:55Z'
- id: 0717_e4857481-a79e-4848-a1c5-38e2577f815c
- name: couchdb-ci-worker-dal-1-5
+ created_at: '2020-01-07T17:53:05Z'
+ id: 0717_e8cb32f9-4861-48be-b22d-2b20d6e23b79
+ name: couchdb-ci-worker-dal-1-4
profile: cx2-4x8
subnet: couchdb-ci-farm-dal-1
vpc: couchdb-ci-farm-vpc
zone: us-south-1
ip_addrs:
- private: 10.240.0.8
- public: 169.48.154.35
+ private: 10.240.0.9
+ public: null
system:
arch: amd64
num_cpus: 4
diff --git a/roles/ci_agent/files/runit-logs b/roles/ci_agent/files/runit-logs
new file mode 100644
index 0000000..3195b01
--- /dev/null
+++ b/roles/ci_agent/files/runit-logs
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec chpst svlogd -tt ./main
diff --git a/roles/ci_agent/tasks/main.yml b/roles/ci_agent/tasks/main.yml
index 055160f..6d8805d 100644
--- a/roles/ci_agent/tasks/main.yml
+++ b/roles/ci_agent/tasks/main.yml
@@ -58,12 +58,6 @@
state: present
shell: /bin/bash
-- name: Add Apache Infra ssh key
- become: yes
- authorized_key:
- user: jenkins
- key: ssh-rsa
AAAAB3NzaC1yc2EAAAABIwAAAIEAtxkcKDiPh1OaVzaVdc80daKq2sRy8aAgt8u2uEcLClzMrnv/g19db7XVggfT4+HPCqcbFbO3mtVnUnWWtuSEpDjqriWnEcSj2G1P53zsdKEu9qCGLmEFMgwcq8b5plv78PRdAQn09WCBI1QrNMypjxgCKhNNn45WqV4AD8Jp7/8=
-
- name: Install kill-old-docker.sh
become: yes
copy:
@@ -84,3 +78,38 @@
name: Docker prune
hour: '19'
job: /usr/bin/docker system prune -a -f --filter "until=72h"
+
+- name: Create Jenkins runit service directory
+ become: yes
+ file:
+ path: /etc/sv/jenkins
+ state: directory
+
+- name: Create Jenkins runit log directory
+ become: yes
+ file:
+ path: /etc/sv/jenkins/log/main
+ state: directory
+
+- name : Create Jenkins runit run script
+ become: yes
+ template:
+ src: runit-main.j2
+ dest: /etc/sv/jenkins/run
+ mode: 0755
+
+- name: Create Jenkins runit logs run script
+ become: yes
+ copy:
+ src: runit-logs
+ dest: /etc/sv/jenkins/log/run
+ mode: 0755
+
+- name: Enable Jenkins runit service
+ become: yes
+ file:
+ src: /etc/sv/jenkins
+ dest: /etc/service/jenkins
+ state: link
+
+
diff --git a/roles/ci_agent/templates/runit-main.j2
b/roles/ci_agent/templates/runit-main.j2
new file mode 100644
index 0000000..27e62e5
--- /dev/null
+++ b/roles/ci_agent/templates/runit-main.j2
@@ -0,0 +1,6 @@
+#!/bin/sh
+exec 2>&1
+cd /home/jenkins
+curl https://ci-couchdb.apache.org/jnlpJars/agent.jar --output
/home/jenkins/agent.jar
+chown jenkins:jenkins /home/jenkins/agent.jar
+exec chpst -u jenkins:jenkins:docker java -jar agent.jar -jnlpUrl
https://ci-couchdb.apache.org/computer/{{
hostvars[inventory_hostname]["instance"]["name"] }}/slave-agent.jnlp -secret {{
jenkins_secret }} -workDir "/home/jenkins"
diff --git a/ssh.cfg b/ssh.cfg
new file mode 100644
index 0000000..c408c8b
--- /dev/null
+++ b/ssh.cfg
@@ -0,0 +1,13 @@
+Host 169.48.153.7
+ Hostname 169.48.153.7
+ User root
+ ForwardAgent yes
+ ControlMaster auto
+ ControlPath ~/.ssh/ansible-%r@%h:%p
+ ControlPersist 30m
+
+
+Host 10.240.0.*
+ User root
+ StrictHostKeyChecking no
+ ProxyCommand /usr/bin/ssh -W %h:%p -q [email protected]
diff --git a/tools/gen-inventory b/tools/gen-inventory
index 95e9abb..8160b63 100755
--- a/tools/gen-inventory
+++ b/tools/gen-inventory
@@ -89,15 +89,54 @@ def load_ci_agent(ci_agents, instance):
return
name = instance["name"]
+
+ public_ip_addr = None
net_iface = instance["primary_network_interface"]
+ ip_addr = net_iface["primary_ipv4_address"]
floating_ips = net_iface.get("floating_ips", [])
+ if floating_ips:
+ public_ip_addr = floating_ips[0]["address"]
- if not floating_ips:
+ ci_agents[ip_addr] = {
+ "instance": {
+ "id": instance["id"],
+ "name": instance["name"],
+ "created_at": instance["created_at"],
+ "profile": instance["profile"]["name"],
+ "vpc": instance["vpc"]["name"],
+ "zone": instance["zone"]["name"],
+ "subnet": net_iface["subnet"]["name"]
+ },
+ "ip_addrs": {
+ "public": public_ip_addr,
+ "private": net_iface["primary_ipv4_address"]
+ },
+ "boot_volume": {
+ "device": instance["boot_volume_attachment"]["device"]["id"],
+ "name": instance["boot_volume_attachment"]["volume"]["name"]
+ },
+ "system": {
+ "arch": instance["vcpu"]["architecture"],
+ "num_cpus": instance["vcpu"]["count"],
+ "ram": instance["memory"]
+ }
+ }
+
+
+def load_bastion(bastions, instance):
+ if instance["status"] != "running":
return
+ name = instance["name"]
+ ip_addr = None
+ net_iface = instance["primary_network_interface"]
+ floating_ips = net_iface.get("floating_ips", [])
+ if not floating_ips:
+ print "Bastion is missing a public IP: %s" % name
+ exit(2)
ip_addr = floating_ips[0]["address"]
- ci_agents[ip_addr] = {
+ bastions[ip_addr] = {
"instance": {
"id": instance["id"],
"name": instance["name"],
@@ -123,19 +162,27 @@ def load_ci_agent(ci_agents, instance):
}
+
def main():
init()
ci_agents = {}
+ bastions = {}
for instance in list_instances():
if instance["name"].startswith("couchdb-ci-worker"):
load_ci_agent(ci_agents, instance)
+ elif instance["name"].startswith("couchdb-ci-bastion"):
+ load_bastion(bastions, instance)
+
inventory = {"all": {
"children": {
"ci_agents": {
"hosts": ci_agents
+ },
+ "bastions": {
+ "hosts": bastions
}
}
}}