Rush has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403072 )
Change subject: WIP: toolforge: ferm hook to restart components post updates ...................................................................... WIP: toolforge: ferm hook to restart components post updates * Ferm is not playing nice with other iptables tenants * Tested an /etc/ferm/conf.d/00_hooks to see it run external scripts in what seems like a totally post updates state. This hopefully lets kube-proxy, flannel, and docker deal with Ferm stomping all around. This is a midterm fix where other options are being explored in the context of the task. Right now any update to Ferm, even a definition MAC, results in an outage for k8s in Toolforge. Bug: T182722 Change-Id: I5c700a2c8bce6050e8cb761450d3716a6b3f33c9 --- M modules/role/manifests/toollabs/k8s/master.pp M modules/role/manifests/toollabs/proxy.pp A modules/toollabs/files/ferm_restart_handler.sh A modules/toollabs/manifests/ferm_restart_handler.pp M modules/toollabs/manifests/proxy.pp 5 files changed, 40 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/72/403072/1 diff --git a/modules/role/manifests/toollabs/k8s/master.pp b/modules/role/manifests/toollabs/k8s/master.pp index 81647b4..1c0d78c 100644 --- a/modules/role/manifests/toollabs/k8s/master.pp +++ b/modules/role/manifests/toollabs/k8s/master.pp @@ -2,8 +2,9 @@ class role::toollabs::k8s::master( $use_puppet_certs = false, ) { - include ::base::firewall include ::toollabs::infrastructure + include ::base::firewall + include ::toollabs::ferm_restart_handler $master_host = hiera('k8s::master_host', $::fqdn) $etcd_url = prefix(suffix(hiera('k8s::etcd_hosts'), ':2379'), 'https://') diff --git a/modules/role/manifests/toollabs/proxy.pp b/modules/role/manifests/toollabs/proxy.pp index be70d49..c82cfef 100644 --- a/modules/role/manifests/toollabs/proxy.pp +++ b/modules/role/manifests/toollabs/proxy.pp @@ -2,6 +2,8 @@ class role::toollabs::proxy { include ::toollabs::proxy include 
::role::toollabs::k8s::webproxy + include ::base::firewall + include ::toollabs::ferm_restart_handler ferm::service { 'proxymanager': proto => 'tcp', diff --git a/modules/toollabs/files/ferm_restart_handler.sh b/modules/toollabs/files/ferm_restart_handler.sh new file mode 100644 index 0000000..9bffa6b --- /dev/null +++ b/modules/toollabs/files/ferm_restart_handler.sh @@ -0,0 +1,16 @@ +#/bin/bash + +/usr/bin/logger -t ${0} "restart firewall components post ferm management" + +# Ferm expects to handle all firewall state +# and that does not mesh well with dynamic chain management. +# We tell the k8s stack here to restart +# +# This should be no more invasive than a rescheduling +# of a POD to another worker. +# +# If we are living an nftables world when you read +# this, then this should be totally rethought. +sudo service docker restart +sudo service flannel restart +sudo service kube-proxy restart diff --git a/modules/toollabs/manifests/ferm_restart_handler.pp b/modules/toollabs/manifests/ferm_restart_handler.pp new file mode 100644 index 0000000..2fc3034 --- /dev/null +++ b/modules/toollabs/manifests/ferm_restart_handler.pp @@ -0,0 +1,20 @@ +# tldr; hook post ferm updates to let other interested +# parties resync their iptables state. 
+# See: T182722 +class toollabs::ferm_restart_handler{ + + file {'/usr/local/sbin/ferm_restart_handler': + source => 'puppet:///modules/toollabs/ferm_restart_handler.sh', + owner => 'root', + group => 'root', + mode => '0555', + } + + # http://ferm.foo-projects.org/download/2.1/ferm.html#hooks + # https://phabricator.wikimedia.org/T182722 + ferm::conf{'ferm_firewall_processing': + prio => 00, + content => '@hook post "/usr/local/sbin/ferm_restart_handler"', + subscribe => File['/usr/local/sbin/ferm_restart_handler'], + } +} diff --git a/modules/toollabs/manifests/proxy.pp b/modules/toollabs/manifests/proxy.pp index 63953dd..9befba2 100644 --- a/modules/toollabs/manifests/proxy.pp +++ b/modules/toollabs/manifests/proxy.pp @@ -9,7 +9,6 @@ include ::toollabs::infrastructure include ::redis::client::python - include ::base::firewall if $ssl_install_certificate { sslcert::certificate { $ssl_certificate_name: -- To view, visit https://gerrit.wikimedia.org/r/403072 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I5c700a2c8bce6050e8cb761450d3716a6b3f33c9 Gerrit-PatchSet: 1 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Rush <r...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits