Elukey has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/354107 )
Change subject: role::aqs: use profile::cassandra ...................................................................... role::aqs: use profile::cassandra Also add a switch to profile::cassandra to allow opening connections to the analytics network in case of need, and fix handling of the case we have no TLS encryption. Change-Id: I727779606ff6c83f95e2dd308df3fa34f14e3f77 --- M hieradata/hosts/aqs1004.yaml M hieradata/hosts/aqs1005.yaml M hieradata/hosts/aqs1006.yaml M hieradata/hosts/aqs1007.yaml M hieradata/hosts/aqs1008.yaml M hieradata/hosts/aqs1009.yaml M hieradata/role/common/aqs.yaml M hieradata/role/common/restbase/dev_cluster.yaml M hieradata/role/common/restbase/production.yaml M hieradata/role/common/restbase/test_cluster.yaml M modules/profile/manifests/cassandra.pp M modules/role/manifests/aqs.pp 12 files changed, 99 insertions(+), 190 deletions(-) Approvals: Elukey: Looks good to me, approved jenkins-bot: Verified Filippo Giunchedi: Looks good to me, but someone else must approve diff --git a/hieradata/hosts/aqs1004.yaml b/hieradata/hosts/aqs1004.yaml index 3d2168d..5af54e2 100644 --- a/hieradata/hosts/aqs1004.yaml +++ b/hieradata/hosts/aqs1004.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack1 instances -cassandra::rack: 'rack1' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.0.126 #aqs1004-a.eqiad.wmnet - rpc_address: 10.64.0.126 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.0.127 #aqs1004-b.eqiad.wmnet - rpc_address: 10.64.0.127 - rpc_interface: eth0 +profile::cassandra::rack: 'rack1' diff --git a/hieradata/hosts/aqs1005.yaml b/hieradata/hosts/aqs1005.yaml index a0f7435..cab90eb 100644 --- a/hieradata/hosts/aqs1005.yaml +++ b/hieradata/hosts/aqs1005.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack2 instances -cassandra::rack: 'rack2' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.32.189 #aqs1005-a.eqiad.wmnet - rpc_address: 10.64.32.189 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.32.190 #aqs1005-b.eqiad.wmnet - rpc_address: 10.64.32.190 - rpc_interface: eth0 \ No newline at end of file +profile::cassandra::rack: 'rack2' diff --git a/hieradata/hosts/aqs1006.yaml b/hieradata/hosts/aqs1006.yaml index 1484eec..ea5174f 100644 --- a/hieradata/hosts/aqs1006.yaml +++ b/hieradata/hosts/aqs1006.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack3 instances -cassandra::rack: 'rack3' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.48.148 #aqs1006-a.eqiad.wmnet - rpc_address: 10.64.48.148 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.48.149 #aqs1006-b.eqiad.wmnet - rpc_address: 10.64.48.149 - rpc_interface: eth0 \ No newline at end of file +profile::cassandra::rack: 'rack3' diff --git a/hieradata/hosts/aqs1007.yaml b/hieradata/hosts/aqs1007.yaml index 40795bf..5af54e2 100644 --- a/hieradata/hosts/aqs1007.yaml +++ b/hieradata/hosts/aqs1007.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack1 instances -cassandra::rack: 'rack1' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.0.213 #aqs1007-a.eqiad.wmnet - rpc_address: 10.64.0.213 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.0.237 #aqs1007-b.eqiad.wmnet - rpc_address: 10.64.0.237 - rpc_interface: eth0 +profile::cassandra::rack: 'rack1' diff --git a/hieradata/hosts/aqs1008.yaml b/hieradata/hosts/aqs1008.yaml index eb5e3b9..cab90eb 100644 --- a/hieradata/hosts/aqs1008.yaml +++ b/hieradata/hosts/aqs1008.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack2 instances -cassandra::rack: 'rack2' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.16.74 #aqs1008-a.eqiad.wmnet - rpc_address: 10.64.16.74 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.16.78 #aqs1008-b.eqiad.wmnet - rpc_address: 10.64.16.78 - rpc_interface: eth0 +profile::cassandra::rack: 'rack2' diff --git a/hieradata/hosts/aqs1009.yaml b/hieradata/hosts/aqs1009.yaml index 660a401..ea5174f 100644 --- a/hieradata/hosts/aqs1009.yaml +++ b/hieradata/hosts/aqs1009.yaml @@ -1,15 +1,3 @@ # Analytics Query Service - rack3 instances -cassandra::rack: 'rack3' - -cassandra::instances: - a: - jmx_port: 7189 - listen_address: 10.64.48.122 #aqs1009-a.eqiad.wmnet - rpc_address: 10.64.48.122 - rpc_interface: eth0 - b: - jmx_port: 7190 - listen_address: 10.64.48.123 #aqs1009-b.eqiad.wmnet - rpc_address: 10.64.48.123 - rpc_interface: eth0 \ No newline at end of file +profile::cassandra::rack: 'rack3' diff --git a/hieradata/role/common/aqs.yaml b/hieradata/role/common/aqs.yaml index 6dc8cf4..281582d 100644 --- a/hieradata/role/common/aqs.yaml +++ b/hieradata/role/common/aqs.yaml @@ -1,4 +1,7 @@ # Analytics Query Service +# +# General +# cluster: aqs admin::groups: @@ -12,70 +15,78 @@ # # Cassandra for AQS # -cassandra::metrics::graphite_host: 'graphite-in.eqiad.wmnet' -cassandra::metrics::blacklist: +graphite_host: 'graphite-in.eqiad.wmnet' +profile::cassandra::allow_analytics: true + +# We use false to make all if statements fail in cassandra::metrics +profile::cassandra::metrics_whitelist: false +profile::cassandra::metrics_blacklist: - .*\.metrics\.Table\..*$ -cassandra::start_rpc: 'false' +# Instances +profile::cassandra::instances: + "aqs1004.eqiad.wmnet": + "a": { listen_address: 10.64.0.126 } + "b": { listen_address: 10.64.0.127 } + "aqs1005.eqiad.wmnet": + "a": { listen_address: 10.64.32.189 } + "b": { listen_address: 10.64.32.190 } + "aqs1006.eqiad.wmnet": + "a": { listen_address: 10.64.48.148 } + "b": { listen_address: 10.64.48.149 } + "aqs1007.eqiad.wmnet": + "a": { listen_address: 10.64.0.213 } + "b": { listen_address: 10.64.0.237 } + "aqs1008.eqiad.wmnet": + "a": { listen_address: 10.64.16.74 } + "b": { listen_address: 10.64.16.78 } + "aqs1009.eqiad.wmnet": + "a": { listen_address: 10.64.48.122 } + "b": { listen_address: 10.64.48.123 } -cassandra::target_version: '2.2' -# TODO: set up a cluster variable similar to MySQL clusters to share -# cassandra cluster configs between cassandra & clients +profile::cassandra::settings: + dc: "%{::site}" + cluster_name: "Analytics Query Service Storage" + start_rpc: 'false' + target_version: '2.2' + max_heap_size: 16g + # 1/4 heap size, no more than 100m/thread + heap_newsize: 2048m -cassandra::seeds: - - aqs1004-a.eqiad.wmnet - - aqs1004-b.eqiad.wmnet - - aqs1005-a.eqiad.wmnet - - aqs1005-b.eqiad.wmnet - - aqs1006-a.eqiad.wmnet - - aqs1006-b.eqiad.wmnet - - aqs1007-a.eqiad.wmnet - - aqs1007-b.eqiad.wmnet - - aqs1008-a.eqiad.wmnet - - aqs1008-b.eqiad.wmnet - - aqs1009-a.eqiad.wmnet - - aqs1009-b.eqiad.wmnet -cassandra::max_heap_size: 16g -# 1/4 heap size, no more than 100m/thread -cassandra::heap_newsize: 2048m + # Special compaction settings, following suggesions in: + # https://docs.datastax.com/en/cassandra/2.1/cassandra/configuration/configCassandra_yaml_r.html + # All values are divided by two since we have two instances running on each node + # Assumption: 32 cores with ht on each host, so 16 * 1.5 = 24 cores considered + # in the calculations. + compaction_throughput_mb_per_sec: 256 + concurrent_compactors: 12 + concurrent_writes: 64 + concurrent_reads: 64 -# Special compaction settings, following suggesions in: -# https://docs.datastax.com/en/cassandra/2.1/cassandra/configuration/configCassandra_yaml_r.html -# All values are divided by two since we have two instances running on each node -# Assumption: 32 cores with ht on each host, so 16 * 1.5 = 24 cores considered -# in the calculations. -cassandra::compaction_throughput_mb_per_sec: 256 -cassandra::concurrent_compactors: 12 -cassandra::concurrent_writes: 64 -cassandra::concurrent_reads: 64 + # The CassandraAuthorizer Auth mandates non trivial checks for + # each read/write operation to make sure that permissions are honored. + # This could be a problem in already heavy loaded clusters like AQS, + # so we need to increase caching to allow better performances + # (default value 2s). + permissions_validity_in_ms: 600000 -# The CassandraAuthorizer Auth mandates non trivial checks for -# each read/write operation to make sure that permissions are honored. -# This could be a problem in already heavy loaded clusters like AQS, -# so we need to increase caching to allow better performances -# (default value 2s). -cassandra::permissions_validity_in_ms: 600000 + # AQS Cassandra user -# AQS Cassandra user + # This configuration creates the adduser.cql script that must be used + # to create the new user on the Cassandra cluster. This configuration alone + # does not create any user on the Cassandra cluster. + application_username: aqs + application_password: "%{passwords::aqs::aqs_user}" -# This configuration creates the adduser.cql script that must be used -# to create the new user on the Cassandra cluster. This configuration alone -# does not create any user on the Cassandra cluster. -cassandra::application_username: aqs -cassandra::application_password: "%{passwords::aqs::aqs_user}" - +# +# AQS service +# # This configuration forces Restbase to use a specific user. Please make sure # to create the user first. aqs::cassandra_user: aqs aqs::cassandra_password: "%{passwords::aqs::aqs_user}" -cassandra::dc: "%{::site}" -cassandra::cluster_name: "Analytics Query Service Storage" - -# -# AQS service -# aqs::seeds: - aqs1004-a.eqiad.wmnet - aqs1004-b.eqiad.wmnet @@ -104,4 +115,4 @@ - aqs1006.eqiad.wmnet - aqs1007.eqiad.wmnet - aqs1008.eqiad.wmnet - - aqs1009.eqiad.wmnet \ No newline at end of file + - aqs1009.eqiad.wmnet diff --git a/hieradata/role/common/restbase/dev_cluster.yaml b/hieradata/role/common/restbase/dev_cluster.yaml index 736debe..fc07654 100644 --- a/hieradata/role/common/restbase/dev_cluster.yaml +++ b/hieradata/role/common/restbase/dev_cluster.yaml @@ -43,7 +43,7 @@ # ## Metrics graphite_host: 'graphite1003.eqiad.wmnet' - +profile::cassandra::allow_analytics: false profile::cassandra::metrics_whitelist: - .*\.metrics\.Table\.local_group_.*\.meta\.CoordinatorReadLatency\..*$ - .*\.metrics\.Table\.local_group_.*\.meta\.CoordinatorScanLatency\..*$ diff --git a/hieradata/role/common/restbase/production.yaml b/hieradata/role/common/restbase/production.yaml index acc22b5..9bc84bb 100644 --- a/hieradata/role/common/restbase/production.yaml +++ b/hieradata/role/common/restbase/production.yaml @@ -26,6 +26,8 @@ # Cassandra # graphite_host: 'graphite1003.eqiad.wmnet' +profile::cassandra::allow_analytics: false + profile::cassandra::metrics_whitelist: - .*\.metrics\.ColumnFamily\.local_group_.*\.meta\.CoordinatorReadLatency\..*$ - .*\.metrics\.ColumnFamily\.local_group_.*\.meta\.CoordinatorScanLatency\..*$ diff --git a/hieradata/role/common/restbase/test_cluster.yaml b/hieradata/role/common/restbase/test_cluster.yaml index 4d76d30..f533e28 100644 --- a/hieradata/role/common/restbase/test_cluster.yaml +++ b/hieradata/role/common/restbase/test_cluster.yaml @@ -39,6 +39,7 @@ # ## Metrics graphite_host: 'graphite1003.eqiad.wmnet' +profile::cassandra::allow_analytics: false profile::cassandra::metrics_whitelist: - .*\.metrics\.ColumnFamily\.local_group_.*\.meta\.CoordinatorReadLatency\..*$ diff --git a/modules/profile/manifests/cassandra.pp b/modules/profile/manifests/cassandra.pp index 3ad9aa4..7981fac 100644 --- a/modules/profile/manifests/cassandra.pp +++ b/modules/profile/manifests/cassandra.pp @@ -8,6 +8,7 @@ $metrics_whitelist = hiera('profile::cassandra::metrics_whitelist'), $graphite_host = hiera('graphite_host'), $prometheus_nodes = hiera('prometheus_nodes'), + $allow_analytics = hiera('profile::cassandra::allow_analytics') ) { include ::passwords::cassandra $instances = $all_instances[$::fqdn] @@ -38,8 +39,12 @@ vm_dirty_background_bytes => 25165824, } - $tls_cluster_name = $cassandra_settings['tls_cluster_name'] - if $instances != {} { + if $cassandra_settings['tls_cluster_name'] { + $tls_cluster_name = $cassandra_settings['tls_cluster_name'] + } else { + $tls_cluster_name = '' + } + if $instances { $instance_names = keys($instances) ::cassandra::instance::monitoring{ $instance_names: instances => $instances, @@ -74,12 +79,16 @@ port => '7000', srange => "@resolve((${cassandra_hosts_ferm}))", } - # Cassandra intra-node SSL messaging - ferm::service { 'cassandra-intra-node-ssl': - proto => 'tcp', - port => '7001', - srange => "@resolve((${cassandra_hosts_ferm}))", + + if $cassandra_settings['tls_cluster_name'] { + # Cassandra intra-node SSL messaging + ferm::service { 'cassandra-intra-node-ssl': + proto => 'tcp', + port => '7001', + srange => "@resolve((${cassandra_hosts_ferm}))", + } } + # Cassandra JMX/RMI ferm::service { 'cassandra-jmx-rmi': proto => 'tcp', @@ -99,5 +108,15 @@ port => '7800', srange => "@resolve((${prometheus_nodes_ferm}))", } + if $allow_analytics { + include ::network::constants + $analytics_networks = join($network::constants::analytics_networks, ' ') + ferm::service { 'cassandra-analytics-cql': + proto => 'tcp', + port => '9042', + srange => "(@resolve((${cassandra_hosts_ferm})) ${analytics_networks})", + } + + } } diff --git a/modules/role/manifests/aqs.pp b/modules/role/manifests/aqs.pp index 095646f..524583a 100644 --- a/modules/role/manifests/aqs.pp +++ b/modules/role/manifests/aqs.pp @@ -18,59 +18,7 @@ # # Set up Cassandra for AQS. # - - # Parameters to be set by Hiera - include ::cassandra - include ::cassandra::metrics - include ::cassandra::logging - - $cassandra_instances = $::cassandra::instances - - if $cassandra_instances { - $instance_names = keys($cassandra_instances) - ::cassandra::instance::monitoring { $instance_names: - contact_group => 'admins,team-services,analytics', - } - } else { - $default_instances = { - 'default' => { - 'listen_address' => $::cassandra::listen_address, - } - } - ::cassandra::instance::monitoring { 'default': - instances => $default_instances, - contact_group => 'admins,team-services,analytics', - } - } - - $cassandra_hosts_ferm = join(hiera('cassandra::seeds'), ' ') - - # Cassandra intra-node messaging - ferm::service { 'cassandra-analytics-intra-node': - proto => 'tcp', - port => '7000', - srange => "@resolve((${cassandra_hosts_ferm}))", - } - # Cassandra JMX/RMI - ferm::service { 'cassandra-analytics-jmx-rmi': - proto => 'tcp', - port => '7199', - srange => "@resolve((${cassandra_hosts_ferm}))", - } - # Allow analytics networks to populate cassandra - include network::constants - $analytics_networks = join($network::constants::analytics_networks, ' ') - - # In addition to the IP assigned to the Cassandra multi instances, these rules - # grant access from the actual AQS hosts - $aqs_hosts_ferm = join(hiera('aqs_hosts'), ' ') - - # Cassandra CQL query interface - ferm::service { 'cassandra-analytics-cql': - proto => 'tcp', - port => '9042', - srange => "(@resolve((${cassandra_hosts_ferm})) @resolve((${aqs_hosts_ferm})) ${analytics_networks})", - } + include ::profile::cassandra # # Set up AQS -- To view, visit https://gerrit.wikimedia.org/r/354107 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I727779606ff6c83f95e2dd308df3fa34f14e3f77 Gerrit-PatchSet: 6 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Giuseppe Lavagetto <glavage...@wikimedia.org> Gerrit-Reviewer: Eevans <eev...@wikimedia.org> Gerrit-Reviewer: Elukey <ltosc...@wikimedia.org> Gerrit-Reviewer: Filippo Giunchedi <fgiunch...@wikimedia.org> Gerrit-Reviewer: Giuseppe Lavagetto <glavage...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits