Ottomata has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/360900 )
Change subject: Remove datasets.wikimedia.org site, redirect to analytics.wikimedia.org/datasets/archive
......................................................................
Remove datasets.wikimedia.org site, redirect to analytics.wikimedia.org/datasets/archive
Bug: T159409
Change-Id: Ib1b7791d4d6dd59a60818a04ff7b991f1119b54b
---
M manifests/site.pp
M modules/statistics/manifests/sites/analytics.pp
D modules/statistics/manifests/sites/datasets.pp
M modules/statistics/templates/analytics.wikimedia.org.erb
D modules/statistics/templates/datasets.wikimedia.org.erb
5 files changed, 13 insertions(+), 109 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git a/manifests/site.pp b/manifests/site.pp
index 9050a09..ae81ffd 100644
--- a/manifests/site.pp
+++ b/manifests/site.pp
@@ -2213,7 +2213,8 @@
node 'thorium.eqiad.wmnet' {
# thorium is mainly used to host Analytics websites like:
# - https://stats.wikimedia.org (Wikistats)
- # - https://datasets.wikimedia.org
+ # - https://analytics.wikimedia.org (Analytics dashboards and datasets)
+ # - https://datasets.wikimedia.org (deprecated, redirects to analytics.wm.org/datasets/archive)
# - https://metrics.wikimedia.org (https://metrics.wmflabs.org/ (Wikimetrics))
# - https://pivot.wikimedia.org (Imply's Pivot UI for Druid data)
# - https://hue.wikimedia.org (Hadoop User Experience GUI)
diff --git a/modules/statistics/manifests/sites/analytics.pp b/modules/statistics/manifests/sites/analytics.pp
index f79091b..a5fa332 100644
--- a/modules/statistics/manifests/sites/analytics.pp
+++ b/modules/statistics/manifests/sites/analytics.pp
@@ -47,6 +47,9 @@
}
include ::apache::mod::headers
+ # mod rewrite is used to redirect the deprecated datasets.wikimedia.org to
+ # analytics.wikimedia.org/datasets/archive
+ include ::apache::mod::rewrite
apache::site { 'analytics':
content => template('statistics/analytics.wikimedia.org.erb'),
require => File[$document_root],
diff --git a/modules/statistics/manifests/sites/datasets.pp b/modules/statistics/manifests/sites/datasets.pp
deleted file mode 100644
index 9a351b9..0000000
--- a/modules/statistics/manifests/sites/datasets.pp
+++ /dev/null
@@ -1,75 +0,0 @@
-# == Class statistics::sites::datasets
-# datasets.wikimedia.org
-#
-# TODO: Parameterize rsync source hostnames
-#
-# NOTE: This class has nothing to do with the
-# dataset1001 datasets_mount.
-#
-class statistics::sites::datasets {
- require ::statistics::web
-
- # $working_path should be /srv
- $working_path = $::statistics::working_path
- # TODO: This site will be deprecated and redirected from analytics.wm.org as part of T132594.
- $document_root = "${working_path}/datasets.wikimedia.org"
-
- file { [
- # /srv/datasets contains various datasets that are intended to be exposed publicly.
- $document_root,
- "${working_path}/public-datasets",
- "${working_path}/aggregate-datasets",
- "${working_path}/limn-public-data",
- ]:
- ensure => 'directory',
- owner => 'root',
- group => 'www-data',
- mode => '0775',
- }
-
- # symlink $document_root/public-datasets to /srv/public-datasets
- file { "${document_root}/public-datasets":
- ensure => 'link',
- target => "${working_path}/public-datasets",
- owner => 'root',
- group => 'www-data',
- }
-
- # symlink $document_root/aggregate-datasets to /srv/aggregate-datasets
- file { "${document_root}/aggregate-datasets":
- ensure => 'link',
- target => "${working_path}/aggregate-datasets",
- owner => 'root',
- group => 'www-data',
- }
-
- # symlink $document_root/limn-public-data to /srv/limn-public-data
- file { "${document_root}/limn-public-data":
- ensure => 'link',
- target => "${working_path}/limn-public-data",
- owner => 'root',
- group => 'www-data',
- }
-
- # rsync from stat1003:/srv/public-datasets to $working_path/public-datasets
- cron { 'rsync public datasets':
- command => "/usr/bin/rsync -rt --delete stat1003.eqiad.wmnet::srv/public-datasets/* ${working_path}/public-datasets/",
- require => File["${working_path}/public-datasets"],
- user => 'root',
- minute => '*/30',
- }
-
- # rsync from stat1002:/srv/aggregate-datasets to $working_path/aggregate-datasets
- cron { 'rsync aggregate datasets from stat1002':
- command => "/usr/bin/rsync -rt --delete stat1002.eqiad.wmnet::srv/aggregate-datasets/* ${working_path}/aggregate-datasets/",
- require => File["${working_path}/aggregate-datasets"],
- user => 'root',
- minute => '*/30',
- }
-
- include ::apache::mod::headers
- apache::site { 'datasets':
- content => template('statistics/datasets.wikimedia.org.erb'),
- require => File[$document_root],
- }
-}
diff --git a/modules/statistics/templates/analytics.wikimedia.org.erb b/modules/statistics/templates/analytics.wikimedia.org.erb
index 6fe276c..59306a1 100644
--- a/modules/statistics/templates/analytics.wikimedia.org.erb
+++ b/modules/statistics/templates/analytics.wikimedia.org.erb
@@ -1,5 +1,6 @@
<VirtualHost *:80>
ServerName analytics.wikimedia.org
+ ServerAlias datasets.wikimedia.org
DocumentRoot <%= @document_root %>
@@ -25,6 +26,13 @@
ExpiresDefault "modification plus 1 hour"
</IfModule>
+ <IfModule mod_rewrite.c>
+ # Redirect all requests to datasets.wikimedia.org to analytics.wikimedia.org/datasets/archive
+ RewriteEngine On
+ RewriteCond %{HTTP_HOST} ^datasets.wikimedia.org [nocase]
+ RewriteRule ^(.*)$ https://analytics.wikimedia.org/datasets/archive$1 [last,redirect=301]
+ </IfModule>
+
LogLevel warn
ErrorLog /var/log/apache2/analytics_error.log
CustomLog /var/log/apache2/analytics_access.log wmf
diff --git a/modules/statistics/templates/datasets.wikimedia.org.erb b/modules/statistics/templates/datasets.wikimedia.org.erb
deleted file mode 100644
index ff201b2..0000000
--- a/modules/statistics/templates/datasets.wikimedia.org.erb
+++ /dev/null
@@ -1,33 +0,0 @@
-<VirtualHost *:80>
- ServerName datasets.wikimedia.org
-
- DocumentRoot <%= @document_root %>
-
- <Directory <%= @document_root %> >
- Options Indexes FollowSymLinks MultiViews
- AllowOverride None
- Require all granted
-
- # enable CORS requests
- Header set Access-Control-Allow-Origin "*"
- </Directory>
-
- # Cache json, yaml, csv, and tsv files 1 day
- # (could be all files but wanted to be more restrictive to start)
- <IfModule mod_headers.c>
- <FilesMatch "\.(json|yaml|csv|tsv)$">
- Header set Cache-Control "max-age=86400, public, must-revalidate"
- </FilesMatch>
- </IfModule>
-
- # M86400 -> issue conditional request 1 day after modification
- <IfModule mod_expires.c>
- ExpiresActive On
- ExpiresDefault M86400
- </IfModule>
-
- LogLevel warn
- ErrorLog /var/log/apache2/datasets_error.log
- CustomLog /var/log/apache2/datasets_access.log wmf
- ServerSignature Off
-</VirtualHost>
--
To view, visit https://gerrit.wikimedia.org/r/360900
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib1b7791d4d6dd59a60818a04ff7b991f1119b54b
Gerrit-PatchSet: 2
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits