Nuria has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/327845 )

Change subject: [WIP] POC of loading tile data into pivot
......................................................................

[WIP] POC of loading tile data into pivot

Code and config likely to be much improved

Bug: T151832
Change-Id: Ie3ee2a33aeef68c970cd0284529a20803bfc7700
---
A oozie/maps/druid/load_map_tiles.template.json
A oozie/maps/druid/tiles_table.hql
2 files changed, 105 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/45/327845/1

diff --git a/oozie/maps/druid/load_map_tiles.template.json 
b/oozie/maps/druid/load_map_tiles.template.json
new file mode 100644
index 0000000..442ee84
--- /dev/null
+++ b/oozie/maps/druid/load_map_tiles.template.json
@@ -0,0 +1,73 @@
+{
+  "type" : "index_hadoop",
+  "spec" : {
+    "ioConfig" : {
+      "type" : "hadoop",
+      "inputSpec" : {
+        "type" : "static",
+        "paths" : "*INPUT_PATH*"
+      }
+    },
+    "dataSchema" : {
+      "dataSource" : "tiles-poc",
+      "granularitySpec" : {
+        "type" : "uniform",
+        "segmentGranularity" : "day",
+        "queryGranularity" : "hour",
+        "intervals" : *INTERVALS_ARRAY*
+      },
+      "parser" : {
+        "type" : "string",
+        "parseSpec" : {
+          "format" : "json",
+          "dimensionsSpec" : {
+            "dimensions" : [
+                "project",
+                "agent_type",
+                "language",
+                "continent",
+                "country_code",
+                "country",
+                "ua_browser_family",
+                "ua_browser_major",
+                "ua_os_family",
+                "ua_os_major",
+                "ua_os_minor",
+                "host",
+                "used_on",
+                "style",
+                "zoom",
+                "scale",
+                "format"
+            ]
+          },
+          "timestampSpec" : {
+            "format" : "auto",
+            "column" : "ts"
+          }
+        }
+      },
+      "metricsSpec" : [
+        {
+          "name" : "tiles",
+          "type" : "longSum",
+          "fieldName": "tiles"
+        }
+      ]
+    },
+    "tuningConfig" : {
+      "type" : "hadoop",
+      "ignoreInvalidRows" : false,
+      "partitionsSpec" : {
+        "type" : "hashed",
+        "numShards" : 8
+      },
+      "jobProperties" : {
+        "mapreduce.reduce.memory.mb" : "8192",
+        "mapreduce.output.fileoutputformat.compress": "org.apache.hadoop.io.compress.GzipCodec",
+        "mapreduce.job.queuename": "*HADOOP_QUEUE*"
+      }
+    }
+  }
+}
+
diff --git a/oozie/maps/druid/tiles_table.hql b/oozie/maps/druid/tiles_table.hql
new file mode 100644
index 0000000..d94d9cc
--- /dev/null
+++ b/oozie/maps/druid/tiles_table.hql
@@ -0,0 +1,32 @@
+SET parquet.compression              = SNAPPY;
+
+CREATE TABLE IF NOT EXISTS ${destination_table} (
+  ts STRING COMMENT 'Timestamp, formatted as YYYY-MM-DDTHH:00:00Z',
+  agent_type STRING COMMENT 'user or bot/tool',
+  continent STRING COMMENT 'Continent of the accessing agents (computed using maxmind GeoIP database)',
+  country_code STRING COMMENT 'Country ISO code of the accessing agents (computed using maxmind GeoIP database)',
+  country STRING COMMENT 'Country (text) of the accessing agents (computed using maxmind GeoIP database)',
+  ua_browser_family STRING COMMENT 'Browser extracted from UA (e.g. Firefox)',
+  ua_browser_major STRING COMMENT 'Browser major version extracted from User Agent',
+  ua_device_family STRING COMMENT 'Device extracted from User Agent',
+  ua_os_family STRING COMMENT 'Operating system extracted from User Agent',
+  ua_os_major STRING COMMENT 'Operating system major version extracted from User Agent',
+  ua_os_minor STRING COMMENT 'Operating system minor version extracted from User Agent',
+  host STRING COMMENT 'Host of request (wikimedia or other)',
+  project STRING COMMENT 'Project name from request referer if it is a WMF site (e.g. wikivoyage)',
+  language STRING COMMENT 'Language prefix from request referer if it is a WMF site (e.g. ru)',
+  used_on STRING COMMENT 'A specific wiki page, Not a specific wiki page, Wikimedia Labs, GeoHack, or -',
+  style STRING COMMENT 'e.g. osm-intl',
+  zoom STRING COMMENT 'zoom level of the tiles (1-18)',
+  scale STRING COMMENT 'e.g. 1.5',
+  format STRING COMMENT 'e.g. png',
+  tiles BIGINT COMMENT 'Number of tiles successfully requested'
+)
+COMMENT 'This is a table of Kartotherian usage (counts of tiles successfully served).'
+PARTITIONED BY (
+  year INT COMMENT 'Unpadded year',
+  month INT COMMENT 'Unpadded month',
+  day INT COMMENT 'Unpadded day',
+  hour INT COMMENT 'Unpadded hour'
+)
+STORED AS SEQUENCEFILE;

-- 
To view, visit https://gerrit.wikimedia.org/r/327845
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie3ee2a33aeef68c970cd0284529a20803bfc7700
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Nuria <nu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to