Bearloga has submitted this change and it was merged. (https://gerrit.wikimedia.org/r/356474)

Change subject: Use new path & add README
......................................................................


Use new path & add README

Bug: T166724
Change-Id: Iec24bb600f24c3695ecc46c5b26b1a3f1e4d52fb
---
A README.md
M utils.R
2 files changed, 36 insertions(+), 16 deletions(-)

Approvals:
  Bearloga: Verified; Looks good to me, approved



diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5fd2f2b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,20 @@
+# Wikipedia.org Portal Dashboard
+
+This project is part of the [Discovery Dashboards](https://discovery.wmflabs.org/) project, using datasets publicly available at [analytics.wikimedia.org/datasets/discovery](https://analytics.wikimedia.org/datasets/discovery/). For more information on the datasets, refer to [README on the GitHub mirror](https://github.com/wikimedia/wikimedia-discovery-golden/blob/master/docs/README.md).
+
+## Quick start
+
+Install the dependencies:
+
+```R
+install.packages(c("devtools", "shiny", "reshape2", "data.table"))
+devtools::install_git("https://gerrit.wikimedia.org/r/wikimedia/discovery/polloi")
+```
+
+Run the app:
+
+```R
+shiny::runApp(launch.browser = 0)
+```
+
+Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
diff --git a/utils.R b/utils.R
index da2b8ee..d0e7ab5 100644
--- a/utils.R
+++ b/utils.R
@@ -6,22 +6,22 @@
 read_clickthrough <- function(){
   # Read in and format the high-level data
   clickthrough_rate <<- data.table::as.data.table(
-    polloi::read_dataset("discovery/portal/clickthrough_rate.tsv", col_types = 
"Dci")
+    polloi::read_dataset("discovery/metrics/portal/clickthrough_rate.tsv", 
col_types = "Dci")
   )[, j = list(`clickthrough rate` = 100 * (events[type == 
"clickthrough"]/sum(events))), by = "date"]
   # Read in and format the breakdown data
   interim <- data.table::as.data.table(
-    polloi::read_dataset("discovery/portal/clickthrough_breakdown.tsv", 
col_types = "Dci")
+    
polloi::read_dataset("discovery/metrics/portal/clickthrough_breakdown.tsv", 
col_types = "Dci")
   )[, j = list(section_used = section_used, proportion = 100 * 
(events/sum(events))), by = "date"]
   action_breakdown <<- tidyr::spread(dplyr::distinct(interim, date, 
section_used, .keep_all = TRUE),
                                      section_used, proportion, fill = 0)
   # Read in most common section per visit data
   interim <- data.table::as.data.table(
-    polloi::read_dataset("discovery/portal/most_common_per_visit.tsv", 
col_types = "Dci")
+    polloi::read_dataset("discovery/metrics/portal/most_common_per_visit.tsv", 
col_types = "Dci")
   )[, j = list(section_used = section_used, proportion = 100 * 
(visits/sum(visits))), by = "date"]
   most_common <<- tidyr::spread(dplyr::distinct(interim, date, section_used, 
.keep_all = TRUE),
                                 section_used, proportion, fill = 0)
   # Read in first visit clickthrough rates
-  interim <- 
polloi::read_dataset("discovery/portal/clickthrough_firstvisit.tsv", col_types 
= "Ddddddd")
+  interim <- 
polloi::read_dataset("discovery/metrics/portal/clickthrough_firstvisit.tsv", 
col_types = "Ddddddd")
   interim[, -1] <- 100 * interim[, -1] # first column is always going to be 
the date
   interim$`language search` <- 0
   first_visit_ctrs <<- interim[, names(action_breakdown)]
@@ -29,7 +29,7 @@
 }
 
 read_langs <- function() {
-  interim <- polloi::read_dataset("discovery/portal/language_destination.tsv", 
col_types = "Dciiiii")
+  interim <- 
polloi::read_dataset("discovery/metrics/portal/language_destination.tsv", 
col_types = "Dciiiii")
   suppressMessages({
     prefixes <- polloi::get_prefixes()[, -2]
   })
@@ -38,13 +38,13 @@
 }
 
 read_dwelltime <- function(){
-  dwelltime_data <<- 
data.table::as.data.table(polloi::read_dataset("discovery/portal/dwell_metrics.tsv",
 col_types = "Dddd"))
+  dwelltime_data <<- 
data.table::as.data.table(polloi::read_dataset("discovery/metrics/portal/dwell_metrics.tsv",
 col_types = "Dddd"))
   return(invisible())
 }
 
 read_country <- function(){
   interim <- data.table::as.data.table(
-    polloi::read_dataset("discovery/portal/country_data.tsv", col_types = 
"Dci")
+    polloi::read_dataset("discovery/metrics/portal/country_data.tsv", 
col_types = "Dci")
   )[, list(country = country, events = 100 * (events/sum(events))), by = 
"date"]
   country_data <<- tidyr::spread(
     dplyr::distinct(interim, date, country, .keep_all = TRUE),
@@ -54,7 +54,7 @@
 }
 
 read_useragents <- function(){
-  interim <- polloi::read_dataset("discovery/portal/user_agent_data.tsv", 
col_types = "Dccd")
+  interim <- 
polloi::read_dataset("discovery/metrics/portal/user_agent_data.tsv", col_types 
= "Dccd")
   interim$browser[interim$browser == "Chrome Mobile"] <- "Chrome Mobile 
(Android)"
   interim$browser[interim$browser == "Chrome Mobile iOS"] <- "Chrome Mobile 
(iOS)"
   interim$browser[interim$browser == "Mobile Safari"] <- "Safari Mobile"
@@ -94,7 +94,7 @@
 
 read_pageviews <- function(){
   pageview_data <<- dplyr::distinct(polloi::read_dataset(
-    "discovery/portal/pageviews.tsv", col_types = "Diii-", skip = 1,
+    "discovery/metrics/portal/pageviews.tsv", col_types = "Diii-", skip = 1,
     col_names = c("date", "total pageviews", "high-volume clients' PVs", 
"low-volume clients' PVs")
   ), date, .keep_all = TRUE)
   return(invisible())
@@ -102,7 +102,7 @@
 
 read_referrals <- function(){
   # Read in the initial data
-  interim <- polloi::read_dataset("discovery/portal/referer_data.tsv", 
col_types = "Dlcci")
+  interim <- polloi::read_dataset("discovery/metrics/portal/referer_data.tsv", 
col_types = "Dlcci")
   interim$search_engine[interim$search_engine == "none"] <- "Not referred by 
search"
   # Write out the overall values for traffic
   summary_traffic_data <<- interim %>%
@@ -136,12 +136,12 @@
 }
 
 read_sisproj <- function() {
-  sisproj_clicks <<- 
polloi::read_dataset("discovery/portal/clickthrough_sisterprojects.tsv", 
col_types = "Dcii") %>%
+  sisproj_clicks <<- 
polloi::read_dataset("discovery/metrics/portal/clickthrough_sisterprojects.tsv",
 col_types = "Dcii") %>%
     dplyr::filter(!grepl("(Privacy_policy|Terms_of_Use|.com|servlet)", 
destination), !grepl("^http", destination))
 }
 
 read_applinks <- function() {
-  applink_clicks <<- 
polloi::read_dataset("discovery/portal/app_link_clicks.tsv", col_types = "Dcci")
+  applink_clicks <<- 
polloi::read_dataset("discovery/metrics/portal/app_link_clicks.tsv", col_types 
= "Dcci")
 }
 
 find_region <- function(country_names, countrycode_data) {
@@ -158,10 +158,10 @@
 
 read_geo <- function() {
 
-  all_country_data <- 
polloi::read_dataset("discovery/portal/all_country_data.tsv", col_types = 
"Dcididid")
-  first_visits_country <- 
polloi::read_dataset("discovery/portal/first_visits_country.tsv", col_types = 
"Dccid")
-  last_action_country <- 
polloi::read_dataset("discovery/portal/last_action_country.tsv", col_types = 
"Dccid")
-  most_common_country <- 
polloi::read_dataset("discovery/portal/most_common_country.tsv", col_types = 
"Dccid")
+  all_country_data <- 
polloi::read_dataset("discovery/metrics/portal/all_country_data.tsv", col_types 
= "Dcididid")
+  first_visits_country <- 
polloi::read_dataset("discovery/metrics/portal/first_visits_country.tsv", 
col_types = "Dccid")
+  last_action_country <- 
polloi::read_dataset("discovery/metrics/portal/last_action_country.tsv", 
col_types = "Dccid")
+  most_common_country <- 
polloi::read_dataset("discovery/metrics/portal/most_common_country.tsv", 
col_types = "Dccid")
   data("countrycode_data", package = "countrycode")
   # Note: version 0.19 (published on CRAN on 2017-02-06) has renamed 
'country.name' to 'country.name.en'
   countrycode_data$continent[countrycode_data$country.name.en %in% c("British 
Indian Ocean Territory", "Christmas Island", "Taiwan, Province of China")] <- 
"Asia"

-- 
To view, visit https://gerrit.wikimedia.org/r/356474
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iec24bb600f24c3695ecc46c5b26b1a3f1e4d52fb
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/prince
Gerrit-Branch: master
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>
Gerrit-Reviewer: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to