Bearloga has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/295583

Change subject: Group US states into regions
......................................................................

Group US states into regions

This patch takes the new geodata from Wikipedia Portal event logs
  (where we now record the U.S. visitor's state) and groups the
  states into five regions: Northeast, Midwest, West, South, and
  Pacific.

Bug: T136257
Depends on: Id067003604e91332d50d53b42a883a8d83b8878b
Change-Id: Ia1d27922b911a3efd3106ac165ffda689e685261
---
M portal/portal.R
1 file changed, 28 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden refs/changes/83/295583/1

diff --git a/portal/portal.R b/portal/portal.R
index be7d8a6..de970ee 100644
--- a/portal/portal.R
+++ b/portal/portal.R
@@ -43,17 +43,34 @@
   data <- data[order(data$type, decreasing = FALSE),]
   breakdown_data <- data[,j = list(events = .N), by = c("date","section_used")]
   
-  # Generate by-country breakdown
-  countries <- c("US", "GB", "CA", "DE", "IN", "AU", "CN", "RU", "PH", "FR")
-  country_breakdown <- data[,j = list(events = .N), by = c("date", "country")]
-  others <- data.table::data.table(date = date,
-                                   country = "Other",
-                                   events = 
sum(country_breakdown$events[!country_breakdown$country %in% countries]))
-  country_data <- rbind(country_breakdown[country_breakdown$country %in% 
countries,], others)
-  country_data <- country_data[order(country_data$country, decreasing = TRUE),]
-  country_data$country <- c("United States", "Russia", "Philippines", "Other", 
"India",
-                            "United Kingdom", "France", "Germany", "China", 
"Canada",
-                            "Australia")
+  # Generate by-country breakdown with regional data for US
+  regions <- data.frame(abb = paste0("US:", as.character(state.abb)),
+                        region = paste0("U.S. (", as.character(state.region), 
")"),
+                        state = state.name,
+                        stringsAsFactors = FALSE)
+  regions$region[regions$region == "U.S. (North Central)"] <- "U.S. (Midwest)"
+  regions$region[state.division == "Pacific"] <- "U.S. (Pacific)" # see https://phabricator.wikimedia.org/T136257#2399411
+  countries <- data.frame(abb = c(regions$abb, "GB", "CA",
+                                  "DE", "IN", "AU", "CN",
+                                  "RU", "PH", "FR"),
+                          name = c(regions$region, "United Kingdom", "Canada",
+                                   "Germany", "India", "Australia", "China",
+                                   "Russia", "Philippines", "France"),
+                          stringsAsFactors = FALSE)
+  ## BEGIN PROTOTYPE
+  # # This can be used to test out the processing code before https://gerrit.wikimedia.org/r/#/c/295572/ is merged.
+  # data$country[data$country == "US" & !is.na(data$country)] <- sample(unique(regions$abb), sum(data$country == "US", na.rm = TRUE), replace = TRUE)
+  ## END PROTOTYPE
+  country_data <- as.data.frame(data) %>%
+    dplyr::mutate(country = ifelse(country %in% countries$abb, country, 
"Other")) %>%
+    dplyr::left_join(countries, by = c("country" = "abb")) %>%
+    dplyr::mutate(name = ifelse(is.na(name), "Other", name)) %>%
+    dplyr::select(-country) %>% dplyr::rename(country = name) %>%
+    dplyr::group_by(country) %>%
+    dplyr::summarize(events = n()) %>%
+    dplyr::mutate(date = date) %>%
+    dplyr::select(c(date, country, events)) %>%
+    dplyr::arrange(desc(country))
 
   # Get user agent data
   wmf::set_proxies() # To allow for the latest YAML to be retrieved.

-- 
To view, visit https://gerrit.wikimedia.org/r/295583
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia1d27922b911a3efd3106ac165ffda689e685261
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to