Bearloga has uploaded a new change for review. https://gerrit.wikimedia.org/r/295583
Change subject: Group US states into regions ...................................................................... Group US states into regions This patch takes the new geodata from Wikipedia Portal event logs (where we now record the U.S. visitor's state) and groups the states into five regions: Northeast, Midwest, West, South, and Pacific. Bug: T136257 Depends on: Id067003604e91332d50d53b42a883a8d83b8878b Change-Id: Ia1d27922b911a3efd3106ac165ffda689e685261 --- M portal/portal.R 1 file changed, 28 insertions(+), 11 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden refs/changes/83/295583/1 diff --git a/portal/portal.R b/portal/portal.R index be7d8a6..de970ee 100644 --- a/portal/portal.R +++ b/portal/portal.R @@ -43,17 +43,34 @@ data <- data[order(data$type, decreasing = FALSE),] breakdown_data <- data[,j = list(events = .N), by = c("date","section_used")] - # Generate by-country breakdown - countries <- c("US", "GB", "CA", "DE", "IN", "AU", "CN", "RU", "PH", "FR") - country_breakdown <- data[,j = list(events = .N), by = c("date", "country")] - others <- data.table::data.table(date = date, - country = "Other", - events = sum(country_breakdown$events[!country_breakdown$country %in% countries])) - country_data <- rbind(country_breakdown[country_breakdown$country %in% countries,], others) - country_data <- country_data[order(country_data$country, decreasing = TRUE),] - country_data$country <- c("United States", "Russia", "Philippines", "Other", "India", - "United Kingdom", "France", "Germany", "China", "Canada", - "Australia") + # Generate by-country breakdown with regional data for US + regions <- data.frame(abb = paste0("US:", as.character(state.abb)), + region = paste0("U.S. (", as.character(state.region), ")"), + state = state.name, + stringsAsFactors = FALSE) + regions$region[regions$region == "U.S. (North Central)"] <- "U.S. (Midwest)" + regions$region[state.division == "Pacific"] <- "U.S. (Pacific)" # see https://phabricator.wikimedia.org/T136257#2399411 + countries <- data.frame(abb = c(regions$abb, "GB", "CA", + "DE", "IN", "AU", "CN", + "RU", "PH", "FR"), + name = c(regions$region, "United Kingdom", "Canada", + "Germany", "India", "Australia", "China", + "Russia", "Philippines", "France"), + stringsAsFactors = FALSE) + ## BEGIN PROTOTYPE + # # This can be used to test out the processing code before https://gerrit.wikimedia.org/r/#/c/295572/ is merged. + # data$country[data$country == "US" & !is.na(data$country)] <- sample(unique(regions$abb), sum(data$country == "US", na.rm = TRUE), replace = TRUE) + ## END PROTOTYPE + country_data <- as.data.frame(data) %>% + dplyr::mutate(country = ifelse(country %in% countries$abb, country, "Other")) %>% + dplyr::left_join(countries, by = c("country" = "abb")) %>% + dplyr::mutate(name = ifelse(is.na(name), "Other", name)) %>% + dplyr::select(-country) %>% dplyr::rename(country = name) %>% + dplyr::group_by(country) %>% + dplyr::summarize(events = n()) %>% + dplyr::mutate(date = date) %>% + dplyr::select(c(date, country, events)) %>% + dplyr::arrange(desc(country)) # Get user agent data wmf::set_proxies() # To allow for the latest YAML to be retrieved. -- To view, visit https://gerrit.wikimedia.org/r/295583 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia1d27922b911a3efd3106ac165ffda689e685261 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/golden Gerrit-Branch: master Gerrit-Owner: Bearloga <mpo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits