Bearloga has submitted this change and it was merged.

Change subject: Generate invoke source and click position aggregates
......................................................................

Generate invoke source and click position aggregates To be added to search metrics dashboard Bug: T143726 Change-Id: Ib4fde72c1d3a898eab9256be89409790ff34afcd --- M search/app.R 1 file changed, 34 insertions(+), 15 deletions(-) Approvals: Bearloga: Verified; Looks good to me, approved diff --git a/search/app.R b/search/app.R index c82f0bf..553309c 100644 --- a/search/app.R +++ b/search/app.R @@ -6,7 +6,7 @@ main <- function(date = NULL){ # Retrieve data using the query builder in ./common.R - data <- rbind(wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS date, + data1 <- wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS date, CASE event_action WHEN 'click' THEN 'clickthroughs' WHEN 'start' THEN 'search sessions' WHEN 'results' THEN 'Result pages opened' END AS action, @@ -14,29 +14,46 @@ userAgent", date = date, table = "MobileWikiAppSearch_10641988", - conditionals = "event_action IN ('click','start','results')"), - wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS date, + conditionals = "event_action IN ('click','start','results')") + data2 <- wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS date, CASE event_action WHEN 'click' THEN 'clickthroughs' WHEN 'start' THEN 'search sessions' WHEN 'results' THEN 'Result pages opened' END AS action, event_timeToDisplayResults AS load_time, - userAgent", + userAgent, + event_source AS invoke_source, + event_position AS click_position", date = date, table = "MobileWikiAppSearch_15729321", - conditionals = "event_action IN ('click','start','results')")) + conditionals = "event_action IN ('click','start','results')") # See https://phabricator.wikimedia.org/T143447 for more info on why we're combining # events from these two different schema revisions. 
- data <- data.table::as.data.table(data) - data$date <- lubridate::ymd(data$date) - data$platform[grepl(x = data$userAgent, pattern = "Android", fixed = TRUE)] <- "Android" - data$platform[is.na(data$platform)] <- "iOS" - data <- data[,userAgent := NULL,] - + data1 <- data.table::as.data.table(rbind(data1, data2[,!(names(data2) %in% c("invoke_source", "click_position"))])) + data1$date <- lubridate::ymd(data1$date) + data1$platform[grepl(x = data1$userAgent, pattern = "Android", fixed = TRUE)] <- "Android" + data1$platform[is.na(data1$platform)] <- "iOS" + data1 <- data1[,userAgent := NULL,] + + data2 <- data.table::as.data.table(data2) + data2$date <- lubridate::ymd(data2$date) + data2$platform[grepl(x = data2$userAgent, pattern = "Android", fixed = TRUE)] <- "Android" + data2$platform[is.na(data2$platform)] <- "iOS" + data2 <- data2[,c("userAgent","load_time"):=NULL,] + # Generate aggregates - app_results <- data[,j = list(events = .N), by = c("date", "action", "platform")] - + app_results <- data1[,j = list(events = .N), by = c("date", "action", "platform")] + data2$click_position <- as.numeric(data2$click_position)+1 + data2$click_position <- ifelse(data2$click_position>=10 & data2$click_position<20, '10-19', + ifelse(data2$click_position>=20 & data2$click_position <=100, '20-100', + ifelse(data2$click_position>100, '100+', data2$click_position))) + position_count <- data2[action=='clickthroughs', j = list(events = .N), by = c("date","click_position")] + source_count <- data2[action=='search sessions', j = list(events = .N), by = c("date","invoke_source")] + source_count$invoke_source <- dplyr::recode(source_count$invoke_source, + '0'='Main article toolbar', '1'='Widget', '2'='Share intent','3'='Process-text intent', + '4'='Floating search bar in the feed', '5'='Voice search query') + # Produce load time data - load_times <- data[data$action == "Result pages opened", { + load_times <- data1[data1$action == "Result pages opened", { output <- 
data.frame(t(quantile(load_time, c(0.5, 0.95, 0.99)))) names(output) <- c("Median", "95th percentile", "99th Percentile") output @@ -45,6 +62,8 @@ # Write out wmf::write_conditional(app_results, file.path(base_path, "app_event_counts.tsv")) wmf::write_conditional(load_times, file.path(base_path, "app_load_times.tsv")) - + wmf::write_conditional(source_count, file.path(base_path, "invoke_source_counts.tsv")) + wmf::write_conditional(position_count, file.path(base_path, "click_position_counts.tsv")) + return(invisible()) } -- To view, visit https://gerrit.wikimedia.org/r/308099 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib4fde72c1d3a898eab9256be89409790ff34afcd Gerrit-PatchSet: 4 Gerrit-Project: wikimedia/discovery/golden Gerrit-Branch: master Gerrit-Owner: Chelsyx <c...@wikimedia.org> Gerrit-Reviewer: Bearloga <mpo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits