Bearloga has submitted this change and it was merged.

Change subject: Generate invoke source and click position aggregates
......................................................................


Generate invoke source and click position aggregates

To be added to the search metrics dashboard.

Bug: T143726
Change-Id: Ib4fde72c1d3a898eab9256be89409790ff34afcd
---
M search/app.R
1 file changed, 34 insertions(+), 15 deletions(-)

Approvals:
  Bearloga: Verified; Looks good to me, approved



diff --git a/search/app.R b/search/app.R
index c82f0bf..553309c 100644
--- a/search/app.R
+++ b/search/app.R
@@ -6,7 +6,7 @@
 main <- function(date = NULL){
 
   # Retrieve data using the query builder in ./common.R
-  data <- rbind(wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) 
AS date,
+  data1 <- wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS 
date,
                            CASE event_action WHEN 'click' THEN 'clickthroughs'
                            WHEN 'start' THEN 'search sessions'
                            WHEN 'results' THEN 'Result pages opened' END AS 
action,
@@ -14,29 +14,46 @@
                            userAgent",
                            date = date,
                            table = "MobileWikiAppSearch_10641988",
-                           conditionals = "event_action IN 
('click','start','results')"),
-                wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) 
AS date,
+                           conditionals = "event_action IN 
('click','start','results')")
+  data2 <- wmf::build_query(fields = "SELECT SUBSTRING(timestamp, 1, 8) AS 
date,
                            CASE event_action WHEN 'click' THEN 'clickthroughs'
                            WHEN 'start' THEN 'search sessions'
                            WHEN 'results' THEN 'Result pages opened' END AS 
action,
                            event_timeToDisplayResults AS load_time,
-                           userAgent",
+                           userAgent,
+                                                  event_source AS 
invoke_source,
+                                                  event_position AS 
click_position",
                            date = date,
                            table = "MobileWikiAppSearch_15729321",
-                           conditionals = "event_action IN 
('click','start','results')"))
+                           conditionals = "event_action IN 
('click','start','results')")
   # See https://phabricator.wikimedia.org/T143447 for more info on why we're 
combining
   # events from these two different schema revisions.
-  data <- data.table::as.data.table(data)
-  data$date <- lubridate::ymd(data$date)
-  data$platform[grepl(x = data$userAgent, pattern = "Android", fixed = TRUE)] 
<- "Android"
-  data$platform[is.na(data$platform)] <- "iOS"
-  data <- data[,userAgent := NULL,]
-
+  data1 <- data.table::as.data.table(rbind(data1, data2[,!(names(data2) %in% 
c("invoke_source", "click_position"))]))
+  data1$date <- lubridate::ymd(data1$date)
+  data1$platform[grepl(x = data1$userAgent, pattern = "Android", fixed = 
TRUE)] <- "Android"
+  data1$platform[is.na(data1$platform)] <- "iOS"
+  data1 <- data1[,userAgent := NULL,]
+  
+  data2 <- data.table::as.data.table(data2)
+  data2$date <- lubridate::ymd(data2$date)
+  data2$platform[grepl(x = data2$userAgent, pattern = "Android", fixed = 
TRUE)] <- "Android"
+  data2$platform[is.na(data2$platform)] <- "iOS"
+  data2 <- data2[,c("userAgent","load_time"):=NULL,]
+  
   # Generate aggregates
-  app_results <- data[,j = list(events = .N), by = c("date", "action", 
"platform")]
-
+  app_results <- data1[,j = list(events = .N), by = c("date", "action", 
"platform")]  
+  data2$click_position <- as.numeric(data2$click_position)+1
+  data2$click_position <- ifelse(data2$click_position>=10 & 
data2$click_position<20, '10-19', 
+                                ifelse(data2$click_position>=20 & 
data2$click_position <=100, '20-100',
+                                                               
ifelse(data2$click_position>100, '100+', data2$click_position)))                
                                                
+  position_count <- data2[action=='clickthroughs', j = list(events = .N), by = 
c("date","click_position")]
+  source_count <- data2[action=='search sessions', j = list(events = .N), by = 
c("date","invoke_source")]
+  source_count$invoke_source <- dplyr::recode(source_count$invoke_source, 
+                                           '0'='Main article toolbar', 
'1'='Widget', '2'='Share intent','3'='Process-text intent',
+                                                                               
'4'='Floating search bar in the feed', '5'='Voice search query')
+  
   # Produce load time data
-  load_times <- data[data$action == "Result pages opened", {
+  load_times <- data1[data1$action == "Result pages opened", {
     output <- data.frame(t(quantile(load_time, c(0.5, 0.95, 0.99))))
     names(output) <- c("Median", "95th percentile", "99th Percentile")
     output
@@ -45,6 +62,8 @@
   # Write out
   wmf::write_conditional(app_results, file.path(base_path, 
"app_event_counts.tsv"))
   wmf::write_conditional(load_times, file.path(base_path, 
"app_load_times.tsv"))
-  
+  wmf::write_conditional(source_count, file.path(base_path, 
"invoke_source_counts.tsv"))
+  wmf::write_conditional(position_count, file.path(base_path, 
"click_position_counts.tsv"))  
+    
   return(invisible())
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/308099
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib4fde72c1d3a898eab9256be89409790ff34afcd
Gerrit-PatchSet: 4
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: Chelsyx <c...@wikimedia.org>
Gerrit-Reviewer: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to