Bearloga has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/276656

Change subject: Update reference to TestSearchSatisfaction2 table
......................................................................

Update reference to TestSearchSatisfaction2 table

+ Make sure timestamps are sorted before being processed by dwell_time

Change-Id: I5a25e0f95e3638a1aecaebfb967e275b99285f13
---
M search/LDN.R
M search/dwelltime.R
2 files changed, 9 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden refs/changes/56/276656/1

diff --git a/search/LDN.R b/search/LDN.R
index 2172eba..51de332 100644
--- a/search/LDN.R
+++ b/search/LDN.R
@@ -4,7 +4,7 @@
 # Per-file config:
 base_path <- paste0(write_root, "search/")
 
-main <- function(date = NULL, table = "TestSearchSatisfaction2_14098806") {
+main <- function(date = NULL, table = "TestSearchSatisfaction2_15357244") {
   
   checkins <- c(0, 10, 20, 30, 40, 50, 60, 90, 120, 150, 180, 210, 240, 300, 
360, 420)
   # ^ this will be used for figuring out the interval bounds for each check-in
@@ -13,18 +13,14 @@
   data <- wmf::build_query(fields = "SELECT * ",
                            date = date,
                            table = table,
-                           conditionals = "event_subTest IS NULL")
+                           conditionals = "event_subTest IS NULL
+                                           AND event_source = 'fulltext'")
   data <- data.table::as.data.table(data)
   data$timestamp <- lubridate::ymd_hms(data$timestamp)
   
-  # Backwards-compatibility:
-  if ( table == "TestSearchSatisfaction2_14098806" ) {
-    data.table::setnames(data, "event_pageViewId", "event_pageId")
-  }
-  
   # Treat each individual search session as its own thing, rather than 
belonging
   #   to a set of other search sessions by the same user.
-  page_visits <- plyr::ddply(data, .(event_searchSessionId, event_pageId),
+  page_visits <- plyr::ddply(data, .(event_searchSessionId, event_pageViewId),
                              function(session) {
                                if (!all(c('visitPage', 'checkin') %in% 
session$event_action)) {
                                  return(NULL)
diff --git a/search/dwelltime.R b/search/dwelltime.R
index 93c8e79..4c9eb07 100644
--- a/search/dwelltime.R
+++ b/search/dwelltime.R
@@ -1,7 +1,7 @@
 # Per-file config:
 base_path <- paste0(write_root, "search/")
 
-main <- function(date = NULL, table = "TestSearchSatisfaction2_14098806"){
+main <- function(date = NULL, table = "TestSearchSatisfaction2_15357244"){
   
   # Retrieve data
   data <- wmf::build_query(fields = "
@@ -9,8 +9,11 @@
                            timestamp",
                            date = date,
                            table = table,
-                           conditionals = "event_action 
IN('searchResultPage','visitPage') AND event_subTest IS NULL")
+                           conditionals = "event_action 
IN('searchResultPage','visitPage')
+                                           AND event_subTest IS NULL
+                                           AND event_source = 'fulltext'")
   data$timestamp <- lubridate::ymd_hms(data$timestamp)
+  data <- data[order(data$session_id, data$timestamp), ]
   
   # Generate the data
   if(is.null(date)){

-- 
To view, visit https://gerrit.wikimedia.org/r/276656
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a25e0f95e3638a1aecaebfb967e275b99285f13
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to