Bearloga has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/335746 )
Change subject: Point to new datasets ...................................................................... Point to new datasets Change-Id: Id384962d485931ebcb904e491ae0bf641d38c9bf --- M CHANGELOG.md M server.R M utils.R 3 files changed, 272 insertions(+), 249 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow refs/changes/46/335746/1 diff --git a/CHANGELOG.md b/CHANGELOG.md index efb55e8..f01c6ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ # Change Log (Patch Notes) + All notable changes to this project will be documented in this file. +## 2017/02/02 +- Updated to work with new datasets generated by Reportupdater-based golden ([T150915](https://phabricator.wikimedia.org/T150915)) + +## 2016 +- Added PaulScore ([T144424](https://phabricator.wikimedia.org/T144424)) +- Added ZRR broken up by language-project pairs ([T126244](https://phabricator.wikimedia.org/T126244)) +- Added Invoke Source and Click Positions for Android ([T143726](https://phabricator.wikimedia.org/T143726)) +- Added visited result survival ([T113297](https://phabricator.wikimedia.org/T113297)) +- Added dwell-time & user engagement metrics ([T113297](https://phabricator.wikimedia.org/T113297), [T113513](https://phabricator.wikimedia.org/T113513), [Change 240593](https://gerrit.wikimedia.org/r/#/c/240593/)) + ## 2015/11/10 - Updated the readme - Moved certain code blocks to **polloi** for use in other dashboards diff --git a/server.R b/server.R index 62678bd..343dd5f 100644 --- a/server.R +++ b/server.R @@ -1,9 +1,9 @@ -library(shiny) -library(shinydashboard) -library(dygraphs) -library(sparkline) -library(DT) -library(data.table) +suppressPackageStartupMessages({ + library(shiny) + library(shinydashboard) + library(dygraphs) + library(sparkline) +}) source("utils.R") @@ -162,7 +162,7 @@ ## App value boxes output$app_event_searches <- renderValueBox( valueBox( - value = android_dygraph_means[3], + value = ios_dygraph_means["search sessions"] + android_dygraph_means["search sessions"], subtitle = "Search sessions per day", icon = icon("search"), color = "green" @@ -171,7 +171,7 @@ output$app_event_resultsets <- renderValueBox( valueBox( - value = android_dygraph_means[2], + value = ios_dygraph_means["Result pages opened"] + android_dygraph_means["Result pages opened"], subtitle = "Result sets per day", icon = icon("list", lib = "glyphicon"), color = "green" @@ -180,7 +180,7 @@ output$app_event_clickthroughs <- renderValueBox( valueBox( - value = android_dygraph_means[1], + value = ios_dygraph_means["clickthroughs"] + android_dygraph_means["clickthroughs"], subtitle = "Clickthroughs per day", icon = icon("hand-up", lib = "glyphicon"), color = "green" @@ -240,36 +240,35 @@ ## API plots output$cirrus_aggregate <- renderDygraph({ - split_dataset$cirrus[, c(1, 3)] %>% + split_dataset$cirrus %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_fulltext_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Full-text via API usage by day", legend_name = "Searches") %>% dyRangeSelector }) output$open_aggregate <- renderDygraph({ - split_dataset$open[, c(1, 3)] %>% + split_dataset$open %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_open_search)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "OpenSearch API usage by day", legend_name = "Searches") %>% dyRangeSelector }) output$geo_aggregate <- renderDygraph({ - split_dataset$geo[, c(1, 3)] %>% + split_dataset$geo %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_geo_search)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Geo Search API usage by day", legend_name = "Searches") %>% dyRangeSelector }) output$language_aggregate <- renderDygraph({ - split_dataset$language[, c(1, 3)] %>% + split_dataset$language %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_language_search)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Language Search API usage by day", legend_name = "Searches") %>% dyRangeSelector }) output$prefix_aggregate <- renderDygraph({ - split_dataset$prefix[, c(1, 3)] %>% + split_dataset$prefix %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_prefix_search)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Prefix Search API usage by day", legend_name = "Searches") %>% dyRangeSelector @@ -277,7 +276,8 @@ # Failure plots output$failure_rate_plot <- renderDygraph({ - polloi::data_select(input$failure_rate_automata, failure_data_with_automata, failure_data_no_automata) %>% + input$failure_rate_automata %>% + polloi::data_select(failure_data_with_automata, failure_data_no_automata) %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", title = "Zero Results Rate, by day", legend_name = "ZRR") %>% @@ -288,7 +288,8 @@ }) output$failure_rate_change_plot <- renderDygraph({ - polloi::data_select(input$failure_rate_automata, failure_roc_with_automata, failure_roc_no_automata) %>% + input$failure_rate_automata %>% + polloi::data_select(failure_roc_with_automata, failure_roc_no_automata) %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Change", title = "Zero Results rate change, by day", legend_name = "Change") %>% dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", valueFormatter = "function(x) { return Math.round(x, 3) + '%'; }") %>% @@ -300,8 +301,7 @@ xts_data <- input$failure_breakdown_automata %>% polloi::data_select(failure_breakdown_with_automata, failure_breakdown_no_automata) %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_breakdown)) %>% - - { xts(.[, -1], order.by = .$date) } + { xts::xts(.[, -1], order.by = .$date) } xts_data %>% dygraph(xlab = "Date", ylab = "Zero Results Rate", main = "Zero result rate by search type") %>% dyLegend(width = 600, show = "always", labelsDiv = "failure_breakdown_plot_legend") %>% @@ -328,7 +328,8 @@ }) output$suggestion_dygraph_plot <- renderDygraph({ - polloi::data_select(input$failure_suggestions_automata, suggestion_with_automata, suggestion_no_automata) %>% + input$failure_suggestions_automata %>% + polloi::data_select(suggestion_with_automata, suggestion_no_automata) %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_suggestions)) %>% polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate", title = "Zero Result Rates with Search Suggestions") %>% dyAxis("y", axisLabelFormatter = "function(x) { return x + '%'; }", valueFormatter = "function(x) { return x + '%'; }") %>% @@ -384,7 +385,8 @@ }) output$failure_langproj_plot <- renderDygraph({ - polloi::data_select(input$failure_langproj_automata, langproj_with_automata, langproj_no_automata) %>% + input$failure_langproj_automata %>% + polloi::data_select(langproj_with_automata, langproj_no_automata) %>% aggregate_wikis(input$language_selector, input$project_selector) %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_langproj)) %>% polloi::make_dygraph(xlab = "", ylab = "Zero Results Rate", title = "Zero result rate by language and project") %>% @@ -417,7 +419,7 @@ temp <- dates %>% as.character("%e") %>% as.numeric %>% - sapply(toOrdinal) %>% + sapply(toOrdinal::toOrdinal) %>% sub("([a-z]{2})", "<sup>\\1</sup>", .) %>% paste0(as.character(dates, "%A, %b "), .) }, @@ -426,7 +428,7 @@ temp <- dates %>% as.character("%e") %>% as.numeric %>% - sapply(toOrdinal) %>% + sapply(toOrdinal::toOrdinal) %>% sub("([a-z]{2})", "<sup>\\1</sup>", .) %>% paste0(as.character(dates, "%b "), .) %>% { @@ -438,7 +440,7 @@ temp <- dates %>% as.character("%e") %>% as.numeric %>% - sapply(toOrdinal) %>% + sapply(toOrdinal::toOrdinal) %>% sub("([a-z]{2})", "<sup>\\1</sup>", .) %>% paste0(as.character(dates, "%b "), .) %>% { @@ -450,7 +452,7 @@ return(dates %>% as.character("%e") %>% as.numeric %>% - sapply(toOrdinal) %>% + sapply(toOrdinal::toOrdinal) %>% sub("([a-z]{2})", "<sup>\\1</sup>", .) %>% paste0(as.character(dates, "%B "), .) %>% paste0(collapse = "-") %>% @@ -485,8 +487,7 @@ output$kpi_summary_box_zero_results <- renderValueBox({ date_range <- input$kpi_summary_date_range_selector if (date_range == "all") return(div("Zero results rate")) - x <- polloi::subset_by_date_range(failure_data_with_automata, from = start_date(date_range), to = Sys.Date() - 1) - x <- transform(x, Rate = rate)$Rate + x <- polloi::subset_by_date_range(failure_data_with_automata, from = start_date(date_range), to = Sys.Date() - 1)$rate if (date_range == "quarterly") { return(valueBox(subtitle = "Zero results rate", color = "orange", value = sprintf("%.1f%%", median(x)))) @@ -497,7 +498,7 @@ return(valueBox( subtitle = sprintf("Zero results rate (%.1f%%)", z), value = sprintf("%.1f%%", y2), - icon = cond_icon(z > 0), color = polloi::cond_color(z > 0, "red") + icon = polloi::cond_icon(z > 0), color = polloi::cond_color(z > 0, "red") )) } return(valueBox(subtitle = "Zero results rate (no change)", @@ -510,9 +511,9 @@ if (date_range == "all") return(div("API usage")) x <- split_dataset %>% lapply(polloi::subset_by_date_range, from = start_date(date_range), to = Sys.Date() - 1) %>% - lapply(function(x) return(x$events)) %>% - do.call(cbind, .) %>% - transform(total = cirrus + geo + language + open + prefix) %>% + dplyr::bind_rows(.id = "api") %>% + dplyr::group_by(date) %>% + dplyr::summarize(total = sum(calls)) %>% { .$total } if (date_range == "quarterly") { return(valueBox(subtitle = "API usage", value = polloi::compress(median(x), 0), color = "orange")) @@ -535,17 +536,17 @@ x <- polloi::subset_by_date_range(augmented_clickthroughs, from = start_date(date_range), to = Sys.Date() - 1) if (date_range == "quarterly") { return(valueBox(subtitle = "User engagement", color = "orange", - value = sprintf("%.1f%%", median(x$user_engagement)))) + value = sprintf("%.1f%%", median(x$`User engagement`)))) } - y1 <- median(polloi::half(x$user_engagement)) - y2 <- median(polloi::half(x$user_engagement, FALSE)) + y1 <- median(polloi::half(x$`User engagement`)) + y2 <- median(polloi::half(x$`User engagement`, FALSE)) z <- 100 * (y2 - y1)/y1 if (!is.na(z)) { if (abs(z) > 0) { return(valueBox( subtitle = sprintf("User engagement (%.1f%%)", z), value = sprintf("%.1f%%", y2), - icon = cond_icon(z > 0), color = polloi::cond_color(z > 0, "green") + icon = polloi::cond_icon(z > 0), color = polloi::cond_color(z > 0, "green") )) } return(valueBox(subtitle = "User engagement (no change)", @@ -556,16 +557,13 @@ ## KPI Sparklines output$sparkline_load_time <- sparkline:::renderSparkline({ - if(input$kpi_summary_date_range_selector == "all"){ + if (input$kpi_summary_date_range_selector == "all") { output_sl <- list(desktop_load_data, mobile_load_data, android_load_data, ios_load_data) - } else{ + } else { output_sl <- list(desktop_load_data, mobile_load_data, android_load_data, ios_load_data) %>% lapply(polloi::subset_by_date_range, from = Sys.Date() - 91, to = Sys.Date() - 1) } output_sl <- output_sl %>% - lapply(function(platform_load_data) { - platform_load_data[, c("date", "Median")] - }) %>% dplyr::bind_rows(.id = "platform") %>% dplyr::group_by(date) %>% dplyr::summarize(Median = median(Median)) %>% @@ -595,9 +593,9 @@ return(sparkline::spk_composite(sl1, sl2)) }) output$sparkline_zero_results <- sparkline:::renderSparkline({ - if(input$kpi_summary_date_range_selector == "all"){ + if (input$kpi_summary_date_range_selector == "all") { output_sl <- failure_data_with_automata - } else{ + } else { output_sl <- failure_data_with_automata %>% polloi::subset_by_date_range(from = Sys.Date() - 91, to = Sys.Date() - 1) } @@ -611,11 +609,11 @@ chartRangeMin = min(output_sl), chartRangeMax = max(output_sl), highlightLineColor = 'orange', highlightSpotColor = 'orange') # highlight selected date range - if (input$kpi_summary_date_range_selector == "weekly"){ + if (input$kpi_summary_date_range_selector == "weekly") { output_highlight <- c(rep(NA, length(output_sl)-7), output_sl[(length(output_sl)-6):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "monthly"){ + } else if (input$kpi_summary_date_range_selector == "monthly") { output_highlight <- c(rep(NA, length(output_sl)-30), output_sl[(length(output_sl)-29):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "quarterly"){ + } else if (input$kpi_summary_date_range_selector == "quarterly") { output_highlight <- output_sl } else { return(sl1) @@ -628,19 +626,16 @@ return(sparkline::spk_composite(sl1, sl2)) }) output$sparkline_api_usage <- sparkline:::renderSparkline({ - if(input$kpi_summary_date_range_selector == "all"){ + if (input$kpi_summary_date_range_selector == "all") { output_sl <- split_dataset - } else{ + } else { output_sl <- split_dataset %>% lapply(polloi::subset_by_date_range, from = Sys.Date() - 91, to = Sys.Date() - 1) } output_sl <- output_sl %>% - lapply(function(platform_load_data) { - platform_load_data[, c("date", "events")] - }) %>% dplyr::bind_rows(.id = "api") %>% dplyr::group_by(date) %>% - dplyr::summarize(total = sum(events)) %>% + dplyr::summarize(total = sum(calls)) %>% dplyr::select(total) %>% unlist(use.names = FALSE) sl1 <- sparkline::sparkline(values = output_sl, type = "line", @@ -649,11 +644,11 @@ chartRangeMin = min(output_sl), chartRangeMax = max(output_sl), highlightLineColor = 'orange', highlightSpotColor = 'orange') # highlight selected date range - if (input$kpi_summary_date_range_selector == "weekly"){ + if (input$kpi_summary_date_range_selector == "weekly") { output_highlight <- c(rep(NA, length(output_sl)-7), output_sl[(length(output_sl)-6):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "monthly"){ + } else if (input$kpi_summary_date_range_selector == "monthly") { output_highlight <- c(rep(NA, length(output_sl)-30), output_sl[(length(output_sl)-29):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "quarterly"){ + } else if (input$kpi_summary_date_range_selector == "quarterly") { output_highlight <- output_sl } else { return(sl1) @@ -666,14 +661,14 @@ return(sparkline::spk_composite(sl1, sl2)) }) output$sparkline_augmented_clickthroughs <- sparkline:::renderSparkline({ - if(input$kpi_summary_date_range_selector == "all"){ + if(input$kpi_summary_date_range_selector == "all") { output_sl <- augmented_clickthroughs - } else{ + } else { output_sl <- augmented_clickthroughs %>% polloi::subset_by_date_range(from = Sys.Date() - 91, to = Sys.Date() - 1) } output_sl <- output_sl %>% - dplyr::select(user_engagement) %>% + dplyr::select(`User engagement`) %>% unlist(use.names = FALSE) %>% round(2) sl1 <- sparkline::sparkline(values = output_sl, type = "line", @@ -682,11 +677,11 @@ chartRangeMin = min(output_sl), chartRangeMax = max(output_sl), highlightLineColor = 'orange', highlightSpotColor = 'orange') # highlight selected date range - if (input$kpi_summary_date_range_selector == "weekly"){ + if (input$kpi_summary_date_range_selector == "weekly") { output_highlight <- c(rep(NA, length(output_sl)-7), output_sl[(length(output_sl)-6):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "monthly"){ + } else if (input$kpi_summary_date_range_selector == "monthly") { output_highlight <- c(rep(NA, length(output_sl)-30), output_sl[(length(output_sl)-29):length(output_sl)]) - } else if (input$kpi_summary_date_range_selector == "quarterly"){ + } else if (input$kpi_summary_date_range_selector == "quarterly") { output_highlight <- output_sl } else { return(sl1) @@ -707,12 +702,10 @@ all = NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91) load_times <- list(desktop_load_data, mobile_load_data, android_load_data, ios_load_data) %>% { - if (is.na(start_date)) { - lapply(., function(dataset) { - return(dataset[!duplicated(dataset$date, dataset$event_type, fromLast = TRUE), ]) - }) - } else { + if (!is.na(start_date)) { lapply(., polloi::subset_by_date_range, from = start_date, to = Sys.Date() - 1) + } else { + . } } %>% lapply(function(data_tail) return(data_tail[, c('date', 'Median')])) %>% @@ -744,10 +737,10 @@ start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all = NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91) zrr <- failure_data_with_automata %>% { - if (is.na(start_date)) { - . - } else { + if (!is.na(start_date)) { polloi::subset_by_date_range(., from = start_date, to = Sys.Date()) + } else { + . } } %>% transform(`Rate` = rate) @@ -787,25 +780,23 @@ start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all = NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91) api_usage <- split_dataset %>% { - if (is.na(start_date)) { - lapply(., function(dataset) { - return(dataset[!duplicated(dataset$date, dataset$event_type, fromLast = TRUE), ]) - }) - } else { + if (!is.na(start_date)) { lapply(., polloi::subset_by_date_range, from = start_date, to = Sys.Date() - 1) + } else { + . } } %>% - dplyr::bind_rows() %>% - tidyr::spread("event_type", "events") %>% - as.data.frame + dplyr::bind_rows(.id = "api") %>% + tidyr::spread("api", "calls") if ( input$kpi_api_usage_series_include_open ) { - api_usage <- transform(api_usage, all = cirrus + geo + language + open + prefix) + api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language + open + prefix) } else { - api_usage <- transform(api_usage, all = cirrus + geo + language + prefix) + api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language + prefix) } if ( input$kpi_api_usage_series_data == "raw" ) { - api_usage %<>% polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) - api_usage <- xts::xts(api_usage[, -1], api_usage[, 1]) + api_usage %<>% + polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) %>% + { xts::xts(.[, -1], order.by = .$date) } if (!input$kpi_api_usage_series_include_open) { colnames(api_usage)[6] <- "all except open" } @@ -821,16 +812,17 @@ dyCSS(css = system.file("custom.css", package = "polloi")) %>% dyRangeSelector) } - api_usage_change <- transform(api_usage, - cirrus = polloi::percent_change(cirrus), - geo = polloi::percent_change(geo), - language = polloi::percent_change(language), - open = polloi::percent_change(open), - prefix = polloi::percent_change(prefix), - all = polloi::percent_change(all)) %>% - { .[-1, ] } - api_usage_change %<>% polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) - api_usage_change <- xts::xts(api_usage_change[, -1], api_usage_change[, 1]) + api_usage_change <- dplyr::mutate( + api_usage, + cirrus = polloi::percent_change(cirrus), + geo = polloi::percent_change(geo), + language = polloi::percent_change(language), + open = polloi::percent_change(open), + prefix = polloi::percent_change(prefix), + all = polloi::percent_change(all)) %>% + { .[-1, ] } %>% + polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) %>% + { xts::xts(.[, -1], .$date) } if (!input$kpi_api_usage_series_include_open) colnames(api_usage_change)[6] <- "all except open" return(dygraph(api_usage_change, main = "Day-to-day % change over time", @@ -846,10 +838,10 @@ start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all = NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91) smoothed_data <- augmented_clickthroughs %>% { - if (is.na(start_date)) { - . - } else { + if (!is.na(start_date)) { polloi::subset_by_date_range(., from = start_date, to = Sys.Date()) + } else { + . } } %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_augmented_clickthroughs)) @@ -862,56 +854,53 @@ dyEvent(as.Date("2016-07-12"), "A (schema switch)", labelLoc = "bottom") }) - output$monthly_metrics_tbl <- DT::renderDataTable( - { - temp <- data.frame( + output$monthly_metrics_tbl <- DT::renderDataTable({ + temp <- data.frame( KPI = c("Load time", "Zero results rate", "API Usage", "User engagement"), - Units = c("ms", "%", "", "%") + Units = c("ms", "%", "", "%"), + stringsAsFactors = FALSE ) prev_month <- as.Date(paste(input$monthy_metrics_year, which(month.name == input$monthy_metrics_month), "1", sep = "-")) prev_prev_month <- prev_month - months(1) prev_year <- prev_month - months(12) - smoothed_load_times <- list(Desktop = desktop_load_data, - Mobile = mobile_load_data, - Android = android_load_data, - iOS = ios_load_data) %>% - lapply(function(platform_load_data) { - platform_load_data[, c("date", "Median")] - }) %>% + smoothed_load_times <- list( + Desktop = desktop_load_data, + Mobile = mobile_load_data, + Android = android_load_data, + iOS = ios_load_data + ) %>% dplyr::bind_rows(.id = "platform") %>% dplyr::group_by(date) %>% dplyr::summarize(Median = median(Median)) %>% polloi::smoother("month", rename = FALSE) smoothed_zrr <- polloi::smoother(failure_data_with_automata, "month", rename = FALSE) smoothed_api <- split_dataset %>% - lapply(function(platform_load_data) { - platform_load_data[, c("date", "events")] - }) %>% dplyr::bind_rows(.id = "api") %>% dplyr::group_by(date) %>% - dplyr::summarize(total = sum(events)) %>% + dplyr::summarize(total = sum(calls)) %>% polloi::smoother("month", rename = FALSE) - smoothed_engagement <- augmented_clickthroughs[, c("date", "user_engagement")] %>% + smoothed_engagement <- augmented_clickthroughs %>% + dplyr::select(c(date, `User engagement`)) %>% polloi::smoother("month", rename = FALSE) temp$Current <- c( smoothed_load_times$Median[smoothed_load_times$date == prev_month], smoothed_zrr$rate[smoothed_zrr$date == prev_month], smoothed_api$total[smoothed_api$date == prev_month], - smoothed_engagement$user_engagement[smoothed_engagement$date == prev_month] + smoothed_engagement$`User engagement`[smoothed_engagement$date == prev_month] ) temp$Previous_month <- c( smoothed_load_times$Median[smoothed_load_times$date == prev_prev_month], smoothed_zrr$rate[smoothed_zrr$date == prev_prev_month], smoothed_api$total[smoothed_api$date == prev_prev_month], - smoothed_engagement$user_engagement[smoothed_engagement$date == prev_prev_month] + smoothed_engagement$`User engagement`[smoothed_engagement$date == prev_prev_month] ) temp$Previous_year <- c( ifelse(sum(smoothed_load_times$date == prev_year) == 0, NA, smoothed_load_times$Median[smoothed_load_times$date == prev_year]), ifelse(sum(smoothed_zrr$date == prev_year) == 0, NA, smoothed_zrr$rate[smoothed_zrr$date == prev_year]), ifelse(sum(smoothed_api$date == prev_year) == 0, NA, smoothed_api$total[smoothed_api$date == prev_year]), - ifelse(sum(smoothed_engagement$date == prev_year) == 0, NA, smoothed_engagement$user_engagement[smoothed_engagement$date == prev_year]) + ifelse(sum(smoothed_engagement$date == prev_year) == 0, NA, smoothed_engagement$`User engagement`[smoothed_engagement$date == prev_year]) ) temp$Anchors <- c("kpi_load_time", "kpi_zero_results", "kpi_api_usage", "kpi_augmented_clickthroughs") @@ -945,7 +934,7 @@ paste(smoothed_api %>% dplyr::arrange(date) %>% dplyr::mutate(month = zoo::as.yearmon(date)) %>% dplyr::select(-date) %>% dplyr::distinct() %>% {.$total}, collapse = ","), paste(smoothed_engagement %>% dplyr::arrange(date) %>% dplyr::mutate(month = zoo::as.yearmon(date)) %>% - dplyr::select(-date) %>% dplyr::distinct() %>% {.$user_engagement}, collapse = ",") + dplyr::select(-date) %>% dplyr::distinct() %>% {.$`User engagement`}, collapse = ",") ) cols_to_keep <- c(1, 5, 4, 3, 7, 8, 9) if (!input$monthly_metrics_prev_month) { @@ -954,21 +943,28 @@ if (!input$monthly_metrics_prev_year) { cols_to_keep <- base::setdiff(cols_to_keep, 5) } - column_def <- list(list(targets = length(cols_to_keep)-1, render = JS("function(data, type, full){ return '<span class=sparkSeries>' + data + '</span>' }"))) + column_def <- list(list( + targets = length(cols_to_keep) - 1, + render = DT::JS("function(data, type, full){ return '<span class=sparkSeries>' + data + '</span>' }") + )) line_string <- "type: 'line', lineColor: 'black', fillColor: '#ccc', highlightLineColor: 'orange', highlightSpotColor: 'orange'" - callback_fnc <- JS(paste0("function (oSettings, json) { + callback_fnc <- DT::JS(paste0("function (oSettings, json) { $('.sparkSeries:not(:has(canvas))').sparkline('html', { ", line_string, " }); $('a[id^=mm_kpi_]').click(function(){ var target = $(this).attr('id').replace('mm_', ''); $('a[data-value=\"'+target+'\"]').click();}); $('a[id^=mm_kpi_]').hover(function() {$(this).css('cursor','pointer');});\n}"), collapse = "") - mm_dt <- datatable(data.table(temp[, cols_to_keep]), rownames = FALSE, - options = list(searching = F, paging = F, info = F, ordering = F, - columnDefs = column_def, fnDrawCallback = callback_fnc), escape=F) + mm_dt <- DT::datatable( + temp[, cols_to_keep], rownames = FALSE, + options = list( + searching = FALSE, paging = FALSE, info = FALSE, ordering = FALSE, + columnDefs = column_def, fnDrawCallback = callback_fnc + ), + escape = FALSE + ) mm_dt$dependencies <- append(mm_dt$dependencies, htmlwidgets:::getDependency("sparkline")) - mm_dt - } - ) + return(mm_dt) + }) # Check datasets for missing data and notify user which datasets are missing data (if any) output$message_menu <- renderMenu({ diff --git a/utils.R b/utils.R index adb0711..98fdb4e 100644 --- a/utils.R +++ b/utils.R @@ -1,155 +1,159 @@ -#Dependent libs -library(reshape2) -library(ggplot2) -library(toOrdinal) library(magrittr) -library(polloi) -library(xts) -library(tidyr) ## Read in desktop data and generate means for the value boxes, along with a time-series appropriate form for ## dygraphs. read_desktop <- function() { - data <- polloi::read_dataset("search/desktop_event_counts.tsv", col_types = "Dci") - names(data)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - interim <- reshape2::dcast(data, formula = date ~ action, fun.aggregate = sum) - interim[is.na(interim)] <- 0 - desktop_dygraph_set <<- interim - desktop_dygraph_means <<- round(colMeans(desktop_dygraph_set[,2:5])) - interim <- polloi::read_dataset("search/desktop_load_times.tsv", col_types = "Dddd") - names(interim)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - desktop_load_data <<- interim + desktop_dygraph_set <<- polloi::read_dataset("discovery/search/desktop_event_counts.tsv", col_types = "Dci") %>% + dplyr::filter(!is.na(action), !is.na(events)) %>% + tidyr::spread(action, events, fill = 0) + desktop_dygraph_means <<- round(colMeans(desktop_dygraph_set[, 2:5])) + desktop_load_data <<- polloi::read_dataset("discovery/search/desktop_load_times.tsv", col_types = "Dddd") %>% + dplyr::filter(!is.na(Median)) } read_web <- function() { - data <- polloi::read_dataset("search/mobile_event_counts.tsv", col_types = "Dci") - names(data)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - interim <- reshape2::dcast(data, formula = date ~ action, fun.aggregate = sum) - interim[is.na(interim)] <- 0 - mobile_dygraph_set <<- interim - mobile_dygraph_means <<- round(colMeans(mobile_dygraph_set[,2:4])) - interim <- polloi::read_dataset("search/mobile_load_times.tsv", col_types = "Dddd") - names(interim)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - mobile_load_data <<- interim + mobile_dygraph_set <<- polloi::read_dataset("discovery/search/mobile_event_counts.tsv", col_types = "Dci") %>% + dplyr::filter(!is.na(action), !is.na(events)) %>% + tidyr::spread(action, events, fill = 0) + mobile_dygraph_means <<- round(colMeans(mobile_dygraph_set[, 2:4])) + mobile_load_data <<- polloi::read_dataset("discovery/search/mobile_load_times.tsv", col_types = "Dddd") %>% + dplyr::filter(!is.na(Median)) } read_apps <- function() { + data <- polloi::read_dataset("discovery/search/app_event_counts.tsv", col_types = "Dcci") %>% + dplyr::filter(!is.na(action), !is.na(events)) %>% + dplyr::distinct(date, platform, action, .keep_all = TRUE) + ios <- data %>% + dplyr::filter(platform == "iOS") %>% + dplyr::select(-platform) %>% + tidyr::spread(action, events, fill = 0) + android <- data %>% + dplyr::filter(platform == "Android") %>% + dplyr::select(-platform) %>% + tidyr::spread(action, events, fill = 0) - data <- polloi::read_dataset("search/app_event_counts.tsv", col_types = "Dcci") - names(data)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - ios <- reshape2::dcast(data[data$platform == "iOS",], formula = date ~ action, fun.aggregate = sum) - android <- reshape2::dcast(data[data$platform == "Android",], formula = date ~ action, fun.aggregate = sum) ios_dygraph_set <<- ios - ios_dygraph_means <<- round(colMeans(ios[,2:4])) + ios_dygraph_means <<- round(colMeans(ios[, 2:4])) android_dygraph_set <<- android - android_dygraph_means <<- round(colMeans(android[,2:4])) + android_dygraph_means <<- round(colMeans(android[, 2:4])) - app_load_data <- polloi::read_dataset("search/app_load_times.tsv", col_types = "Dcddd") - names(app_load_data)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ + app_load_data <- polloi::read_dataset("discovery/search/app_load_times.tsv", col_types = "Dcddd") %>% + dplyr::filter(!is.na(Median)) %>% + dplyr::distinct(date, platform, .keep_all = TRUE) ios_load_data <<- app_load_data[app_load_data$platform == "iOS", names(app_load_data) != "platform"] android_load_data <<- app_load_data[app_load_data$platform == "Android", names(app_load_data) != "platform"] - position_interim <- polloi::read_dataset("search/click_position_counts.tsv", col_types = "Dci") %>% + position_interim <- polloi::read_dataset("discovery/search/click_position_counts.tsv", col_types = "Dci") %>% + dplyr::filter(!is.na(click_position), !is.na(events)) %>% + dplyr::distinct(date, click_position, .keep_all = TRUE) %>% dplyr::group_by(date) %>% dplyr::mutate(prop = round(events/sum(events)*100, 2)) %>% dplyr::ungroup() %>% dplyr::select(-events) %>% - reshape2::dcast(formula = date ~ click_position, fun.aggregate = sum) - position_interim <- position_interim[,c("date", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10-19", "20-100", "100+")] + tidyr::spread(click_position, prop, fill = 0) + position_interim <- position_interim[, c("date", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10-19", "20-100", "100+")] names(position_interim) <- c("date", "1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th-19th", "20th-100th", "101st+") position_prop <<- position_interim - source_prop <<- polloi::read_dataset("search/invoke_source_counts.tsv", col_types = "Dci") %>% + source_prop <<- polloi::read_dataset("discovery/search/invoke_source_counts.tsv", col_types = "Dci") %>% + dplyr::filter(!is.na(invoke_source), !is.na(events)) %>% + dplyr::distinct(date, invoke_source, .keep_all = TRUE) %>% dplyr::group_by(date) %>% dplyr::mutate(prop = round(events/sum(events)*100, 2)) %>% dplyr::ungroup() %>% dplyr::select(-events) %>% - reshape2::dcast(formula = date ~ invoke_source, fun.aggregate = sum) + tidyr::spread(invoke_source, prop, fill = 0) } read_api <- function(){ - data <- polloi::read_dataset("search/search_api_aggregates.tsv", col_types = "cci") - names(data)[1] <- 'date' # Will be unnecessary after https://gerrit.wikimedia.org/r/#/c/250856/ - data$date <- as.Date(data$date) - data <- data[order(data$event_type), ] - split_dataset <<- split(data, f = data$event_type) + split_dataset <<- polloi::read_dataset("discovery/search/search_api_usage.tsv", col_types = "Dci") %>% + dplyr::filter(!is.na(api), !is.na(calls)) %>% + dplyr::distinct(date, api, .keep_all = TRUE) %>% + dplyr::arrange(api, date) %>% + { split(., f = .$api) } %>% + lapply(dplyr::select_, .dots = list(quote(-api))) } read_failures <- function(date) { - - interim <- polloi::read_dataset("search/cirrus_query_aggregates_with_automata.tsv", col_types = "Dd") - interim$rate <- interim$rate*100 - failure_data_with_automata <<- interim - - interim <- polloi::read_dataset("search/cirrus_query_aggregates_no_automata.tsv", col_types = "Dd") - interim$rate <- interim$rate*100 - failure_data_no_automata <<- interim - + ## Zero results rate + ### With automata + failure_data_with_automata <<- polloi::read_dataset("discovery/search/cirrus_query_aggregates_with_automata.tsv", col_types = "Dd") %>% + dplyr::filter(!is.na(rate)) %>% + dplyr::mutate(rate = 100 * rate) + ### Without automata + failure_data_no_automata <<- polloi::read_dataset("discovery/search/cirrus_query_aggregates_no_automata.tsv", col_types = "Dd") %>% + dplyr::filter(!is.na(rate)) %>% + dplyr::mutate(rate = 100 * rate) + ## Day-to-day change + ### With automata interim_new <- failure_data_with_automata$rate[2:nrow(failure_data_with_automata)] interim_old <- failure_data_with_automata$rate[1:(nrow(failure_data_with_automata)-1)] - interim <- 100 * (interim_new - interim_old)/interim_old - - failure_roc_with_automata <<- data.frame(date = failure_data_with_automata$date[2:nrow(failure_data_with_automata)], - daily_change = interim, - stringsAsFactors = FALSE) - + failure_roc_with_automata <<- data.frame( + date = failure_data_with_automata$date[2:nrow(failure_data_with_automata)], + daily_change = 100 * (interim_new - interim_old)/interim_old, + stringsAsFactors = FALSE + ) + ### Without automata interim_new <- failure_data_no_automata$rate[2:nrow(failure_data_no_automata)] interim_old <- failure_data_no_automata$rate[1:(nrow(failure_data_no_automata)-1)] - interim <- 100 * (interim_new - interim_old)/interim_old - - failure_roc_no_automata <<- data.frame(date = failure_data_no_automata$date[2:nrow(failure_data_no_automata)], - daily_change = interim, - stringsAsFactors = FALSE) - - interim_breakdown_with_automata <- polloi::read_dataset("search/cirrus_query_breakdowns_with_automata.tsv", col_types = "Dcd") - interim_breakdown_with_automata$rate <- interim_breakdown_with_automata$rate*100 - interim_breakdown_with_automata$query_type <- as.character(factor(interim_breakdown_with_automata$query_type, - levels = c("Full-Text Search", "Prefix Search", "full_text", "prefix", "comp_suggest", "more_like", "regex", "GeoData_spatial_search"), - labels = c("Full-Text Search", "Prefix Search", "Full-Text", "Prefix", "Completion Suggester", "More Like", "Regex", "Geospatial"))) - failure_breakdown_with_automata <<- reshape2::dcast(interim_breakdown_with_automata, - formula = date ~ query_type, fun.aggregate = sum, - fill = as.double(NA)) - - interim_breakdown_no_automata <- polloi::read_dataset("search/cirrus_query_breakdowns_no_automata.tsv", col_types = "Dcd") - interim_breakdown_no_automata$rate <- interim_breakdown_no_automata$rate*100 - interim_breakdown_no_automata$query_type <- as.character(factor(interim_breakdown_no_automata$query_type, - levels = c("Full-Text Search", "Prefix Search", "full_text", "prefix", "comp_suggest", "more_like", "regex", "GeoData_spatial_search"), - labels = c("Full-Text Search", "Prefix Search", "Full-Text", "Prefix", "Completion Suggester", "More Like", "Regex", "Geospatial"))) - failure_breakdown_no_automata <<- reshape2::dcast(interim_breakdown_no_automata, - formula = date ~ query_type, fun.aggregate = sum, - fill = as.double(NA)) - - # Fix to make the suggestion dataset compatible with ZRR data format switch: - interim_breakdown_with_automata$query_type[interim_breakdown_with_automata$query_type == "Full-Text"] <- "Full-Text Search" - interim_breakdown_no_automata$query_type[interim_breakdown_no_automata$query_type == "Full-Text"] <- "Full-Text Search" - # Correction for 31 January 2016 when "Full Text" appears twice (once as "Full-Text Search" and once as "Full-Text"): - interim_breakdown_with_automata <- interim_breakdown_with_automata[!duplicated(interim_breakdown_with_automata[, c('date', 'query_type')]), ] - interim_breakdown_no_automata <- interim_breakdown_no_automata[!duplicated(interim_breakdown_no_automata[, c('date', 'query_type')]), ] - - interim <- polloi::read_dataset("search/cirrus_suggestion_breakdown_with_automata.tsv", col_types = "Dd") - interim$rate <- interim$rate*100 - interim$query_type <- "Full-Text with Suggestions" - interim <- rbind(interim[,c("date", "query_type", "rate")], - interim_breakdown_with_automata[interim_breakdown_with_automata$date %in% interim$date - & interim_breakdown_with_automata$query_type == "Full-Text Search",]) - suggestion_with_automata <<- reshape2::dcast(interim, formula = date ~ query_type, fun.aggregate = sum, - fill = as.double(NA)) - - interim <- polloi::read_dataset("search/cirrus_suggestion_breakdown_no_automata.tsv", col_types = "Dd") - interim$rate <- interim$rate*100 - interim$query_type <- "Full-Text with Suggestions" - interim <- rbind(interim[,c("date", "query_type", "rate")], - interim_breakdown_no_automata[interim_breakdown_no_automata$date %in% interim$date - & interim_breakdown_no_automata$query_type == "Full-Text Search",]) - suggestion_no_automata <<- reshape2::dcast(interim, formula = date ~ query_type, fun.aggregate = sum, - fill = as.double(NA)) - - interim <- polloi::read_dataset("search/cirrus_langproj_breakdown_with_automata.tsv", na = "~", col_types = "Dccii") - interim$language %<>% sub("NA", "(None)", .) - langproj_with_automata <<- interim - interim <- polloi::read_dataset("search/cirrus_langproj_breakdown_no_automata.tsv", na = "~", col_types = "Dccii") - interim$language %<>% sub("NA", "(None)", .) - langproj_no_automata <<- interim + failure_roc_no_automata <<- data.frame( + date = failure_data_no_automata$date[2:nrow(failure_data_no_automata)], + daily_change = 100 * (interim_new - interim_old)/interim_old, + stringsAsFactors = FALSE + ) + ## ZRR by type + ### With automata + failure_breakdown_with_automata <<- polloi::read_dataset("discovery/search/cirrus_query_breakdowns_with_automata.tsv", col_types = "Dcd") %>% + dplyr::filter(!is.na(query_type), !is.na(rate)) %>% + dplyr::mutate( + rate = 100 * rate, + query_type = as.character(factor( + query_type, + levels = c("Full-Text Search", "Prefix Search", "full_text", "prefix", "comp_suggest", "more_like", "regex", "GeoData_spatial_search"), + labels = c("Full-Text Search", "Prefix Search", "Full-Text", "Prefix", "Completion Suggester", "More Like", "Regex", "Geospatial") + )), + query_type = dplyr::if_else(query_type == "Full-Text", "Full-Text Search", query_type) + ) %>% + dplyr::distinct(date, query_type, .keep_all = TRUE) %>% + tidyr::spread(query_type, rate, fill = as.double(NA)) + ### Without automata + failure_breakdown_no_automata <<- polloi::read_dataset("discovery/search/cirrus_query_breakdowns_no_automata.tsv", col_types = "Dcd") %>% + dplyr::filter(!is.na(query_type), !is.na(rate)) %>% + dplyr::mutate( + rate = 100 * rate, + query_type = as.character(factor( + query_type, + levels = c("Full-Text Search", "Prefix Search", "full_text", "prefix", "comp_suggest", "more_like", "regex", "GeoData_spatial_search"), + labels = c("Full-Text Search", "Prefix Search", "Full-Text", "Prefix", "Completion Suggester", "More Like", "Regex", "Geospatial") + )), + query_type = dplyr::if_else(query_type == "Full-Text", "Full-Text Search", query_type) + ) %>% + dplyr::distinct(date, query_type, .keep_all = TRUE) %>% + tidyr::spread(query_type, rate, fill = as.double(NA)) + ## ZRR with suggestions + ### With automata + suggestion_with_automata <<- polloi::read_dataset("discovery/search/cirrus_suggestion_breakdown_with_automata.tsv", col_types = "Dd") %>% + dplyr::filter(!is.na(rate)) %>% + dplyr::transmute(date = date, `Full-Text with Suggestions` = 100 * rate) %>% + dplyr::full_join(dplyr::select(failure_breakdown_with_automata, c(date, `Full-Text Search`)), by = "date") %>% + dplyr::arrange(date) + ### Without automata + suggestion_no_automata <<- polloi::read_dataset("discovery/search/cirrus_suggestion_breakdown_no_automata.tsv", col_types = "Dd") %>% + dplyr::filter(!is.na(rate)) %>% + dplyr::transmute(date = date, `Full-Text with Suggestions` = 100 * rate) %>% + dplyr::full_join(dplyr::select(failure_breakdown_no_automata, c(date, `Full-Text Search`)), by = "date") %>% + dplyr::arrange(date) + ## Broken down by language-project pair + ### With automata + langproj_with_automata <<- polloi::read_dataset("discovery/search/cirrus_langproj_breakdown_with_automata.tsv", na = "~", col_types = "Dccii") %>% + dplyr::filter(!is.na(zero_results), !is.na(total)) %>% + dplyr::mutate(language = sub("NA", "(None)", language)) + ### Without automata + langproj_no_automata <<- polloi::read_dataset("discovery/search/cirrus_langproj_breakdown_no_automata.tsv", na = "~", col_types = "Dccii") %>% + dplyr::filter(!is.na(zero_results), !is.na(total)) %>% + dplyr::mutate(language = sub("NA", "(None)", language)) + ### Summaries for sorting available_languages <<- langproj_with_automata %>% dplyr::group_by(language) %>% dplyr::summarize(volume = sum(as.numeric(total))) %>% @@ -168,27 +172,39 @@ } read_augmented_clickthrough <- function() { - data <- polloi::read_dataset("search/search_threshold_pass_rate.tsv", col_types = "Dd") - temp <- polloi::safe_tail(desktop_dygraph_set, nrow(data))[, c('clickthroughs', 'Result pages opened')] + - polloi::safe_tail(mobile_dygraph_set, nrow(data))[, c('clickthroughs', 'Result pages opened')] + - polloi::safe_tail(ios_dygraph_set, nrow(data))[, c('clickthroughs', 'Result pages opened')] + - polloi::safe_tail(android_dygraph_set, nrow(data))[, c('clickthroughs', 'Result pages opened')] - intermediary_dataset <- cbind(data, clickthrough_rate = 100 * temp$clickthroughs/temp$'Result pages opened') - colnames(intermediary_dataset) <- c("date", "threshold_passing_rate", "clickthrough_rate") - intermediary_dataset$threshold_passing_rate <- 100 * intermediary_dataset$threshold_passing_rate - augmented_clickthroughs <<- transform(intermediary_dataset, user_engagement = (threshold_passing_rate + clickthrough_rate)/2) + threshold_data <- polloi::read_dataset("discovery/search/search_threshold_pass_rate.tsv", col_types = "Dd") %>% + dplyr::filter(!is.na(threshold_pass)) %>% + dplyr::mutate(threshold_pass = 100 * threshold_pass) + augmented_clickthroughs <<- list( + desktop = dplyr::select(desktop_dygraph_set, c(date, clickthroughs, `Result pages opened`)), + mobile = dplyr::select(mobile_dygraph_set, c(date, clickthroughs, `Result pages opened`)), + ios = dplyr::select(ios_dygraph_set, c(date, clickthroughs, `Result pages opened`)), + android = dplyr::select(android_dygraph_set, c(date, clickthroughs, `Result pages opened`)) + ) %>% + dplyr::bind_rows(.id = "platform") %>% + dplyr::group_by(date) %>% + dplyr::summarize(clickthroughs = sum(clickthroughs), serps = sum(`Result pages opened`)) %>% + dplyr::right_join(threshold_data, by = "date") %>% + dplyr::transmute( + date = date, + `Threshold-passing %` = threshold_pass, + `Clickthrough rate` = 100 * clickthroughs/serps, + `User engagement` = (threshold_pass + `Clickthrough rate`)/2 + ) } read_lethal_dose <- function() { - intermediary_dataset <- polloi::read_dataset("search/sample_page_visit_ld.tsv", col_types = "Diiiiiii") - colnames(intermediary_dataset) <- c("date", "10%", "25%", "50%", "75%", "90%", "95%", "99%") - user_page_visit_dataset <<- intermediary_dataset + user_page_visit_dataset <<- polloi::read_dataset("discovery/search/sample_page_visit_ld.tsv", col_types = "Dddddddd") %>% + dplyr::filter(!is.na(LD10)) %>% + set_colnames(c("date", "10%", "25%", "50%", "75%", "90%", "95%", "99%")) } read_paul_score <- function() { - data <- polloi::read_dataset("search/paulscore_approximations.tsv", col_types = "Dcddddddddd")[, c("date", "event_source", "pow_1", "pow_5", "pow_9")] - paulscore_autocomplete <<- data[data$event_source == "autocomplete", -2] %>% set_names(c("date", "F = 0.1", "F = 0.5", "F = 0.9")) - paulscore_fulltext <<- data[data$event_source == "fulltext", -2] %>% set_names(c("date", "F = 0.1", "F = 0.5", "F = 0.9")) + paulscore <- polloi::read_dataset("discovery/search/paulscore_approximations.tsv", col_types = "Dcddddddddd") %>% + dplyr::filter(!is.na(event_source)) %>% + dplyr::select(c(date, event_source, `F = 0.1` = pow_1, `F = 0.5` = pow_5, `F = 0.9` = pow_9)) + paulscore_autocomplete <<- dplyr::filter(paulscore, event_source == "autocomplete") %>% dplyr::select(-event_source) + paulscore_fulltext <<- dplyr::filter(paulscore, event_source == "fulltext") %>% dplyr::select(-event_source) } aggregate_wikis <- function(data, languages, projects) { -- To view, visit https://gerrit.wikimedia.org/r/335746 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id384962d485931ebcb904e491ae0bf641d38c9bf Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/rainbow Gerrit-Branch: master Gerrit-Owner: Bearloga <mpo...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits