Bearloga has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/379939 )
Change subject: Sister search prevalence by language ...................................................................... Sister search prevalence by language Adds the percentage of searches where the sister project search results were shown to the user. Change-Id: I4c59f2e693570b92d63d66826ca23400fc90be61 --- M CHANGELOG.md A modules/sister_search/prevalence.R M server.R A tab_documentation/sister_search_prevalence.md M ui.R M utils.R 6 files changed, 111 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow refs/changes/39/379939/1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 099e8a1..7ecd033 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## 2017/09/25 +- Added sister project search result prevalence + ## 2017/08/30 - Added SRP visit times ([T170468](https://phabricator.wikimedia.org/T170468)) - Added [dygraph-based rolling periods](https://rstudio.github.io/dygraphs/gallery-roll-periods.html) to page visit times modules diff --git a/modules/sister_search/prevalence.R b/modules/sister_search/prevalence.R new file mode 100644 index 0000000..0bedfa8 --- /dev/null +++ b/modules/sister_search/prevalence.R @@ -0,0 +1,37 @@ +output$sister_search_prevalence_lang_container <- renderUI({ + languages_to_display <- sister_search_averages$language + names(languages_to_display) <- sprintf("%s (%.1f%%)", sister_search_averages$language, sister_search_averages$avg) + if (input$sister_search_prevalence_lang_order != "alphabet") { + languages_to_display <- languages_to_display[order( + sister_search_averages$avg, + decreasing = input$sister_search_prevalence_lang_order == "high2low" + )] + } + if (!is.null(input$language_selector)) { + selected_language <- input$language_selector + } else { + selected_language <- languages_to_display[1] + } + return(selectInput( + "sister_search_prevalence_lang_selector", "Language", + multiple = TRUE, selectize = 
FALSE, size = 19, + choices = languages_to_display, selected = selected_language + )) +}) + +output$sister_search_prevalence_plot <- renderDygraph({ + req(input$sister_search_prevalence_lang_selector) + sister_search_prevalence %>% + dplyr::filter(language %in% input$sister_search_prevalence_lang_selector) %>% + tidyr::spread(language, prevalence, fill = 0) %>% + polloi::reorder_columns() %>% + polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_sister_search_prevalence_plot)) %>% + polloi::make_dygraph("Date", "Prevalence (%)", "Wikipedia searches that showed sister project search results") %>% + dyLegend(show = "always", width = 400, labelsDiv = "sister_search_prevalence_plot_legend") %>% + dyAxis("y", + axisLabelFormatter = "function(x) { return x + '%'; }", + valueFormatter = "function(x) { return Math.round(x * 100)/100 + '%'; }" + ) %>% + dyAxis("x", axisLabelFormatter = polloi::custom_axis_formatter) %>% + dyRangeSelector(fillColor = "", strokeColor = "") +}) diff --git a/server.R b/server.R index b91bcf9..21d45a2 100644 --- a/server.R +++ b/server.R @@ -66,6 +66,7 @@ source("modules/zero_results.R", local = TRUE) # Sister Search source("modules/sister_search/traffic.R", local = TRUE) + source("modules/sister_search/prevalence.R", local = TRUE) # Survival source("modules/page_visit_times.R", local = TRUE) # Language/Project Breakdown diff --git a/tab_documentation/sister_search_prevalence.md b/tab_documentation/sister_search_prevalence.md new file mode 100644 index 0000000..84b58fc --- /dev/null +++ b/tab_documentation/sister_search_prevalence.md @@ -0,0 +1,26 @@ +Sister project search results prevalence +======= +Sister project (cross-wiki) snippets is a feature that adds search results from sister projects of Wikipedia to a sidebar on the search engine results page (SERP). 
If a query results in matches from the sister projects, users will be shown snippets from Wiktionary, Wikisource, Wikiquote and/or other projects. See [T162276](https://phabricator.wikimedia.org/T162276) for more details. + +General trends +----- +* English Wikipedia has the highest prevalence with 75% of searches showing sister project results on average, followed by Chinese (73%) and French (70%) Wikipedias. +* 38% of languages show the sister project results in at least 50% of the searches made. + +Notes, outages, and inaccuracies +----- +* English Wikipedia has a different display than all the other languages due to community feedback. Specifically, it does not show results from Commons/multimedia, Wikinews, and Wikiversity. Refer to [T162276#3278689](https://phabricator.wikimedia.org/T162276#3278689) for more details. +* Languages without a lot of traffic also yield less (sampled) event logging data. In order to show somewhat-reliable numbers, languages with fewer than 20 recorded searches per day were excluded. Data on them is still available, though. + +Questions, bug reports, and feature suggestions +------ +For technical, non-bug questions, [email Mikhail](mailto:[email protected]?subject=Dashboard%20Question) or [Chelsy](mailto:[email protected]?subject=Dashboard%20Question). If you experience a bug or notice something wrong or have a suggestion, [open a ticket in Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery) in the Discovery board or [email Deb](mailto:[email protected]?subject=Dashboard%20Question). 
+ +<hr style="border-color: gray;"> +<p style="font-size: small;"> + <strong>Link to this dashboard:</strong> <a href="https://discovery.wmflabs.org/metrics/#sister_search_prevalence">https://discovery.wmflabs.org/metrics/#sister_search_prevalence</a> + | Page is available under <a href="https://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons Attribution-ShareAlike License">CC-BY-SA 3.0</a> + | <a href="https://phabricator.wikimedia.org/diffusion/WDRN/" title="Search Metrics Dashboard source code repository">Code</a> is licensed under <a href="https://phabricator.wikimedia.org/diffusion/WDRN/browse/master/LICENSE.md" title="MIT License">MIT</a> + | Part of <a href="https://discovery.wmflabs.org/">Discovery Dashboards</a> + | Data available at <a href="https://analytics.wikimedia.org/datasets/discovery/" title="Specifically: metrics/search/sister_search_prevalence.tsv">Wikimedia Analytics</a> +</p> diff --git a/ui.R b/ui.R index 441261b..1187242 100644 --- a/ui.R +++ b/ui.R @@ -67,7 +67,8 @@ menuSubItem(text = "Search Type Breakdown", tabName = "failure_breakdown"), menuSubItem(text = "Search Suggestions", tabName = "failure_suggestions")), menuItem(text = "Sister Search", - menuSubItem(text = "Traffic", tabName = "sister_search_traffic")), + menuSubItem(text = "Traffic", tabName = "sister_search_traffic"), + menuSubItem(text = "Prevalence", tabName = "sister_search_prevalence")), menuItem(text = "Page Visit Times", menuSubItem(text = "Search result pages", tabName = "spr_surv"), menuSubItem(text = "Visited search results", tabName = "survival")), @@ -353,6 +354,33 @@ dygraphOutput("sister_search_traffic_plot"), div(id = "sister_search_traffic_plot_legend"), includeMarkdown("./tab_documentation/sister_search_traffic.md")), + tabItem(tabName = "sister_search_prevalence", + fluidRow( + column(selectInput( + "sister_search_prevalence_lang_order", + "Sort languages by", + list( + "Alphabetical order" = "alphabet", + "Prevalence (high to low)" = "high2low", 
+ "Prevalence (low to high)" = "low2high" + ), + selected = "high2low" + ), width = 3), + column(polloi::smooth_select("smoothing_sister_search_prevalence_plot"), width = 9) + ), + fluidRow( + column( + uiOutput("sister_search_prevalence_lang_container"), + helpText("The % beside each language name is the average prevalence."), + width = 3 + ), + column( + dygraphOutput("sister_search_prevalence_plot"), + div(id = "sister_search_prevalence_plot_legend"), + width = 9 + ) + ), + includeMarkdown("./tab_documentation/sister_search_prevalence.md")), tabItem(tabName = "survival", fluidRow( column( diff --git a/utils.R b/utils.R index 91f2376..3ac5c5a 100644 --- a/utils.R +++ b/utils.R @@ -339,6 +339,21 @@ project = polloi::capitalize_first_letter(project), access_method = polloi::capitalize_first_letter(access_method) ) + sister_search_prevalence <<- polloi::read_dataset("discovery/metrics/search/sister_search_prevalence.tsv", col_types = "Dcii") %>% + dplyr::group_by(wiki_id) %>% + dplyr::mutate(include = all((has_sister_results + no_sister_results) > 20)) %>% + dplyr::ungroup() %>% + dplyr::filter(include) %>% + dplyr::mutate(prevalence = round(100 * has_sister_results / (has_sister_results + no_sister_results), 2)) %>% + dplyr::left_join( + polloi::get_langproj()[, c("wikiid", "language")], + by = c("wiki_id" = "wikiid") + ) %>% + dplyr::select(c(date, language, prevalence)) + sister_search_averages <<- sister_search_prevalence %>% + dplyr::group_by(language) %>% + dplyr::summarize(avg = mean(prevalence, na.rm = TRUE)) %>% + dplyr::arrange(language) } aggregate_wikis <- function(data, languages, projects, input_metric) { -- To view, visit https://gerrit.wikimedia.org/r/379939 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I4c59f2e693570b92d63d66826ca23400fc90be61 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/rainbow Gerrit-Branch: develop Gerrit-Owner: Bearloga <[email protected]> 
_______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
