Bearloga has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/391288 )

Change subject: Fix prevalence bugs
......................................................................

Fix prevalence bugs

(Hopefully) fixes:
- the shadow and the text are a bit off
- the list of projects/wikis that have maps enabled
- the project selector misbehaves—selecting Wikipedia
  and Commons also shows results from MediaWiki,
  Meta, and Wikivoyage
- the summary prevalence chart needs to be updated
  to take into account the small increases in
  mapframe/maplink prevalence (tab removed)
- can the lang/project selectors only show the
  projects/languages that have mapframe—when the
  user selects to only show mapframe?

Bug: T170022
Change-Id: Ia44972c0691f026f7ad73c1a78341eaef7743aa6
---
M CHANGELOG.md
D modules/kartographer/overall_prevalence.R
R modules/kartographer_prevalence.R
M server.R
D tab_documentation/overall_prevalence.md
M tab_documentation/prevalence_langproj.md
M ui.R
M utils.R
8 files changed, 61 insertions(+), 159 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/wetzel 
refs/changes/88/391288/1

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f3e77ee..f6265a2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # Change Log (Patch Notes)
 All notable changes to this project will be documented in this file.
 
+## 2017/11/13
+- Fixed some bugs 
([T170022#3678043](https://phabricator.wikimedia.org/T170022#3678043))
+- Removed "overall mapframe/maplink prevalence" tab
+
 ## 2017/09/18
 - Modularized the dashboard source code
 - Added maplink & mapframe prevalence graphs 
([T170022](https://phabricator.wikimedia.org/T170022))
diff --git a/modules/kartographer/overall_prevalence.R 
b/modules/kartographer/overall_prevalence.R
deleted file mode 100644
index cd28296..0000000
--- a/modules/kartographer/overall_prevalence.R
+++ /dev/null
@@ -1,7 +0,0 @@
-output$overall_prevalence_series <- renderDygraph({
-  prevalence %>%
-    polloi::reorder_columns() %>%
-    polloi::make_dygraph("Date", "Prevalence (%)", "Maplink and Mapframe 
prevalence on Wikimedia projects") %>%
-    dyLegend(labelsDiv = "overall_prevalence_series_legend", show = "always") 
%>%
-    dyRangeSelector(retainDateWindow = TRUE, fillColor = "", strokeColor = "")
-})
diff --git a/modules/kartographer/language-project_breakdown.R 
b/modules/kartographer_prevalence.R
similarity index 89%
rename from modules/kartographer/language-project_breakdown.R
rename to modules/kartographer_prevalence.R
index f10e982..38c4139 100644
--- a/modules/kartographer/language-project_breakdown.R
+++ b/modules/kartographer_prevalence.R
@@ -1,18 +1,35 @@
-output$language_selector_container <- renderUI({
-  req(input$project_selector)
-  temp_language <- available_languages
-  if ("Maplink" %in% input$prevalence_langproj_feature) {
-    temp_language <- temp_language[temp_language$maplink, ]
-  }
-  if ("Mapframe" %in% input$prevalence_langproj_feature) {
-    temp_language <- temp_language[temp_language$mapframe, ]
+output$project_selector_container <- renderUI({
+  temp_project <- available_projects
+
+  if (input$project_order == "alphabet") {
+    projects_to_display <- sort(temp_project$project)
+  } else {
+    projects_to_display <- 
temp_project$project[order(temp_project$total_articles, decreasing = TRUE)]
   }
 
+  return(selectInput(
+    "project_selector", "Project",
+    multiple = TRUE, selectize = FALSE, size = 19,
+    choices = projects_to_display, selected = projects_to_display[1]
+  ))
+})
+
+output$language_selector_container <- renderUI({
+  req(input$project_selector)
+  temp_language <- available_combos[available_combos$project %in% 
input$project_selector, ]
+  if ("Maplink" %in% input$prevalence_langproj_feature) {
+    temp_language <- dplyr::filter(temp_language, !is.na(maplink_articles), 
maplink_articles > 0)
+  }
+  if ("Mapframe" %in% input$prevalence_langproj_feature) {
+    temp_language <- dplyr::filter(temp_language, !is.na(mapframe_articles), 
mapframe_articles > 0)
+  }
+  req(temp_language$language)
   if (input$language_order == "alphabet") {
     languages_to_display <- sort(temp_language$language)
   } else {
-    languages_to_display <- 
temp_language$language[order(temp_language$articles, decreasing = TRUE)]
+    languages_to_display <- 
temp_language$language[order(pmax(temp_language$total_articles.x, 
temp_language$total_articles.y, na.rm = TRUE), decreasing = TRUE)]
   }
+  languages_to_display <- unique(languages_to_display)
 
   # e.g. if user sorts projects alphabetically and the selected project is 
"10th Anniversary of Wikipeda"
   #      then automatically select the language "(None)" to avoid giving user 
an error. This also works if
@@ -23,7 +40,7 @@
       if (!is.null(input$language_selector)) {
         selected_language <- union("(None)", input$language_selector)
       } else {
-        selected_language <- c("(None)", languages_to_display[[1]])
+        selected_language <- union("(None)", languages_to_display[[1]])
       }
     } else {
       selected_language <- "(None)"
@@ -43,34 +60,6 @@
   ))
 })
 
-output$project_selector_container <- renderUI({
-  temp_project <- available_projects
-  if ("Maplink" %in% input$prevalence_langproj_feature) {
-    temp_project <- temp_project[temp_project$maplink, ]
-  }
-  if ("Mapframe" %in% input$prevalence_langproj_feature) {
-    temp_project <- temp_project[temp_project$mapframe, ]
-  }
-
-  if (input$project_order == "alphabet") {
-    projects_to_display <- sort(temp_project$project)
-  } else {
-    projects_to_display <- temp_project$project[order(temp_project$articles, 
decreasing = TRUE)]
-  }
-
-  if (!is.null(input$project_selector)) {
-    selected_project <- input$project_selector
-  } else {
-    selected_project <- projects_to_display[1]
-  }
-
-  return(selectInput(
-    "project_selector", "Project",
-    multiple = TRUE, selectize = FALSE, size = 19,
-    choices = projects_to_display, selected = selected_project
-  ))
-})
-
 output$prevalence_langproj_plot <- renderDygraph({
   req(input$prevalence_langproj_feature, input$language_selector, 
input$project_selector)
 
@@ -84,6 +73,7 @@
     }
     if (length(input$language_selector) == 1 && input$language_selector[1] == 
"(None)") {
       feature_prevalence <- feature_prevalence %>%
+        dplyr::filter(project %in% input$project_selector) %>%
       {
         if (input$prevalence_langproj_aggregation %in% c("Average", "Median")) 
{
           if (input$prevalence_langproj_aggregation == "Average") {
diff --git a/server.R b/server.R
index 8a134aa..065c1c4 100644
--- a/server.R
+++ b/server.R
@@ -26,8 +26,7 @@
   # Kartotherian usage (tile requests):
   source("modules/kartotherian.R", local = TRUE)
   # Kartographer usage (maplink & mapframe):
-  source("modules/kartographer/overall_prevalence.R", local = TRUE)
-  source("modules/kartographer/language-project_breakdown.R", local = TRUE)
+  source("modules/kartographer_prevalence.R", local = TRUE)
   # Feature usage and geo-breakdown:
   source("modules/feature_usage.R", local = TRUE)
   source("modules/geographic_breakdown.R", local = TRUE)
diff --git a/tab_documentation/overall_prevalence.md 
b/tab_documentation/overall_prevalence.md
deleted file mode 100644
index a51db9e..0000000
--- a/tab_documentation/overall_prevalence.md
+++ /dev/null
@@ -1,39 +0,0 @@
-Overall maplink & mapframe usage
-=======
-
-[Kartographer](https://www.mediawiki.org/wiki/Extension:Kartographer) is a 
MediaWiki extension that allows editors to easily add [Wikimedia 
Maps](https://www.mediawiki.org/wiki/Maps) to articles. Editors (and bots) can 
add 
[*maplinks*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmaplink.3E)
 and 
[*mapframes*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmapframe.3E_usage)
 (where possible; refer to the list below) to pages such as places on 
Wikivoyage, landmarks on Wikipedia, and files on Wikimedia Commons.
-
-A **_maplink_** is a textual link (e.g. often coordinates) that a user can 
click on to view an interactive, potentially annotated map and is enabled on 
all Wikivoyage and Wikipedia languages. On Commons, camera coordinates -- which 
are automatically extracted from EXIF data for photo uploads -- show up as 
maplinks.
-
-A **_mapframe_** is a static thumbnail of a map that a user can click on to 
view an interactive, possibly annotated map and is enabled on all Wikivoyage 
languages but only some Wikipedias. As of September 18th, 2017, the mapframe 
feature is enabled on the following wikis:
-
-- [Metawiki](https://meta.wikimedia.org/)
-- [MediaWiki](https://www.mediawiki.org/)
-- [Wikimedia Ukraine](https://ua.wikimedia.org/)
-- [Wikivoyage](https://www.wikivoyage.org/) (all languages)
-- Wikipedia:
-    - [Catalan](https://ca.wikipedia.org/)
-    - [Hebrew](https://he.wikipedia.org/)
-    - [Russian](https://ru.wikipedia.org/)
-    - [Macedonian](https://mk.wikipedia.org/)
-    - [French](https://fr.wikipedia.org/)
-    - [Finnish](https://fi.wikipedia.org)
-    - [Norwegian](https://no.wikipedia.org/)
-    - [Swedish](https://sv.wikipedia.org/)
-    - [Portuguese](https://pt.wikipedia.org/)
-    - [Czech](https://cs.wikipedia.org/)
-    - [Basque](https://eu.wikipedia.org/)
-
-The overall prevalence is computed per-project by dividing the total number of 
articles with a maplink/mapframe across all languages of the project by the 
total number of articles across all languages of the project. In the case of 
Wikimedia Commons, MediaWiki, and Meta wiki (which are not multi-language), the 
overall prevalence is just those prevalence in those projects.
-
-Questions, bug reports, and feature suggestions
-------
-For technical, non-bug questions, [email 
Mikhail](mailto:mpo...@wikimedia.org?subject=Dashboard%20Question) or 
[Chelsy](mailto:c...@wikimedia.org?subject=Dashboard%20Question). If you 
experience a bug or notice something wrong or have a suggestion, [open a ticket 
in 
Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery)
 in the Discovery board or [email 
Deb](mailto:d...@wikimedia.org?subject=Dashboard%20Question).
-
-<hr style="border-color: gray;">
-<p style="font-size: small;">
-  <strong>Link to this dashboard:</strong> <a 
href="https://discovery.wmflabs.org/maps/#kartographer_prevalence">https://discovery.wmflabs.org/maps/#kartographer_prevalence</a>
-  | Page is available under <a 
href="https://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons 
Attribution-ShareAlike License">CC-BY-SA 3.0</a>
-  | <a href="https://phabricator.wikimedia.org/diffusion/WDWZ/" 
title="Wikimedia Maps Dashboard source code repository">Code</a> is licensed 
under <a 
href="https://phabricator.wikimedia.org/diffusion/WDWZ/browse/master/LICENSE.md"
 title="MIT License">MIT</a>
-  | Part of <a href="https://discovery.wmflabs.org/">Discovery Dashboards</a>
-</p>
diff --git a/tab_documentation/prevalence_langproj.md 
b/tab_documentation/prevalence_langproj.md
index 8294a15..8009f53 100644
--- a/tab_documentation/prevalence_langproj.md
+++ b/tab_documentation/prevalence_langproj.md
@@ -23,6 +23,7 @@
     - [Portuguese](https://pt.wikipedia.org/)
     - [Czech](https://cs.wikipedia.org/)
     - [Basque](https://eu.wikipedia.org/)
+    - [Spanish](https://es.wikipedia.org/)
 
 Notes
 -----
@@ -33,6 +34,7 @@
     * __Overall__: divides the total number of articles with a 
maplink/mapframe by the total number of articles across all languages
     * __Average__: computes the prevalence on a per-language basis and then 
computes the average of those prevalences
     * __Median__: computes the prevalence on a per-language basis and then 
computes the median of those prevalences
+* Aggregation options have no effect when specific language(s) are 
selected or when only non-multilingual projects (such as MediaWiki) are 
selected.
 
 Questions, bug reports, and feature suggestions
 ------
diff --git a/ui.R b/ui.R
index ad7c902..dabd2a7 100644
--- a/ui.R
+++ b/ui.R
@@ -19,7 +19,6 @@
                            menuSubItem(text = "Users by style", tabName = 
"tiles_users_by_style"),
                            menuSubItem(text = "Tiles by zoom level", tabName = 
"tiles_total_by_zoom")),
                   menuItem(text = "Kartographer usage",
-                           menuSubItem(text = "Overall prevalence", tabName = 
"kartographer_prevalence"),
                            menuSubItem(text = "Language/project breakdown", 
tabName = "kartographer_langproj")),
                   menuItem(text = "Platform usage", tabName = 
"platform_usage"),
                   menuItem(text = "Feature usage",
@@ -95,10 +94,6 @@
                 polloi::automata_select("tile_zoom_automata_check"),
                 div(id = "tiles_zoom_series_legend", style = "text-align: 
right;"),
                 includeMarkdown("./tab_documentation/tiles_total_by_zoom.md")),
-        tabItem(tabName = "kartographer_prevalence",
-                dygraphOutput("overall_prevalence_series"),
-                div(id = "overall_prevalence_series_legend", style = 
"text-align: right;"),
-                includeMarkdown("./tab_documentation/overall_prevalence.md")),
         tabItem(tabName = "kartographer_langproj",
                 fluidRow(
                   
column(polloi::smooth_select("smoothing_prevalence_langproj"), width = 4),
@@ -119,19 +114,19 @@
                                 list("Alphabetical order" = "alphabet", 
"Volume of articles" = "volume"),
                                 selected = "volume"),
                     uiOutput("project_selector_container"),
-                    width = 2
+                    width = 3
                   ),
                   column(
                     selectInput("language_order", "Sort languages by",
                                 list("Alphabetical order" = "alphabet", 
"Volume of articles" = "volume"),
                                 selected = "volume"),
                     uiOutput("language_selector_container"),
-                    width = 2
+                    width = 3
                   ),
                   column(
                     dygraphOutput("prevalence_langproj_plot"),
                     div(id = "prevalence_langproj_legend", style = 
"text-align: right;"),
-                    width = 8
+                    width = 6
                   )
                 ),
                 includeMarkdown("./tab_documentation/prevalence_langproj.md")),
diff --git a/utils.R b/utils.R
index 55f3d8a..b5da2f6 100644
--- a/utils.R
+++ b/utils.R
@@ -39,8 +39,14 @@
 }
 
 read_prevalence <- function() {
-  projects_db <<- readr::read_csv(system.file("extdata/projects.csv", package 
= "polloi"), col_types = "cclc")[, c("project", "multilingual")]
-  lang_proj <- polloi::get_langproj()
+  projects_db <<- readr::read_csv(system.file("extdata/projects.csv", package 
= "polloi"), col_types = "cclc") %>%
+    dplyr::select(project, multilingual) %>%
+    dplyr::mutate(project = dplyr::if_else(project == "Wikimedia Commons", 
"Commons", project))
+  lang_proj <- polloi::get_langproj() %>%
+    dplyr::mutate(
+      project = dplyr::if_else(project == "Wikimedia Commons", "Commons", 
project),
+      wikiid = gsub("-", "_", wikiid, fixed = TRUE)
+    )
   maplinks <- 
polloi::read_dataset("discovery/metrics/maps/maplink_prevalence.tsv", col_types 
= "Dcii-")
   mapframes <- 
polloi::read_dataset("discovery/metrics/maps/mapframe_prevalence.tsv", 
col_types = "Dcii-")
   maplink_prevalence <<- maplinks %>%
@@ -49,72 +55,24 @@
   mapframe_prevalence <<- mapframes %>%
     dplyr::left_join(lang_proj, by = c("wiki" = "wikiid")) %>%
     dplyr::filter(!is.na(project))
-  available_languages_maplink <- maplink_prevalence %>%
-    dplyr::mutate(language = dplyr::if_else(is.na(language), "(None)", 
language)) %>%
-    dplyr::group_by(language) %>%
-    dplyr::top_n(1, date) %>%
-    dplyr::summarize(articles = sum(total_articles), maplink = TRUE) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(c(language, articles, maplink))
-  available_languages_mapframe <- mapframe_prevalence %>%
-    dplyr::mutate(language = dplyr::if_else(is.na(language), "(None)", 
language)) %>%
-    dplyr::group_by(language) %>%
-    dplyr::top_n(1, date) %>%
-    dplyr::summarize(articles = sum(total_articles), mapframe = TRUE) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(c(language, articles, mapframe))
-  available_languages <<- dplyr::full_join(
-    available_languages_maplink,
-    available_languages_mapframe,
-    by = "language"
-  ) %>%
-    dplyr::mutate(
-      articles = pmax(articles.x, articles.y, na.rm = TRUE),
-      maplink = dplyr::if_else(is.na(maplink), FALSE, maplink),
-      mapframe = dplyr::if_else(is.na(mapframe), FALSE, mapframe)
-    ) %>%
-    dplyr::select(-c(articles.x, articles.y))
-  available_projects_maplink <- maplink_prevalence %>%
-    dplyr::group_by(project) %>%
-    dplyr::top_n(1, date) %>%
-    dplyr::summarize(articles = sum(total_articles), maplink = TRUE) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(c(project, articles, maplink))
-  available_projects_mapframe <- mapframe_prevalence %>%
-    dplyr::group_by(project) %>%
-    dplyr::top_n(1, date) %>%
-    dplyr::summarize(articles = sum(total_articles), mapframe = TRUE) %>%
-    dplyr::ungroup() %>%
-    dplyr::select(c(project, articles, mapframe))
-  available_projects <<- dplyr::full_join(
-    available_projects_maplink,
-    available_projects_mapframe,
-    by = "project"
-  ) %>%
-    dplyr::mutate(
-      articles = pmax(articles.x, articles.y, na.rm = TRUE),
-      maplink = dplyr::if_else(is.na(maplink), FALSE, maplink),
-      mapframe = dplyr::if_else(is.na(mapframe), FALSE, mapframe)
-    ) %>%
-    dplyr::select(-c(articles.x, articles.y))
-  prevalence <<- dplyr::inner_join(
-    maplinks, mapframes,
+  available_combos <<- dplyr::full_join(
+    dplyr::distinct(dplyr::arrange(maplinks, wiki, desc(date)), wiki, 
.keep_all = TRUE),
+    dplyr::distinct(dplyr::arrange(mapframes, wiki, desc(date)), wiki, 
.keep_all = TRUE),
     by = c("date", "wiki")
   ) %>%
     dplyr::left_join(lang_proj, by = c("wiki" = "wikiid")) %>%
-    dplyr::filter(!is.na(project)) %>%
-    dplyr::select(-wiki) %>%
-    dplyr::group_by(date, project) %>%
-    dplyr::summarize(
-      Maplink = round(100 * sum(maplink_articles) / sum(total_articles.x), 2),
-      Mapframe = round(100 * sum(mapframe_articles) / sum(total_articles.y), 2)
+    dplyr::mutate(language = dplyr::if_else(is.na(language), "(None)", 
language)) %>%
+    dplyr::select(-date) %>%
+    dplyr::mutate(
+      total_articles.x = ifelse(is.na(total_articles.x), 0, total_articles.x),
+      total_articles.y = ifelse(is.na(total_articles.y), 0, total_articles.y)
+    )
+  available_projects <<- available_combos %>%
+    dplyr::mutate(
+      total_articles.x = ifelse(is.na(total_articles.x), 0, total_articles.x),
+      total_articles.y = ifelse(is.na(total_articles.y), 0, total_articles.y)
     ) %>%
-    dplyr::ungroup() %>%
-    tidyr::gather(feature, prevalence, -c(date, project)) %>%
-    dplyr::transmute(
-      date = date, prevalence = prevalence,
-      group = paste0(feature, " (", project, ")")
-    ) %>%
-    tidyr::spread(group, prevalence)
+    dplyr::group_by(project) %>%
+    dplyr::summarize(total_articles = sum(total_articles.x + total_articles.y, 
na.rm = TRUE))
   return(invisible())
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/391288
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia44972c0691f026f7ad73c1a78341eaef7743aa6
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/wetzel
Gerrit-Branch: develop
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to