Bearloga has submitted this change and it was merged.
Change subject: Port all common Rainbow functionality to Polloi
......................................................................
Port all common Rainbow functionality to Polloi
Finishes switching Rainbow stuff over to Polloi.
Bug: T112700
Change-Id: I28aa4ef384d5e50d4094dd64abdc47dda52843eb
---
M .Rbuildignore
M DESCRIPTION
M NAMESPACE
M R/dygraphs.R
A R/manipulate.R
A R/maths.R
M R/smoothing.R
A man/compress.Rd
A man/cond_color.Rd
A man/cond_icon.Rd
A man/half.Rd
A man/mad.Rd
A man/percent_change.Rd
A man/safe_tail.Rd
A man/smoother.Rd
15 files changed, 377 insertions(+), 2 deletions(-)
Approvals:
Bearloga: Verified; Looks good to me, approved
diff --git a/.Rbuildignore b/.Rbuildignore
index 91114bf..6903bea 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,2 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
+.gitreview
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index b8582f8..e1728b4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -8,10 +8,14 @@
Description: This package (which I can say because BDR is nowhere in sight)
contains common functionality
for all of the Wikimedia Foundation's shiny dashboards.
License: MIT + file LICENSE
-Depends:
+Imports:
magrittr,
dygraphs,
RColorBrewer,
xts,
- readr
+ readr,
+ lubridate,
+ plyr,
+ shiny,
+ zoo
LazyData: TRUE
\ No newline at end of file
diff --git a/NAMESPACE b/NAMESPACE
index d51ec78..e71f30d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,14 +1,28 @@
# Generated by roxygen2 (4.1.1): do not edit by hand
+export(compress)
+export(cond_color)
+export(cond_icon)
+export(half)
+export(mad)
export(make_dygraph)
+export(percent_change)
export(read_dataset)
+export(safe_tail)
export(smooth_switch)
+export(smoother)
importFrom(RColorBrewer,brewer.pal)
importFrom(dygraphs,dyCSS)
importFrom(dygraphs,dyLegend)
importFrom(dygraphs,dyOptions)
importFrom(dygraphs,dygraph)
importFrom(dygraphs,renderDygraph)
+importFrom(lubridate,month)
+importFrom(lubridate,week)
+importFrom(lubridate,year)
importFrom(magrittr,"%>%")
+importFrom(plyr,ddply)
importFrom(readr,read_delim)
+importFrom(shiny,icon)
importFrom(xts,xts)
+importFrom(zoo,rollmean)
diff --git a/R/dygraphs.R b/R/dygraphs.R
index d68f548..3b9e011 100644
--- a/R/dygraphs.R
+++ b/R/dygraphs.R
@@ -54,4 +54,42 @@
drawPoints = FALSE, pointSize = 3, labelsKMB = use_si,
includeZero = TRUE) %>%
dyCSS(css = system.file("custom.css", package = "polloi")))
+}
+
+#'@title Select a Colour Conditionally
+#'@description select a colour based on the true/false nature of a condition.
+#'Uses green as the "true" colour by default, "red" as false, and
+#'
+#'@param condition a condition to be evaluated to produce a single TRUE or
FALSE value
+#'
+#'@param true_color the colour used to represent a TRUE result. Green by
default.
+#'
+#'@export
+cond_color <- function(condition, true_color = "green") {
+ if(is.na(condition)){
+ return("black")
+ }
+
+ colours <- c("green","red")
+ return(ifelse(condition, true_color, colours[!colours == true_color]))
+}
+
+#'@title Select an appropriate directional icon
+#'@description allows you to select an appropriate directional icon for a
change
+#'in condition.
+#'
+#'@param condition a condition to be evaluated to produce a single TRUE/FALSE
+#'
+#'@param true_direction which direction represents a positive change. "up" by
+#'default.
+#'
+#'@importFrom shiny icon
+#'@export
+cond_icon <- function(condition, true_direction = "up") {
+
+ if (true_direction == "up") {
+ return(shiny::icon(ifelse(condition, "arrow-up", "arrow-down")))
+ }
+
+ return(shiny::icon(ifelse(condition, "arrow-down", "arrow-up")))
}
\ No newline at end of file
diff --git a/R/manipulate.R b/R/manipulate.R
new file mode 100644
index 0000000..6a749df
--- /dev/null
+++ b/R/manipulate.R
@@ -0,0 +1,52 @@
+#'@title Safely retrieve the last N values from an object
+#'
+#'@description using tail() to get the last sequential values in an object
relies
+#'on that object being ordered, which it sometimes isn't due to backfilling.
\code{safe_tail}
+#'retrieves the last N values in a "safe" way, taking the possibility of
unordered data into
+#'account.
+#'
+#'@param x an object to tail
+#'
+#'@param n the number of values to take
+#'
+#'@param silent whether to produce warnings and messages or not. TRUE by
default.
+#'
+#'@export
+safe_tail <- function(x, n, silent = TRUE) {
+ if (!is.vector(x) && !is.data.frame(x)) {
+ stop("safe_trail() only works with vectors and data frames.")
+ }
+ # \code{silent} suppresses messages which may be used for debugging
+ if (is.vector(x)) {
+ return(tail(sort(x), n))
+ }
+ # Intelligently figure out which column is the date/timestamp column (in
case it's not the first column):
+ timestamp_column <- names(x)[sapply(x, class) %in% c("Date", "POSIXt",
"POSIXlt", "POSIXct")]
+ if (length(timestamp_column) == 0) {
+ if (!silent) {
+ message("No date/timestamp column detected for this dataset. It'd be
faster to use tail().")
+ }
+ return(tail(x, n))
+ }
+ if (length(timestamp_column) > 1) warning("More than one date/timestamp
column detected. Defaulting to the first one.")
+ if (!silent) {
+ message("Sorting by the date/timestamp column before returning the bottom
", n, " rows.")
+ }
+ return(tail(x[order(x[[timestamp_column[1]]]), ], n))
+}
+
+#'@title Sample Half an Object
+#'@description easily sample the top or bottom half of an object.
+#'
+#'@param x the object to sample from
+#'
+#'@param top whether it should be the top (TRUE) or bottom (FALSE) half.
+#'Set to TRUE by default.
+#'
+#'@export
+half <- function(x, top = TRUE){
+ if(top){
+ return(head(x, n = length(x)/2))
+ }
+ return(tail(x, n = length(x)/2))
+}
\ No newline at end of file
diff --git a/R/maths.R b/R/maths.R
new file mode 100644
index 0000000..d833d73
--- /dev/null
+++ b/R/maths.R
@@ -0,0 +1,47 @@
+#'@title Median Absolute Deviation
+#'@description computes the Median Absolute Deviation, or MAD. It's like
standard deviations
+#'only it doesn't suck terribly for any dataset we'd actually encounter
outside a classroom.
+#'
+#'@param x a numeric vector
+#'
+#'@export
+mad <- function(x){
+ median(abs(x - median(x)))
+}
+
+#'@title Calculate a Percentage Change
+#'
+#'@description calculates a delta between X or Y (or sequential X values)
+#'expressed as a percentage.
+#'
+#'@param x a numeric vector
+#'
+#'@param y (optionally) an additional numeric vector. If y is not provided,
each
+#'value in x will be compared to the subsequent value - if y is, each x value
will
+#'be compared to the equivalent y value.
+#'
+#'@export
+percent_change <- function(x, y = NULL) {
+ if(is.null(y)) {
+ return(100 * (x - c(NA, x[-length(x)])) / c(NA, x[-length(x)]))
+ }
+ return(100 * (y - x) / x)
+}
+
+#'@title Convert Numeric Values to use SI suffixes
+#'
+#'@description takes a numeric vector (1200, 1300, 1400) and converts it to
+#'use SI suffixes (1.2K, 1.3K, 1.4k)
+#'
+#'@param x a vector of numeric or integer values
+#'
+#'@param round_by how many digits to round the resulting numbers by.
+#'
+#'@export
+compress <- function(x, round_by = 2) {
+ # by StackOverflow user 'BondedDust' : http://stackoverflow.com/a/28160474
+ div <- findInterval(as.numeric(gsub("\\,", "", x)),
+ c(1, 1e3, 1e6, 1e9, 1e12) )
+ paste(round( as.numeric(gsub("\\,","",x))/10^(3*(div-1)), round_by),
+ c("","K","M","B","T")[div], sep = "" )
+}
\ No newline at end of file
diff --git a/R/smoothing.R b/R/smoothing.R
index 4da9fe6..73a7063 100644
--- a/R/smoothing.R
+++ b/R/smoothing.R
@@ -15,4 +15,71 @@
return(global)
}
return(local)
+}
+
+#' @title Dynamically Smooth Data
+#'
+#' @description Takes an untidy (read: dygraph-appropriate) dataset and adds
+#' columns for each variable consisting of the smoothed, averaged mean.
+#'
+#' @param dataset an untidy, dygraph-appropriate data.frame
+#'
+#' @param smooth_level the level of smoothing. Options are "day", "moving_avg",
+#' "week" and "month".
+#'
+#' @param rename whether to rename the fields once smoothed. TRUE by default.
+#'
+#' @export
+#' @importFrom plyr ddply
+#' @importFrom lubridate week year month
+#' @importFrom zoo rollmean
+smoother <- function(dataset, smooth_level = "day", rename = TRUE) {
+
+ # Determine the names and levels of aggregation. By default
+ # a smoothing level of "day" is assumed, which is no smoothing
+ # whatsoever, and so the original dataset is returned.
+ switch(smooth_level,
+ moving_avg = {
+ df <- apply(dataset[, -1, drop = FALSE], 2, function(x) {
+ y <- xts::xts(x, dataset[, 1])
+ return(as.numeric(zoo::rollmean(x, k = 17, fill = NA)))
+ }) %>% as.data.frame %>% cbind(timestamp = dataset[, 1], .)
+ names(df) <- names(dataset)
+ if (rename) names(df)[-1] <- paste(names(df)[-1], " (Moving
average)")
+ return(df)
+ },
+ week = {
+ dataset$filter_1 <- lubridate::week(dataset[, 1])
+ dataset$filter_2 <- lubridate::year(dataset[, 1])
+ name_append <- ifelse(rename, " (Weekly average)", "")
+ },
+ month = {
+ dataset$filter_1 <- lubridate::month(dataset[, 1])
+ dataset$filter_2 <- lubridate::year(dataset[, 1])
+ name_append <- ifelse(rename, " (Monthly average)", "")
+ },
+ {
+ return(dataset)
+ }
+ )
+
+ # If we're still here it was weekly or monthly. Calculate
+ # the average for each unique permutation of filters
+
+ result <- plyr::ddply(.data = dataset,
+ .variables = c("filter_1", "filter_2"),
+ .fun = function(df, name_append){
+
+ # Construct output names for the averages, compute
those averages, and
+ # apply said names.
+ output_names <- paste0(names(df)[2:(ncol(df) - 2)],
name_append)
+ holding <- apply(df[, 2:(ncol(df) - 2), drop =
FALSE], 2, FUN = median) %>%
+ round %>% t %>% as.data.frame
+ names(holding) <- output_names
+
+ # Return the bound original values and averaged
values
+ return(cbind(df[, 1, drop = FALSE], holding))
+ }, name_append = name_append)
+
+ return(result[, !(names(result) %in% c("filter_1","filter_2"))])
}
\ No newline at end of file
diff --git a/man/compress.Rd b/man/compress.Rd
new file mode 100644
index 0000000..5aa2a33
--- /dev/null
+++ b/man/compress.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/maths.R
+\name{compress}
+\alias{compress}
+\title{Convert Numeric Values to use SI suffixes}
+\usage{
+compress(x, round_by = 2)
+}
+\arguments{
+\item{x}{a vector of numeric or integer values}
+
+\item{round_by}{how many digits to round the resulting numbers by.}
+}
+\description{
+takes a numeric vector (1200, 1300, 1400) and converts it to
+use SI suffixes (1.2K, 1.3K, 1.4k)
+}
+
diff --git a/man/cond_color.Rd b/man/cond_color.Rd
new file mode 100644
index 0000000..3faa7ae
--- /dev/null
+++ b/man/cond_color.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/dygraphs.R
+\name{cond_color}
+\alias{cond_color}
+\title{Select a Colour Conditionally}
+\usage{
+cond_color(condition, true_color = "green")
+}
+\arguments{
+\item{condition}{a condition to be evaluated to produce a single TRUE or FALSE
value}
+
+\item{true_color}{the colour used to represent a TRUE result. Green by
default.}
+}
+\description{
+select a colour based on the true/false nature of a condition.
+Uses green as the "true" colour by default, "red" as false, and
+}
+
diff --git a/man/cond_icon.Rd b/man/cond_icon.Rd
new file mode 100644
index 0000000..9b79ddc
--- /dev/null
+++ b/man/cond_icon.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/dygraphs.R
+\name{cond_icon}
+\alias{cond_icon}
+\title{Select an appropriate directional icon}
+\usage{
+cond_icon(condition, true_direction = "up")
+}
+\arguments{
+\item{condition}{a condition to be evaluated to produce a single TRUE/FALSE}
+
+\item{true_direction}{which direction represents a positive change. "up" by
+default.}
+}
+\description{
+allows you to select an appropriate directional icon for a change
+in condition.
+}
+
diff --git a/man/half.Rd b/man/half.Rd
new file mode 100644
index 0000000..4085ac9
--- /dev/null
+++ b/man/half.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/manipulate.R
+\name{half}
+\alias{half}
+\title{Sample Half an Object}
+\usage{
+half(x, top = TRUE)
+}
+\arguments{
+\item{x}{the object to sample from}
+
+\item{top}{whether it should be the top (TRUE) or bottom (FALSE) half.
+Set to TRUE by default.}
+}
+\description{
+easily sample the top or bottom half of an object.
+}
+
diff --git a/man/mad.Rd b/man/mad.Rd
new file mode 100644
index 0000000..00e8047
--- /dev/null
+++ b/man/mad.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/maths.R
+\name{mad}
+\alias{mad}
+\title{Median Absolute Deviation}
+\usage{
+mad(x)
+}
+\arguments{
+\item{x}{a numeric vector}
+}
+\description{
+computes the Median Absolute Deviation, or MAD. It's like standard deviations
+only it doesn't suck terribly for any dataset we'd actually encounter outside
a classroom.
+}
+
diff --git a/man/percent_change.Rd b/man/percent_change.Rd
new file mode 100644
index 0000000..867808b
--- /dev/null
+++ b/man/percent_change.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/maths.R
+\name{percent_change}
+\alias{percent_change}
+\title{Calculate a Percentage Change}
+\usage{
+percent_change(x, y = NULL)
+}
+\arguments{
+\item{x}{a numeric vector}
+
+\item{y}{(optionally) an additional numeric vector. If y is not provided, each
+value in x will be compared to the subsequent value - if y is, each x value
will
+be compared to the equivalent y value.}
+}
+\description{
+calculates a delta between X or Y (or sequential X values)
+expressed as a percentage.
+}
+
diff --git a/man/safe_tail.Rd b/man/safe_tail.Rd
new file mode 100644
index 0000000..40572f9
--- /dev/null
+++ b/man/safe_tail.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/manipulate.R
+\name{safe_tail}
+\alias{safe_tail}
+\title{Safely retrieve the last N values from an object}
+\usage{
+safe_tail(x, n, silent = TRUE)
+}
+\arguments{
+\item{x}{an object to tail}
+
+\item{n}{the number of values to take}
+
+\item{silent}{whether to produce warnings and messages or not. TRUE by
default.}
+}
+\description{
+using tail() to get the last sequential values in an object relies
+on that object being ordered, which it sometimes isn't due to backfilling.
\code{safe_tail}
+retrieves the last N values in a "safe" way, taking the possibility of
unordered data into
+account.
+}
+
diff --git a/man/smoother.Rd b/man/smoother.Rd
new file mode 100644
index 0000000..a1f5a3b
--- /dev/null
+++ b/man/smoother.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2 (4.1.1): do not edit by hand
+% Please edit documentation in R/smoothing.R
+\name{smoother}
+\alias{smoother}
+\title{Dynamically Smooth Data}
+\usage{
+smoother(dataset, smooth_level = "day", rename = TRUE)
+}
+\arguments{
+\item{dataset}{an untidy, dygraph-appropriate data.frame}
+
+\item{smooth_level}{the level of smoothing. Options are "day", "moving_avg",
+"week" and "month".}
+
+\item{rename}{whether to rename the fields once smoothed. TRUE by default.}
+}
+\description{
+Takes an untidy (read: dygraph-appropriate) dataset and adds
+columns for each variable consisting of the smoothed, averaged mean.
+}
+
--
To view, visit https://gerrit.wikimedia.org/r/239853
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I28aa4ef384d5e50d4094dd64abdc47dda52843eb
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/polloi
Gerrit-Branch: master
Gerrit-Owner: OliverKeyes <[email protected]>
Gerrit-Reviewer: Bearloga <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits