GoranSMilovanovic has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/398692 )
Change subject: Labs processing 17 Dec 2017 ...................................................................... Labs processing 17 Dec 2017 Change-Id: I1cfad83ed71be0e046205a5cff09987df0a8ab7f --- M WDCM_EngineGeo_goransm.R A WDCM_Process.R A WDCM_Update_Labs.R 3 files changed, 1,159 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/wmde/WDCM refs/changes/92/398692/1 diff --git a/WDCM_EngineGeo_goransm.R b/WDCM_EngineGeo_goransm.R index 635606b..4a04518 100644 --- a/WDCM_EngineGeo_goransm.R +++ b/WDCM_EngineGeo_goransm.R @@ -288,14 +288,14 @@ ### --- join coordinates, items, labels, and usage setwd(dataDir) -# - list .tsv files +# - list .tsv files from dataDir lF <- list.files() w <- which(grepl("^wdcm_geoitem", lF)) lF <- lF[w] w <- which(grepl(".tsv", lF, fixed = T)) lF <- lF[w] -# - remove old .csv files: +# - remove old .csv files from dataDir rmF <- list.files() w <- which(grepl("^wdcm_geoitem", rmF)) rmF <- rmF[w] diff --git a/WDCM_Process.R b/WDCM_Process.R new file mode 100644 index 0000000..467892d --- /dev/null +++ b/WDCM_Process.R @@ -0,0 +1,910 @@ + +### --------------------------------------------------------------------------- +### --- WDCM Process Module, v. Beta 0.1 +### --- Script: WDCM_Process_v2.R, v. Beta 0.1 +### --------------------------------------------------------------------------- +### --- DESCRIPTION: +### --- WDCM_Process_v2.R takes a list of .tsv files that present +### --- the data from wbc_entity_usage tables accross the client projects +### --- fetched from production (stat1005) by WDCM_Search_Clients.R and +### --- further pre-processed by WDCM_Pre-Process.R (also on production). +### --- The goal of this WDCM module/script is to produce (or update) +### --- the WDCM Stats Dashboard database. +### --------------------------------------------------------------------------- +### --- INPUT: +### --- the WDCM_Process_v2.R reads the .tsv input files from: +### --- /home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2/ +### --- on the wikidataconcepts.eqiad.wmflabs Cloud VPS instance +### --- These files are brought to Labs directly from productio +### --- (currently the stat1005.eqiad.wmnet statbox) +### --------------------------------------------------------------------------- +### --- OUTPUT: the WDCM Dashboards MariaDB database is update +### --------------------------------------------------------------------------- + +### --------------------------------------------------------------------------- +### --- LICENSE: +### --------------------------------------------------------------------------- +### --- GPL v2 +### --- This file is part of Wikidata Concepts Monitor (WDCM) +### --- +### --- WDCM is free software: you can redistribute it and/or modify +### --- it under the terms of the GNU General Public License as published by +### --- the Free Software Foundation, either version 2 of the License, or +### --- (at your option) any later version. +### --- +### --- WDCM is distributed in the hope that it will be useful, +### --- but WITHOUT ANY WARRANTY; without even the implied warranty of +### --- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +### --- GNU General Public License for more details. +### --- +### --- You should have received a copy of the GNU General Public License +### --- along with WDCM. If not, see <http://www.gnu.org/licenses/>. +### --------------------------------------------------------------------------- + +### --- Setup +library(RMySQL) +library(httr) +library(XML) +library(data.table) +library(dplyr) +library(tidyr) +library(readr) +library(htmltab) +library(snowfall) +library(maptpx) +library(Rtsne) + +# - mysql --defaults-file=/home/goransm/mySQL_Credentials/replica.my.cnf -h tools.labsdb u16664__wdcm_p +# - database: u16664__wdcm_p + +### --- functions + +# - projectType() to determine project type +projectType <- function(projectName) { + unname(sapply(projectName, function(x) { + if (grepl("commons", x, fixed = T)) {"Commons" + } else if (grepl("mediawiki|meta|species|wikidata", x)) {"Other" + } else if (grepl("wiki$", x)) {"Wikipedia" + } else if (grepl("quote$", x)) {"Wikiquote" + } else if (grepl("voyage$", x)) {"Wikivoyage" + } else if (grepl("news$", x)) {"Wikinews" + } else if (grepl("source$", x)) {"Wikisource" + } else if (grepl("wiktionary$", x)) {"Wiktionary" + } else if (grepl("versity$", x)) {"Wikiversity" + } else if (grepl("books$", x)) {"Wikibooks" + } else {"Other"} + })) +} + +### --------------------------------------------------------------------------- +### --- NOTE: +### --- TABLE NAMING CONVENTION FOR v2 (WDCM Stats Dashboard) +### --- wdcm2_something +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("WDCM Process.R update started at: ", Sys.time(), sep = "")) + +# - to working directory: +# - credentials on tools.labsdb +setwd('/home/goransm/WMDE/WDCM/WDCM_RScripts') +mySQLCreds <- read.csv("mySQLCreds.csv", + header = T, + check.names = F, + row.names = 1, + stringsAsFactors = F) + +### --- list existing tables +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- "SHOW TABLES;" +res <- dbSendQuery(con, q) +st <- fetch(res, -1) +dbClearResult(res) +dbDisconnect(con) +colnames(st) <- "tables" +# - keep only v2 tables: +st <- st[which(grepl("^wdcm2", st$tables)), ] + +### --------------------------------------------------------------------------- +### --------------------------------------------------------------------------- +### --- CREATE TABLES for WDCM Projects Stats Dashboard +### --------------------------------------------------------------------------- +### --------------------------------------------------------------------------- + +# - to /home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2/ +inputDir <- '/home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2/' +setwd(inputDir) + +### --------------------------------------------------------------------------- +### --- wdcm2_project +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_project: ", Sys.time(), sep = "")) + +# - load wdcm_project.tsv as data.frame +wdcm2_project <- fread('wdcm_project.csv', data.table = F) +wdcm2_project$V1 <- NULL +# - check whether wdcm2_project table exists: +checkTable <- which(st %in% "wdcm2_project") +# - DROP wdcm2_project if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_project;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - CREATE wdcm2_project: +# - write to wdcm2_project +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +dbWriteTable(conn = con, + name = "wdcm2_project", + value = wdcm2_project, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project); gc() + +### --------------------------------------------------------------------------- +### --- wdcm2_category +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_category: ", Sys.time(), sep = "")) + +# - load wdcm_category.tsv as data.frame +wdcm2_category <- fread('wdcm_category.csv', data.table = F) +wdcm2_category$V1 <- NULL +# - check whether wdcm2_category table exists: +checkTable <- which(st %in% "wdcm2_category") +# - DROP wdcm2_category if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_category;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - CREATE wdcm2_category +# - write to wdcm2_category +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +dbWriteTable(conn = con, + name = "wdcm2_category", + value = wdcm2_category, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_category); gc() + +### --------------------------------------------------------------------------- +### --- wdcm2_project_category +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_project_category: ", Sys.time(), sep = "")) + +# - load wdcm_category.tsv as data.frame +wdcm2_project_category <- fread('wdcm_project_category.csv', data.table = F) +wdcm2_project_category$V1 <- NULL +### --- FIX THIS (!!!) +colnames(wdcm2_project_category)[4] <- 'projecttype' +### --- FIX THIS (!!!) +# - check whether wdcm2_project_category table exists: +checkTable <- which(st %in% "wdcm2_project_category") +# - DROP wdcm2_project_category if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_project_category;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - CREATE wdcm2_project_category +# - write to wdcm2_project_category +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'CREATE TABLE wdcm2_project_category ( + eu_project VARCHAR(255) NOT NULL, + category VARCHAR(37) NOT NULL, + eu_count INT, + projecttype VARCHAR(255) NOT NULL + ) + ENGINE=InnoDB + ROW_FORMAT=COMPRESSED;' +res <- dbSendQuery(con, qCreate) +dbClearResult(res) +# - populate wdcm2_project_category +dbWriteTable(conn = con, + name = "wdcm2_project_category", + value = wdcm2_project_category, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project_category); gc() +# - CREATE INDEX on wdcm2_project_category +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +# - CREATE INDEX +q <- "CREATE INDEX wdcm2_project_category_eu_project_category ON wdcm2_project_category (eu_project, category); " +res <- dbSendQuery(con, q) +dbClearResult(res) +dbDisconnect(con) + +### --------------------------------------------------------------------------- +### --- wdcm2_project_item100 +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_project_item100: ", Sys.time(), sep = "")) + +# - load wdcm_project_item100.tsv as data.frame +wdcm2_project_item100 <- fread('wdcm_project_item100.csv', data.table = F) +wdcm2_project_item100$V1 <- NULL +wdcm2_project_item100$t.row <- NULL +colnames(wdcm2_project_item100) <- c('eu_project', 'eu_entity_id', 'eu_count', 'projecttype') +# - check whether wdcm2_project_item100 table exists: +checkTable <- which(st %in% "wdcm2_project_item100") +# - DROP wdcm2_project_category if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_project_item100;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - get item labels for wdcm2_project_item100 +items <- gsub("Q", "", unique(wdcm2_project_item100$eu_entity_id), fixed = T) +itemList <- paste0(items, collapse = ", ", sep = "") +# - fetch English labels where available from wb_terms +con <- dbConnect(MySQL(), + host = "wikidatawiki.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "wikidatawiki_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- paste("SELECT term_entity_id, term_text FROM wb_terms ", + "WHERE ((term_entity_id IN (", itemList, ")) AND ", + "(term_language = 'en') AND ", + "(term_entity_type = 'item') AND (term_type = 'label'));", + sep = ""); +rm(itemList) +res <- dbSendQuery(con, q) +itemLabels <- fetch(res, -1) +dbClearResult(res) +dbDisconnect(con) +itemLabels$term_entity_id <- paste("Q", itemLabels$term_entity_id, sep = "") +colnames(itemLabels) <- c('eu_entity_id', 'eu_label') +# - insert labels to wdcm2_project_item100 + +wdcm2_project_item100 <- left_join(wdcm2_project_item100, itemLabels, by = 'eu_entity_id') +# - recognize missing English labels and replace w. the respective eu_entity_id value +wdcm2_project_item100$eu_label[is.na(wdcm2_project_item100$eu_label)] <- + wdcm2_project_item100$eu_entity_id[is.na(wdcm2_project_item100$eu_label)] +rm(itemLabels); gc() +# - CREATE wdcm2_project_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'CREATE TABLE wdcm2_project_item100 ( + eu_project VARCHAR(255) NOT NULL, + eu_entity_id VARCHAR(37) NOT NULL, + eu_count INT, + projecttype VARCHAR(255) NOT NULL, + eu_label VARCHAR(255) NOT NULL) + ENGINE=InnoDB + ROW_FORMAT=COMPRESSED;' +res <- dbSendQuery(con, qCreate) +dbClearResult(res) +# - populate wdcm2_project_item100 +dbWriteTable(conn = con, + name = "wdcm2_project_item100", + value = wdcm2_project_item100, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project_item100); gc() +# - CREATE INDEX on wdcm2_project_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- "CREATE INDEX wdcm2_project_item100_eu_project ON wdcm2_project_item100 (eu_project); " +res <- dbSendQuery(con, q) +dbClearResult(res) +q <- "CREATE INDEX wdcm2_project_item100_projecttype ON wdcm2_project_item100 (projecttype); " +res <- dbSendQuery(con, q) +dbClearResult(res) +dbDisconnect(con) + +### --------------------------------------------------------------------------- +### --- wdcm2_project_category_item100 +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_project_category_item100: ", Sys.time(), sep = "")) + +# - load wdcm_project_category_item100.tsv as data.frame +wdcm2_project_category_item100 <- fread('wdcm_project_category_item100.csv', data.table = F) +wdcm2_project_category_item100$V1 <- NULL +wdcm2_project_category_item100$t.row <- NULL +colnames(wdcm2_project_category_item100) <- c('eu_project', 'category', 'eu_entity_id', 'eu_count', 'projecttype') +# - check whether wdcm2_project_item100 table exists: +checkTable <- which(st %in% "wdcm2_project_category_item100") +# - DROP wdcm2_project_category if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_project_category_item100;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - get item labels for wdcm2_project_category_item100 +items <- gsub("Q", "", unique(wdcm2_project_category_item100$eu_entity_id), fixed = T) +itemList <- paste0(items, collapse = ", ", sep = "") +# - fetch English labels where available from wb_terms +con <- dbConnect(MySQL(), + host = "wikidatawiki.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "wikidatawiki_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- paste("SELECT term_entity_id, term_text FROM wb_terms ", + "WHERE ((term_entity_id IN (", itemList, ")) AND ", + "(term_language = 'en') AND ", + "(term_entity_type = 'item') AND (term_type = 'label'));", + sep = ""); +rm(itemList); gc() +res <- dbSendQuery(con, q) +itemLabels <- fetch(res, -1) +dbClearResult(res) +dbDisconnect(con) +itemLabels$term_entity_id <- paste("Q", itemLabels$term_entity_id, sep = "") +colnames(itemLabels) <- c('eu_entity_id', 'eu_label') +# - insert labels to wdcm2_project_category_item100 + +wdcm2_project_category_item100 <- left_join(wdcm2_project_category_item100, itemLabels, + by = 'eu_entity_id') +# - recognize missing English labels and replace w. the respective eu_entity_id value +wdcm2_project_category_item100$eu_label[is.na(wdcm2_project_category_item100$eu_label)] <- + wdcm2_project_category_item100$eu_entity_id[is.na(wdcm2_project_category_item100$eu_label)] +rm(itemLabels); gc() +# - CREATE wdcm2_project_category_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'CREATE TABLE wdcm2_project_category_item100 ( + eu_project VARCHAR(255) NOT NULL, + category VARCHAR(255) NOT NULL, + eu_entity_id VARCHAR(37) NOT NULL, + eu_count INT, + projecttype VARCHAR(255) NOT NULL, + eu_label VARCHAR(255) NOT NULL) + ENGINE=InnoDB + ROW_FORMAT=COMPRESSED;' +res <- dbSendQuery(con, qCreate) +dbClearResult(res) +# - populate wdcm2_project_category_item100 +dbWriteTable(conn = con, + name = "wdcm2_project_category_item100", + value = wdcm2_project_category_item100, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project_category_item100); gc() +# - CREATE INDEX on wdcm2_project_category_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- "CREATE INDEX wdcm2_project_category_item100_eu_project_category ON wdcm2_project_category_item100 (eu_project, category);" +res <- dbSendQuery(con, q) +dbClearResult(res) +q <- "CREATE INDEX wdcm2_project_category_item100_eu_project ON wdcm2_project_category_item100 (eu_project);" +res <- dbSendQuery(con, q) +dbClearResult(res) +q <- "CREATE INDEX wdcm2_project_category_item100_category ON wdcm2_project_category_item100 (category);" +res <- dbSendQuery(con, q) +dbClearResult(res) +q <- "CREATE INDEX wdcm2_project_category_item100_projecttype ON wdcm2_project_category_item100 (projecttype);" +res <- dbSendQuery(con, q) +dbClearResult(res) +dbDisconnect(con) + +### --------------------------------------------------------------------------- +### --- wdcm2_category_item100 +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_category_item100: ", Sys.time(), sep = "")) + +# - check whether wdcm2_category_item100 table exists: +checkTable <- which(st %in% "wdcm2_category_item100") +# - DROP wdcm2_category_item100 if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_category_item100;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - CREATE wdcm2_category_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'CREATE TABLE wdcm2_category_item100 ( + eu_entity_id VARCHAR(255) NOT NULL, + eu_count INT, + category VARCHAR(255) NOT NULL, + eu_label VARCHAR(255) NOT NULL) + ENGINE=InnoDB + ROW_FORMAT=COMPRESSED;' +res <- dbSendQuery(con, qCreate) +dbClearResult(res) +dbDisconnect(con) +# - populate wdcm2_category_item100 +itemFiles <- list.files() +itemFiles <- itemFiles[grepl("^wdcm_item", itemFiles)] +for (i in 1:length(itemFiles)) { + # - load categoryFile[i].tsv as data.frame + categoryName <- strsplit(itemFiles[i], ".", fixed = T)[[1]][1] + categoryName <- strsplit(categoryName, "_", fixed = T)[[1]][3] + categoryName <- gsub("([[:lower:]])([[:upper:]])", "\\1 \\2", categoryName) + ### --- FIX THIS (!!!) + if (categoryName == "Workof Art") { + categoryName <- "Work of Art" + } + ### --- FIX ABOVE (!!!) + categoryFile <- fread(itemFiles[i], nrows = 100) + categoryFile$category <- categoryName + # - get item labels for categoryFile[i] + items <- gsub("Q", "", unique(categoryFile$eu_entity_id), fixed = T) + itemList <- paste0(items, collapse = ", ", sep = "") + # - fetch English labels where available from wb_terms + con <- dbConnect(MySQL(), + host = "wikidatawiki.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "wikidatawiki_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- paste("SELECT term_entity_id, term_text FROM wb_terms ", + "WHERE ((term_entity_id IN (", itemList, ")) AND ", + "(term_language = 'en') AND ", + "(term_entity_type = 'item') AND (term_type = 'label'));", + sep = ""); + rm(itemList); gc() + res <- dbSendQuery(con, q) + itemLabels <- fetch(res, -1) + dbClearResult(res) + dbDisconnect(con) + itemLabels$term_entity_id <- paste("Q", itemLabels$term_entity_id, sep = "") + colnames(itemLabels) <- c('eu_entity_id', 'eu_label') + # - insert labels to categoryFile[i] + + categoryFile <- left_join(categoryFile, itemLabels, by = 'eu_entity_id') + # - recognize missing English labels and replace w. the respective eu_entity_id value + categoryFile$eu_label[is.na(categoryFile$eu_label)] <- + categoryFile$eu_entity_id[is.na(categoryFile$eu_label)] + rm(itemLabels); gc() + # - populate wdcm2_category_item100 by categoryFile[i] + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + dbWriteTable(conn = con, + name = "wdcm2_category_item100", + value = categoryFile, + row.names = F, + append = T) + dbDisconnect(con) + rm(categoryFile); gc() +} +# - CREATE INDEX on wdcm2_category_item100 +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- "CREATE INDEX wdcm2_category_item100_category ON wdcm2_category_item100 (category); " +res <- dbSendQuery(con, q) +dbClearResult(res) +dbDisconnect(con) + +### --------------------------------------------------------------------------- +### --- wdcm2_projects_2dmaps +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_projects_2dmaps: ", Sys.time(), sep = "")) + +# - check whether wdcm2_projects_2dmaps table exists: +checkTable <- which(st %in% "wdcm2_projects_2dmaps") +# - DROP wdcm2_projects_2dmaps if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_projects_2dmaps;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - CREATE wdcm2_projects_2dmaps +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'CREATE TABLE wdcm2_projects_2dmaps ( + D1 FLOAT, + D2 FLOAT, + eu_project VARCHAR(255) NOT NULL, + projecttype VARCHAR(255) NOT NULL, + category VARCHAR(37) NOT NULL) + ENGINE=InnoDB + ROW_FORMAT=COMPRESSED;' +res <- dbSendQuery(con, qCreate) +dbClearResult(res) +dbDisconnect(con) +# - populate wdcm2_projects_2dmaps +itemFiles <- list.files() +itemFiles <- itemFiles[grepl("^wdcm2_tsne", itemFiles)] +for (i in 1:length(itemFiles)) { + # - load categoryFile[i].tsv as data.frame + categoryName <- strsplit(itemFiles[i], ".", fixed = T)[[1]][1] + categoryName <- strsplit(categoryName, "_", fixed = T)[[1]][4] + categoryName <- gsub("([[:lower:]])([[:upper:]])", "\\1 \\2", categoryName) + ### --- FIX THIS (!!!) + if (categoryName == "Workof Art") { + categoryName <- "Work of Art" + } + ### --- FIX ABOVE (!!!) + categoryFile <- fread(itemFiles[i], data.table = F) + categoryFile$V1 <- NULL + categoryFile$category <- categoryName + colnames(categoryFile)[3] <- 'eu_project' + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + dbWriteTable(conn = con, + name = "wdcm2_projects_2dmaps", + value = categoryFile, + row.names = F, + append = T) + dbDisconnect(con) + rm(categoryFile); gc() +} +# - CREATE INDEX on wdcm2_projects_2dmaps +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +q <- "CREATE INDEX wdcm2_projects_2dmaps_category_euproject ON wdcm2_projects_2dmaps (category, eu_project);" +res <- dbSendQuery(con, q) +dbClearResult(res) +dbDisconnect(con) + +### --------------------------------------------------------------------------- +### --- wdcm2_project_category_2dmap +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_project_category_2dmap: ", Sys.time(), sep = "")) + +# - check whether wdcm2_project_category_2dmap table exists: +checkTable <- which(st %in% "wdcm2_project_category_2dmap") +# - DROP wdcm2_project_category_2dmap if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_project_category_2dmap;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - fetch wdcm2_project_category: +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'SELECT * FROM wdcm2_project_category;' +res <- dbSendQuery(con, qCreate) +wdcm2_project_category <- fetch(res, -1) +dbClearResult(res) +dbDisconnect(con) +# - wrangle wdcm2_project_category for t-SNE: +tsneData <- wdcm2_project_category[, 1:3] +tsneData <- spread(tsneData, + key = category, + value = eu_count, + fill = 0) +rownames(tsneData) <- tsneData$eu_project +tsneData$eu_project <- NULL +tsneData <- as.matrix(dist(tsneData, method = "euclidean")) +projects <- rownames(tsneData) +# - t-SNE 2D reduction: +tsneData <- Rtsne(tsneData, theta = .5, is_distance = T) +tsneData <- as.data.frame(tsneData$Y) +tsneData$projects <- projects +tsneData$projecttype <- projectType(tsneData$projects) +colnames(tsneData)[1:2] <- c('D1', 'D2') +# - populate wdcm2_project_category_2dmap +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +dbWriteTable(conn = con, + name = "wdcm2_project_category_2dmap", + value = tsneData, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project_category); rm(tsneData); gc() + +### --------------------------------------------------------------------------- +### --- wdcm2_category_project_2dmap +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_category_project_2dmap: ", Sys.time(), sep = "")) + +# - check whether wdcm2_category_project_2dmap table exists: +checkTable <- which(st %in% "wdcm2_category_project_2dmap") +# - DROP wdcm2_category_project_2dmap if it exists +if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- "DROP TABLE wdcm2_category_project_2dmap;" + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) +} +# - fetch wdcm2_project_category: +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +qCreate <- 'SELECT * FROM wdcm2_project_category;' +res <- dbSendQuery(con, qCreate) +wdcm2_project_category <- fetch(res, -1) +dbClearResult(res) +dbDisconnect(con) +# - wrangle wdcm2_project_category for t-SNE: +tsneData <- wdcm2_project_category[, 1:3] +tsneData <- spread(tsneData, + key = category, + value = eu_count, + fill = 0) +rownames(tsneData) <- tsneData$eu_project +tsneData$eu_project <- NULL +tsneData <- as.matrix(dist(t(tsneData), method = "euclidean")) +category <- rownames(tsneData) +# - t-SNE 2D reduction: +tsneData <- Rtsne(tsneData, theta = .5, is_distance = T, perplexity = 4) +tsneData <- as.data.frame(tsneData$Y) +tsneData$category <- category +colnames(tsneData)[1:2] <- c('D1', 'D2') +# - populate wdcm2_category_project_2dmap +con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) +dbWriteTable(conn = con, + name = "wdcm2_category_project_2dmap", + value = tsneData, + row.names = F, + append = T) +dbDisconnect(con) +rm(wdcm2_project_category); rm(tsneData); gc() + +### --------------------------------------------------------------------------- +### --- wdcm2_itemtopic_ tables +### --------------------------------------------------------------------------- + +# - to nohup.out +print(paste("wdcm2_itemtopic_ tables: ", Sys.time(), sep = "")) + +lF <- list.files() +lF <- lF[grepl("wdcm2_itemtopic_", lF, fixed = T)] +for (i in 1:length(lF)) { + # - check whether the current wdcm2_itemtopic_ table exists: + tName <- gsub(".csv", "", lF[i]) + checkTable <- which(st %in% tName) + # - DROP current wdcm2_itemtopic_ table if it exists + if (length(checkTable) == 1) { + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- paste("DROP TABLE ", tName, ";", sep = "") + res <- dbSendQuery(con, q) + dbClearResult(res) + dbDisconnect(con) + } + # - populate current wdcm2_itemtopic_ + cTable <- fread(lF[i]) + colnames(cTable)[1] <- 'eu_entity_id' + # - get item labels for current wdcm2_itemtopic_ + items <- gsub("Q", "", unique(cTable$eu_entity_id), fixed = T) + itemList <- paste0(items, collapse = ", ", sep = "") + # - fetch English labels where available from wb_terms + con <- dbConnect(MySQL(), + host = "wikidatawiki.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "wikidatawiki_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + q <- paste("SELECT term_entity_id, term_text FROM wb_terms ", + "WHERE ((term_entity_id IN (", itemList, ")) AND ", + "(term_language = 'en') AND ", + "(term_entity_type = 'item') AND (term_type = 'label'));", + sep = ""); + rm(itemList) + res <- dbSendQuery(con, q) + itemLabels <- fetch(res, -1) + dbClearResult(res) + dbDisconnect(con) + itemLabels$term_entity_id <- paste("Q", itemLabels$term_entity_id, sep = "") + colnames(itemLabels) <- c('eu_entity_id', 'eu_label') + # - insert labels to current wdcm2_itemtopic_ + cTable <- left_join(cTable, itemLabels, by = 'eu_entity_id') + # - recognize missing English labels and replace w. the respective eu_entity_id value + cTable$eu_label[is.na(cTable$eu_label)] <- + cTable$eu_entity_id[is.na(cTable$eu_label)] + rm(itemLabels); gc() + # - write current wdcm2_itemtopic_ + con <- dbConnect(MySQL(), + host = "tools.labsdb", + defult.file = "/home/goransm/mySQL_Credentials/replica.my.cnf", + dbname = "u16664__wdcm_p", + user = mySQLCreds$user, + password = mySQLCreds$password) + dbWriteTable(conn = con, + name = tName, + value = cTable, + row.names = F, + append = F) + dbDisconnect(con) +} + +### --- copy toLabsReport.csv to all WDCM Dashboards: +# - to nohup.out +print(paste("copy toLabsReport.csv to all WDCM Dashboards: ", Sys.time(), sep = "")) + +# - Overview Dashboard Update File +system('sudo cp /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsReport.csv /srv/shiny-server/WDCM_OverviewDashboard/update/', + wait = F) +# - Usage Dashboard Update File +system('sudo cp /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsReport.csv /srv/shiny-server/WDCM_UsageDashboard/update/', + wait = F) +# - Semantics Dashboard Update File +system('sudo cp /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsReport.csv /srv/shiny-server/WDCM_SemanticsDashboard/update/', + wait = F) +# - Semantics Dashboard Data Sets +lF <- list.files() +lF <- lF[grepl("wdcm2_projecttopic_", lF, fixed = T)] +for (i in 1:length(lF)) { + system(paste('sudo cp ', inputDir, + lF[i], + ' /srv/shiny-server/WDCM_SemanticsDashboard/data/', + sep = ''), + wait = F) +} +lF <- list.files() +lF <- lF[grepl("wdcm2_visNetworkNodes_project", lF, fixed = T)] +for (i in 1:length(lF)) { + system(paste('sudo cp ', inputDir, + lF[i], + ' /srv/shiny-server/WDCM_SemanticsDashboard/data/', + sep = ''), + wait = F) +} +lF <- list.files() +lF <- lF[grepl("wdcm2_visNetworkEdges_project", lF, fixed = T)] +for (i in 1:length(lF)) { + system(paste('sudo cp ', inputDir, + lF[i], + ' /srv/shiny-server/WDCM_SemanticsDashboard/data/', + sep = ''), + wait = F) +} + +### --- Report: Update completed +# - to nohup.out +print(paste("WDCM Dashboards Report Completed: ", Sys.time(), sep = "")) + + + diff --git a/WDCM_Update_Labs.R b/WDCM_Update_Labs.R new file mode 100644 index 0000000..a0bbfaf --- /dev/null +++ b/WDCM_Update_Labs.R @@ -0,0 +1,247 @@ +### --------------------------------------------------------------------------- +### --- WDCM Process Module, v. Beta 0.1 +### --- Script: WDCM_Update_Labs.R, v. Beta 0.1 +### --------------------------------------------------------------------------- +### --- DESCRIPTION: +### --- WDCM_Update_Labs.R runs on cron from wikidataconcepts.eqiad.wmflabs +### --- periodically checking the changes in /srv/published-datasets/wdcm +### --- on stat1005 by accessing https://analytics.wikimedia.org/datasets/wdcm/ +### --- onto which the /srv/published-datasets/wdcm from stat1005 +### --- is mapped. +### --- This script will check the timestamp in toLabsReport.csv +### --- and compare it to a locally stored version. +### --- If the timestamp found in production is newer, the script +### --- will copy all files from https://analytics.wikimedia.org/datasets/wdcm/ +### --- to: /home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2 +### --- and update its local timestamp. +### --- Once the timestamp is updated and the files copied, the script will run +### --- nohup /home/goransm/WMDE/WDCM/WDCM_RScripts & +### --- to update the WDCM Dashboards. +### --------------------------------------------------------------------------- + +### --------------------------------------------------------------------------- +### --- LICENSE: +### --------------------------------------------------------------------------- +### --- GPL v2 +### --- This file is part of Wikidata Concepts Monitor (WDCM) +### --- +### --- WDCM is free software: you can redistribute it and/or modify +### --- it under the terms of the GNU General Public License as published by +### --- the Free Software Foundation, either version 2 of the License, or +### --- (at your option) any later version. +### --- +### --- WDCM is distributed in the hope that it will be useful, +### --- but WITHOUT ANY WARRANTY; without even the implied warranty of +### --- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +### --- GNU General Public License for more details. +### --- +### --- You should have received a copy of the GNU General Public License +### --- along with WDCM. If not, see <http://www.gnu.org/licenses/>. +### --------------------------------------------------------------------------- + +### --- Setup +library(httr) +library(stringr) + +### --------------------------------------------------------------------------- +### --- 1. WDCM Dashboards: General Update +### --- affected dashboards: Overview, Usage, Semantics +### --------------------------------------------------------------------------- + +### --- Check update timestamp in production: +### --- https://analytics.wikimedia.org/datasets/wdcm/toLabsReport.csv +download.file('https://analytics.wikimedia.org/datasets/wdcm/toLabsReport.csv', + destfile = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampProducton.csv', + quiet = T) +timestampProduction <- read.csv('/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampProducton.csv', + header = T, + check.names = F, + stringsAsFactors = F, + row.names = 1) +tProd <- as.POSIXct(timestampProduction$timeStamp, format = "%Y-%m-%d %H:%M:%S") + +### --- Check local timestamp on Labs: +timestampLocal <- read.csv('/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampLocal.csv', + header = T, + check.names = F, + stringsAsFactors = F, + row.names = 1) +tLoc <- as.POSIXct(timestampLocal$timeStamp, format = "%Y-%m-%d %H:%M:%S") + + +### --- Compare with the current local timestamp +### --- and update if necessary: +if (tLoc < tProd) { + + # - update local timestamp: + timestampLocal$timeStamp <- timestampProduction$timeStamp + write.csv(timestampLocal, file = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampLocal.csv') + + # - list files: + url <- 'https://analytics.wikimedia.org/datasets/wdcm/' + page <- as.character(GET(url)) + links <- str_extract_all(page, "<a href=.+>.+</a>") + links <- sapply(links, function(x) {str_extract_all(x, ">.+<")}) + links <- sapply(links, function(x) {gsub('^>|"|<$|>|<', "", x)}) + links <- links[3:length(links)] + + # - clean up: + wREADME <- which(grepl("README", links)) + links <- links[-wREADME] + wtoLabsReport <- which(grepl("toLabs", links)) + links <- links[-wtoLabsReport] + wWDCM_MainReport <- which(grepl("WDCM_MainReport", links)) + links <- links[-wWDCM_MainReport] + + # - clear output dir: + setwd('/home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2') + lF <- list.files() + rmF <- file.remove(lF) + + # - Download files to: /home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2 + destDir <- '/home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2/' + for (i in 1:length(links)) { + # - to nohup: + print(paste("Downloading file: ", links[i], ": ", i, "/", length(links), sep = "")) + download.file(url = paste(url, links[i], sep = ""), + destfile = paste(destDir, links[i], sep = ""), + quiet = T) + } + + ### --- fetch toLabsReport.csv and place to: + ### --- /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/ + download.file('https://analytics.wikimedia.org/datasets/wdcm/toLabsReport.csv', + destfile = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsReport.csv', + quiet = T) + + # - set updateDir + updateDir <- '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/' + setwd(updateDir) + + ### --- start WDCM Dashboards Update: + # - remove previous runtime report: + file.remove('WDCM_Update_Process_nohupReport.txt') + nohupReport <- system(command = 'nohup Rscript /home/goransm/WMDE/WDCM/WDCM_RScripts/WDCM_Process.R', intern = T) + write(nohupReport, 'WDCM_Update_Process_nohupReport.txt') + + ### --- Update Report: + lF <- list.files() + if ('WDCM_SystemUpdateReport.csv' %in% lF) { + updateReport <- read.csv('WDCM_SystemUpdateReport.csv', + header = T, + row.names = 1, + check.names = F, + stringsAsFactors = F) + newReport <- data.frame(Step = 'WDCM Update', + Time = as.character(Sys.time()), + stringsAsFactors = F) + updateReport <- rbind(updateReport, newReport) + write.csv(updateReport, 'WDCM_SystemUpdateReport.csv') + } else { + newReport <- data.frame(Step = 'WDCM Update', + Time = as.character(Sys.time()), + stringsAsFactors = F) + write.csv(newReport, 'WDCM_SystemUpdateReport.csv') + } + +} + +### --------------------------------------------------------------------------- +### --- 2. WDCM GeoDashboard Update +### --- affected dashboards: Overview, Usage, Semantics +### --------------------------------------------------------------------------- + +### --- Check update timestamp in production: +### --- https://analytics.wikimedia.org/datasets/wdcm/toLabsReport.csv +download.file('https://analytics.wikimedia.org/datasets/wdcm/toLabsGeoReport.csv', + destfile = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampGeoProduction.csv', + quiet = T) +timestampProduction <- read.csv('/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampGeoProduction.csv', + header = T, + check.names = F, + stringsAsFactors = F, + row.names = 1) +tProd <- as.POSIXct(timestampProduction$timeStamp, format = "%Y-%m-%d %H:%M:%S") + +### --- Check local timestamp on Labs: +timestampLocal <- read.csv('/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampGeoLocal.csv', + header = T, + check.names = F, + stringsAsFactors = F, + row.names = 1) +tLoc <- as.POSIXct(timestampLocal$timeStamp, format = "%Y-%m-%d %H:%M:%S") + +### --- Compare with the current local timestamp +### --- and update if necessary: +if (tLoc < tProd) { + + # - update local timestamp: + timestampLocal$timeStamp <- timestampProduction$timeStamp + write.csv(timestampLocal, file = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/timestampGeoLocal.csv') + + # - list files: + url <- 'https://analytics.wikimedia.org/datasets/wdcm/' + page <- as.character(GET(url)) + links <- str_extract_all(page, "<a href=.+>.+</a>") + links <- sapply(links, function(x) {str_extract_all(x, ">.+<")}) + links <- sapply(links, function(x) {gsub('^>|"|<$|>|<', "", x)}) + links <- links[3:length(links)] + links <- links[which(grepl("^wdcm_geoitem_", links))] + + # - Download files to: /home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2 + destDir <- '/home/goransm/WMDE/WDCM/WDCM_DataIN/WDCM_DataIN_ClientUsage_v2/' + for (i in 1:length(links)) { + # - to nohup: + print(paste("GeoDashboard - Downloading file: ", links[i], ": ", i, "/", length(links), sep = "")) + download.file(url = paste(url, links[i], sep = ""), + destfile = paste(destDir, links[i], sep = ""), + quiet = T) + } + + ### --- fetch toLabsReport.csv and place to: + ### --- /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/ + download.file('https://analytics.wikimedia.org/datasets/wdcm/toLabsGeoReport.csv', + destfile = '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsGeoReport.csv', + quiet = T) + + # - Copy GeoDashboard Data Sets to Dashboard /srv + setwd(destDir) + lF <- list.files() + lF <- lF[grepl("^wdcm_geoitem_", lF)] + for (i in 1:length(lF)) { + system(paste('sudo cp ', destDir, + lF[i], + ' /srv/shiny-server/WDCM_GeoDashboard/data/', + sep = ''), + wait = F) + } + + ### --- Copy toLabsGeoReport to Dashboard /srv + system('sudo cp /home/goransm/WMDE/WDCM/WDCM_SystemUpdate/toLabsGeoReport.csv /srv/shiny-server/WDCM_GeoDashboard/update/', + wait = F) + + ### --- Update Report: + # - set updateDir + updateDir <- '/home/goransm/WMDE/WDCM/WDCM_SystemUpdate/' + setwd(updateDir) + lF <- list.files() + if ('WDCM_SystemUpdateReport.csv' %in% lF) { + updateReport <- read.csv('WDCM_SystemUpdateReport.csv', + header = T, + row.names = 1, + check.names = F, + stringsAsFactors = F) + newReport <- data.frame(Step = 'WDCM Geo Update', + Time = as.character(Sys.time()), + stringsAsFactors = F) + updateReport <- rbind(updateReport, newReport) + write.csv(updateReport, 'WDCM_SystemUpdateReport.csv') + } else { + newReport <- data.frame(Step = 'WDCM Geo Update', + Time = as.character(Sys.time()), + stringsAsFactors = F) + write.csv(newReport, 'WDCM_SystemUpdateReport.csv') + } + +} + -- To view, visit https://gerrit.wikimedia.org/r/398692 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1cfad83ed71be0e046205a5cff09987df0a8ab7f Gerrit-PatchSet: 1 Gerrit-Project: analytics/wmde/WDCM Gerrit-Branch: master Gerrit-Owner: GoranSMilovanovic <goran.milovanovic_...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits