Christopher Johnson (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/252033
Change subject: adds static path for tsv output ...................................................................... adds static path for tsv output Change-Id: I329b63da75f97129ef00bfe941156de60961a743 --- M src/scripts/property_usage.R A src/scripts/property_usage.Rout 2 files changed, 127 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard refs/changes/33/252033/1 diff --git a/src/scripts/property_usage.R b/src/scripts/property_usage.R index d85a081..243c4bd 100644 --- a/src/scripts/property_usage.R +++ b/src/scripts/property_usage.R @@ -22,10 +22,11 @@ setkey(props, id) setkey(prop_counts, id) setkey(labels, id) - dt_join_prop_usage <- props[prop_counts] + dt_join_props <- props[labels] + dt_join_prop_usage <- dt_join_props[prop_counts] dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3,4)] dt_join_prop_usage <- setnames(dt_join_prop_usage, c("Property","Label","Count")) - write.table(dt_join_prop_usage, paste0(sparql_data_uri, "prop_usage.tsv"), sep = "\t", row.names = FALSE) + write.table(dt_join_prop_usage, "/srv/dashboards/shiny-server/wdm/data/sparql/prop_usage.tsv", sep = "\t", row.names = FALSE) } write_prop_usage_counts() \ No newline at end of file diff --git a/src/scripts/property_usage.Rout b/src/scripts/property_usage.Rout new file mode 100644 index 0000000..5f983f1 --- /dev/null +++ b/src/scripts/property_usage.Rout @@ -0,0 +1,124 @@ + +R version 3.2.2 (2015-08-14) -- "Fire Safety" +Copyright (C) 2015 The R Foundation for Statistical Computing +Platform: x86_64-pc-linux-gnu (64-bit) + +R is free software and comes with ABSOLUTELY NO WARRANTY. +You are welcome to redistribute it under certain conditions. +Type 'license()' or 'licence()' for distribution details. + + Natural language support but running in an English locale + +R is a collaborative project with many contributors. +Type 'contributors()' for more information and +'citation()' on how to cite R or R packages in publications. + +Type 'demo()' for some demos, 'help()' for on-line help, or +'help.start()' for an HTML browser interface to help. +Type 'q()' to quit R. + +[Previously saved workspace restored] + +> #Bulk Query of WDQS for Property Use Counts and write to TSV +> +> src.path <- "/srv/dashboards/shiny-server/wdm/src/" +> source(paste0(src.path, "config.R"), chdir=T) + +Attaching package: ‘shinydashboard’ + +The following object is masked from ‘package:graphics’: + + box + +Loading required package: zoo + +Attaching package: ‘zoo’ + +The following objects are masked from ‘package:base’: + + as.Date, as.Date.numeric + + +Attaching package: ‘scales’ + +The following objects are masked from ‘package:readr’: + + col_factor, col_numeric + + +Attaching package: ‘lubridate’ + +The following object is masked from ‘package:plyr’: + + here + + +Attaching package: ‘curl’ + +The following object is masked from ‘package:readr’: + + parse_date + +Loading required package: bitops +Loading required package: rJava + +Attaching package: ‘rJava’ + +The following object is masked from ‘package:RCurl’: + + clone + +Loading required package: rrdflibs + +Attaching package: ‘data.table’ + +The following objects are masked from ‘package:lubridate’: + + hour, mday, month, quarter, wday, week, yday, year + +The following objects are masked from ‘package:reshape2’: + + dcast, melt + +The following object is masked from ‘package:xts’: + + last + + +Attaching package: ‘DT’ + +The following objects are masked from ‘package:shiny’: + + dataTableOutput, renderDataTable + +> source(paste0(src.path, "utils.R"), chdir=T) +> +> write_prop_usage_counts <- function() { ++ query <- get_property_list_query() ++ prefix <- get_property_label_prefixes() ++ doc <- get_sparql_result(wdmrdf_uri, prefix, query) ++ plist <- get_dataframe_from_xml_result(doc, "//sq:uri") ++ props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/", "", x)) ++ labels <- get_dataframe_from_xml_result(doc, "//sq:literal") ++ values <- lapply(props$text, function(x) get_estimated_card_from_prop_predicate(estcard.uri, x)) ++ vals <- do.call(c, unlist(values, recursive=FALSE)) ++ labels <- data.table(labels$text) ++ labels$id <- seq_len(nrow(labels)) ++ prop_counts <- data.table(vals) ++ props <- data.table(props$text) ++ props$id <- seq_len(nrow(props)) ++ prop_counts$id <- seq_len(nrow(prop_counts)) ++ setkey(props, id) ++ setkey(prop_counts, id) ++ setkey(labels, id) ++ dt_join_prop_usage <- props[prop_counts] ++ dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3,4)] ++ dt_join_prop_usage <- setnames(dt_join_prop_usage, c("Property","Label","Count")) ++ write.table(dt_join_prop_usage, paste0(sparql_data_uri, "prop_usage.tsv"), sep = "\t", row.names = FALSE) ++ } +> +> write_prop_usage_counts() + +Warning message: +closing unused connection 3 (https://wdm-rdf.wmflabs.org/bigdata/namespace/wdq/sparql?query=PREFIX%20wd%3A%20%3Chttp%3A%2F%2Fwww.wikidata.org%2Fentity%2F%3EPREFIX%20wikibase%3A%20%3Chttp%3A%2F%2Fwikiba.se%2Fontology%23%3EPREFIX%20rdfs%3A%20%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3ESELECT%20%3Fs%20%3Fo%20WHERE%20%7B%3Fs%20%3Fp%20wikibase%3AProperty%20.%0ASERVICE%20wikibase%3Alabel%20%7B%0A%20%20%20%20%20%20bd%3AserviceParam%20wikibase%3Alanguage%20%22en%22%20.%0A%20%20%20%20%20%20%3Fs%20rdfs%3Alabel%20%3Fo%7D%7D) +Execution halted -- To view, visit https://gerrit.wikimedia.org/r/252033 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I329b63da75f97129ef00bfe941156de60961a743 Gerrit-PatchSet: 1 Gerrit-Project: wikidata/analytics/dashboard Gerrit-Branch: master Gerrit-Owner: Christopher Johnson (WMDE) <christopher.john...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits