[MediaWiki-commits] [Gerrit] adds property labels to property usage table - change (wikidata...dashboard)

2015-11-09 Thread Christopher Johnson (WMDE) (Code Review)
Christopher Johnson (WMDE) has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/252024

Change subject: adds property labels to property usage table
..

adds property labels to property usage table

Change-Id: I7ad226c89cc7eacde5394ac5892ff9c361af14a0
---
M src/output/server-properties.R
M src/scripts/property_usage.R
M src/utils.R
3 files changed, 25 insertions(+), 13 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikidata/analytics/dashboard 
refs/changes/24/252024/1

diff --git a/src/output/server-properties.R b/src/output/server-properties.R
index 48cfdab..353ba28 100644
--- a/src/output/server-properties.R
+++ b/src/output/server-properties.R
@@ -6,13 +6,16 @@
   box(title = "Definition", width = 6, status = "info", metric_desc)
 })
 output$wikidata_property_usage_count_table <- DT::renderDataTable({
-  datatable(property_usage_counts,   class = "display compact", colnames = 
c("Property", "Count"),
+  datatable(property_usage_counts,   class = "display compact", colnames = 
c("Property", "Label", "Count"),
 options = list(
-  order = list(2, 'desc'),
+  order = list(3, 'desc'),
   pageLength = 100,
   columnDefs = list(
-list(className = 'dt-left', targets = c(1,2)),
+list(className = 'dt-left', targets = c(1,2,3)),
 list(visible = FALSE, targets = c(0)),
+list(width='10%', targets = c(1)),
+list(width='40%', targets = c(2)),
+list(width='40%', targets = c(3)),
 list(targets = c(1), render = JS(
   "function(data, type, row, meta) {",
   "return '<a href=\"https://www.wikidata.org/wiki/Property:'+data+'\" target=\"_blank\">'+data+'</a>'",
diff --git a/src/scripts/property_usage.R b/src/scripts/property_usage.R
index 908aaf4..d85a081 100644
--- a/src/scripts/property_usage.R
+++ b/src/scripts/property_usage.R
@@ -7,19 +7,24 @@
 write_prop_usage_counts <- function() {
   query <- get_property_list_query()
   prefix <- get_property_label_prefixes()
-  plist <- get_sparql_result_from_uri(wdmrdf_uri, prefix, query)
+  doc <- get_sparql_result(wdmrdf_uri, prefix, query)
+  plist <- get_dataframe_from_xml_result(doc, "//sq:uri")
   props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/", "", x))
+  labels <- get_dataframe_from_xml_result(doc, "//sq:literal")
   values <- lapply(props$text, function(x) 
get_estimated_card_from_prop_predicate(estcard.uri, x))
   vals <- do.call(c, unlist(values, recursive=FALSE))
+  labels <- data.table(labels$text)
+  labels$id <- seq_len(nrow(labels))
   prop_counts <- data.table(vals)
   props <- data.table(props$text)
   props$id <- seq_len(nrow(props))
   prop_counts$id <- seq_len(nrow(prop_counts))
   setkey(props, id)
   setkey(prop_counts, id)
+  setkey(labels, id)
   dt_join_prop_usage <- props[prop_counts]
-  dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)]
-  dt_join_prop_usage <- setnames(dt_join_prop_usage, c("Property", "Count"))
+  dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3,4)]
+  dt_join_prop_usage <- setnames(dt_join_prop_usage, 
c("Property","Label","Count"))
   write.table(dt_join_prop_usage, paste0(sparql_data_uri, "prop_usage.tsv"), 
sep = "\t", row.names = FALSE)
 }
 
diff --git a/src/utils.R b/src/utils.R
index b858894..d08a766 100644
--- a/src/utils.R
+++ b/src/utils.R
@@ -230,21 +230,21 @@
 }
 
 get_property_list_query <- function(){
-  query = curl_escape("SELECT ?s WHERE {?s ?p wikibase:Property}")
+  query = curl_escape("SELECT ?s ?o WHERE {?s ?p wikibase:Property .
+SERVICE wikibase:label {
+  bd:serviceParam wikibase:language \"en\" .
+  ?s rdfs:label ?o}}")
   return(query)
 }
 
 get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
   xml_result <- readLines(curl(paste0(uri, prefix, query)))
   doc = xmlParse(xml_result)
-  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#")))
-  return(result)
+  return(doc)
 }
 
-get_sparql_result_from_uri <- function(uri = wdmrdf_uri, prefix, query) {
-  xml_result <- readLines(curl(paste0(uri, prefix, query)))
-  doc = xmlParse(xml_result)
-  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:uri", c(sq = "http://www.w3.org/2005/sparql-results#")))
+get_dataframe_from_xml_result <- function(doc, qname) {
+  result = xmlToDataFrame(nodes = getNodeSet(doc, qname, c(sq = "http://www.w3.org/2005/sparql-results#")))
   return(result)
 }
 
@@ -254,4 +254,8 @@
   result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount")
   return(result)
 
+}
+
+join_data_frames <- function(x, y) {
+
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/252024
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ad226c89cc7eacde5394ac5892ff9c361af14a0

[MediaWiki-commits] [Gerrit] adds property labels to property usage table - change (wikidata...dashboard)

2015-11-09 Thread Christopher Johnson (WMDE) (Code Review)
Christopher Johnson (WMDE) has submitted this change and it was merged.

Change subject: adds property labels to property usage table
..


adds property labels to property usage table

Change-Id: I7ad226c89cc7eacde5394ac5892ff9c361af14a0
---
M src/output/server-properties.R
M src/scripts/property_usage.R
M src/utils.R
3 files changed, 25 insertions(+), 13 deletions(-)

Approvals:
  Christopher Johnson (WMDE): Verified; Looks good to me, approved



diff --git a/src/output/server-properties.R b/src/output/server-properties.R
index 48cfdab..353ba28 100644
--- a/src/output/server-properties.R
+++ b/src/output/server-properties.R
@@ -6,13 +6,16 @@
   box(title = "Definition", width = 6, status = "info", metric_desc)
 })
 output$wikidata_property_usage_count_table <- DT::renderDataTable({
-  datatable(property_usage_counts,   class = "display compact", colnames = 
c("Property", "Count"),
+  datatable(property_usage_counts,   class = "display compact", colnames = 
c("Property", "Label", "Count"),
 options = list(
-  order = list(2, 'desc'),
+  order = list(3, 'desc'),
   pageLength = 100,
   columnDefs = list(
-list(className = 'dt-left', targets = c(1,2)),
+list(className = 'dt-left', targets = c(1,2,3)),
 list(visible = FALSE, targets = c(0)),
+list(width='10%', targets = c(1)),
+list(width='40%', targets = c(2)),
+list(width='40%', targets = c(3)),
 list(targets = c(1), render = JS(
   "function(data, type, row, meta) {",
   "return '<a href=\"https://www.wikidata.org/wiki/Property:'+data+'\" target=\"_blank\">'+data+'</a>'",
diff --git a/src/scripts/property_usage.R b/src/scripts/property_usage.R
index 908aaf4..d85a081 100644
--- a/src/scripts/property_usage.R
+++ b/src/scripts/property_usage.R
@@ -7,19 +7,24 @@
 write_prop_usage_counts <- function() {
   query <- get_property_list_query()
   prefix <- get_property_label_prefixes()
-  plist <- get_sparql_result_from_uri(wdmrdf_uri, prefix, query)
+  doc <- get_sparql_result(wdmrdf_uri, prefix, query)
+  plist <- get_dataframe_from_xml_result(doc, "//sq:uri")
   props <- lapply(plist, function(x) gsub("http://www.wikidata.org/entity/", "", x))
+  labels <- get_dataframe_from_xml_result(doc, "//sq:literal")
   values <- lapply(props$text, function(x) 
get_estimated_card_from_prop_predicate(estcard.uri, x))
   vals <- do.call(c, unlist(values, recursive=FALSE))
+  labels <- data.table(labels$text)
+  labels$id <- seq_len(nrow(labels))
   prop_counts <- data.table(vals)
   props <- data.table(props$text)
   props$id <- seq_len(nrow(props))
   prop_counts$id <- seq_len(nrow(prop_counts))
   setkey(props, id)
   setkey(prop_counts, id)
+  setkey(labels, id)
   dt_join_prop_usage <- props[prop_counts]
-  dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3)]
-  dt_join_prop_usage <- setnames(dt_join_prop_usage, c("Property", "Count"))
+  dt_join_prop_usage <- dt_join_prop_usage[,.SD,.SDcols=c(1,3,4)]
+  dt_join_prop_usage <- setnames(dt_join_prop_usage, 
c("Property","Label","Count"))
   write.table(dt_join_prop_usage, paste0(sparql_data_uri, "prop_usage.tsv"), 
sep = "\t", row.names = FALSE)
 }
 
diff --git a/src/utils.R b/src/utils.R
index b858894..d08a766 100644
--- a/src/utils.R
+++ b/src/utils.R
@@ -230,21 +230,21 @@
 }
 
 get_property_list_query <- function(){
-  query = curl_escape("SELECT ?s WHERE {?s ?p wikibase:Property}")
+  query = curl_escape("SELECT ?s ?o WHERE {?s ?p wikibase:Property .
+SERVICE wikibase:label {
+  bd:serviceParam wikibase:language \"en\" .
+  ?s rdfs:label ?o}}")
   return(query)
 }
 
 get_sparql_result <- function(uri = wdqs_uri, prefix, query) {
   xml_result <- readLines(curl(paste0(uri, prefix, query)))
   doc = xmlParse(xml_result)
-  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:literal", c(sq = "http://www.w3.org/2005/sparql-results#")))
-  return(result)
+  return(doc)
 }
 
-get_sparql_result_from_uri <- function(uri = wdmrdf_uri, prefix, query) {
-  xml_result <- readLines(curl(paste0(uri, prefix, query)))
-  doc = xmlParse(xml_result)
-  result = xmlToDataFrame(nodes = getNodeSet(doc, "//sq:uri", c(sq = "http://www.w3.org/2005/sparql-results#")))
+get_dataframe_from_xml_result <- function(doc, qname) {
+  result = xmlToDataFrame(nodes = getNodeSet(doc, qname, c(sq = "http://www.w3.org/2005/sparql-results#")))
   return(result)
 }
 
@@ -254,4 +254,8 @@
   result = xpathApply(doc, "//data[@rangeCount]", xmlGetAttr, "rangeCount")
   return(result)
 
+}
+
+join_data_frames <- function(x, y) {
+
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/252024
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7ad226c89cc7eacde5394ac5892ff9c361af14a0
Gerrit-PatchSet: