https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113066

Revision: 113066
Author:   rfaulk
Date:     2012-03-05 19:25:26 +0000 (Mon, 05 Mar 2012)
Log Message:
-----------
modularized the main visualization implementation in order to more easily 
modify parameters 

Modified Paths:
--------------
    trunk/tools/wsor/message_templates/R/template_analysis.R
    trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R

Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R    2012-03-05 19:22:27 UTC (rev 113065)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R    2012-03-05 19:25:26 UTC (rev 113066)
@@ -47,7 +47,7 @@
 # GLOBALS assumed to exist:  warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control
 #
 
-process.data.frames <- function(min_edits_before=0, 
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf) 
{
+process.data.frames <- function(min_edits_before=0, 
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, 
min_revisions_after = 0, registered=TRUE) {
        
        # MERGE THE METRICS AND ADD TEMPLATE COLS
 
@@ -69,16 +69,18 @@
        
        maximum_warns_before <- 0
        
-       IP_regex <- 
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
-       IP_regex_not <- '.*[a-zA-z].*'
+       if (!registered)
+               IP_regex <- 
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
+       else
+               IP_regex <- '.*[a-zA-z].*'
        
        condition_1 <- TRUE # merged_test$blocks_before > 0
        condition_2 <- merged_test$blocks_after == 0
        condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before & 
merged_test$ns_0_revisions_before <= max_edits_before 
        condition_4 <- merged_test$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
        condition_5 <- merged_test$warns_before <= maximum_warns_before
-       condition_6 <- filter.list.by.regex(IP_regex_not, 
merged_test$recipient_name)
-       condition_7 <- merged_test$ns_0_revisions_after_0_3 > 0
+       condition_6 <- filter.list.by.regex(IP_regex, 
merged_test$recipient_name)
+       condition_7 <- merged_test$ns_0_revisions_after_0_3 >= 
min_revisions_after
        
        indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7
        merged_test <<- merged_test[indices,]
@@ -88,8 +90,8 @@
        condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before 
& merged_control$ns_0_revisions_before <= max_edits_before
        condition_4 <- merged_control$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
        condition_5 <- merged_control$warns_before <= maximum_warns_before
-       condition_6 <- filter.list.by.regex(IP_regex_not, 
merged_control$recipient_name)
-       condition_7 <- merged_control$ns_0_revisions_after_0_3 > 0
+       condition_6 <- filter.list.by.regex(IP_regex, 
merged_control$recipient_name)
+       condition_7 <- merged_control$ns_0_revisions_after_0_3 >= 
min_revisions_after
        
        indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7 
        merged_control <<- merged_control[indices,]
@@ -149,13 +151,13 @@
 # A pseudo main method to allow the script to be executed as a batch 
 #
 
-execute.main <- function(test_samples, control_samples) {
+execute.main <- function() {
        
        # IMPORT DATA
        
-       template_indices_control <- c(78,81) # c(84, 0) # 
c(107,109,111,113,115) # c(1,4) # c(84,99,101,103,105) # 
c(60,62,64,66,68,70,72,74,76) 
-       template_indices_test <- c(79,82) # c(86, 0) # c(108,110,114,116) # 
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) 
-       fname_first_part <- paste(home_dir,"output/metrics_1109_1209_z",sep="") 
# paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") # 
"/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
+       template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) # 
c(78,81) # c(84, 0) #  c(1,4) # c(84,99,101,103,105) # 
c(60,62,64,66,68,70,72,74,76) 
+       template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82) 
# c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) 
+       fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") 
# paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_1109_1209_z",sep="") # 
paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") 
        
        # import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
        
@@ -165,7 +167,8 @@
        
        # print("")
        # print("Processing data frames.")
-       process.data.frames(1,0,Inf,Inf)
+       registered = TRUE
+       process.data.frames(3,0,Inf,Inf,registered)
        
        
        
@@ -177,7 +180,7 @@
        
        # LOGISTIC REGRESSION MODELLING:
        
-       all_data <- append.data.frames(merged_test, merged_control)
+       all_data <<- append.data.frames(merged_test, merged_control)
        # summary(glm(template ~ edits_decrease, data=all_data, 
family=binomial(link="logit")))
        # summary(glm(template ~ edits_del_decrease, data=all_data, 
family=binomial(link="logit")))
 

Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-03-05 19:22:27 UTC (rev 113065)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-03-05 19:25:26 UTC (rev 113066)
@@ -13,6 +13,8 @@
 #
 # Basic plotting for te        st vs. control
 #
+# e.g. call :: plot.control.vs.test("Huggle Short 2 Experiment (reduced) - Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit Activity", edit_decrease_means_test, edit_decrease_means_control)
+#
 
 plot.control.vs.test <- function(title, x_label, y_label, test_samples, 
control_samples) {
 
@@ -62,44 +64,68 @@
 
 # IMPORT DATA
 
-#  c(84, 0) c(107,109,111,113,115) c(78,81) c(1,4) c(84,99,101,103,105) 
-template_indices_control <- c(60,62,64,66,68,70,72,74,76)
-
-#  c(85, 0) c(108,110,114,116) c(79,82) c(2,3) c(85,86,100,102,104,106) 
-template_indices_test <- c(61,63,65,67,69,71,73,75,77)  
-
-# paste(home_dir,"output/metrics_1108_1202_z",sep="") 
paste(home_dir,"output/metrics_1122_1222_z",sep="") 
paste(home_dir,"output/metrics_1109_1209_z",sep="") 
paste(home_dir,"output/metrics_pt_z",sep="")  
"/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
-fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") 
-
-import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
-
-
-
-# PROCESS DATA
-
-edit_count_before_filter <- 1:10
-
-edit_decrease_means_test <- c()
-edit_decrease_means_control <- c()
- 
-for (i in edit_count_before_filter)
+line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 
10, import_metrics = FALSE, save_plot = TRUE, registered = TRUE, error_bars = 
FALSE)
 {
-       process.data.frames(i,0,Inf,Inf)
+       #  c(78,81) c(1,4)  c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) 
c(107,109,111,113,115) c(84,99,101,103,105)
+       template_indices_control <- c(84, 0)
        
-       edit_decrease_means_test <- c(edit_decrease_means_test, 
mean(merged_test$edits_decrease))
-       edit_decrease_means_control <- c(edit_decrease_means_control, 
mean(merged_control$edits_decrease))
+       #  c(79,82) c(2,3)  c(61,63,65,67,69,71,73,75,77) c(61,63,67,77)  
c(108,110,114,116) c(85,86,100,102,104,106)
+       template_indices_test <- c(85, 0)
+       
+       #   paste(home_dir,"output/metrics_1109_1209_z",sep="") 
paste(home_dir,"output/metrics_pt_z",sep="")  
paste(home_dir,"output/metrics_1018_1119_z",sep="") 
paste(home_dir,"output/metrics_1122_1222_z",sep="")
+       fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="")
+       
+       if (import_metrics)
+               import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
+       
+       
+       
+       # PROCESS DATA
+       
+       edit_count_before_filter <- edit_count_min_lower : edit_count_min_upper
+       
+       data_counts_test <<- c()
+       data_counts_control <<- c()
+       
+       edit_decrease_means_test <<- c()
+       edit_decrease_means_control <<- c()
+       
+       edit_decrease_sd_test <<- c()
+       edit_decrease_sd_control <<- c()        
+       
+       
+       if (registered)
+               reg_str = 'registered'
+       else
+               reg_str = 'non_registered'
+       
+       for (i in edit_count_before_filter)
+       {
+               
process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0)
+               
+               edit_decrease_means_test <<- c(edit_decrease_means_test, 
mean(merged_test$edits_decrease) * 100)
+               edit_decrease_means_control <<- c(edit_decrease_means_control, 
mean(merged_control$edits_decrease) * 100)
+               
+               edit_decrease_sd_test <<- c(edit_decrease_sd_test, 
sd(merged_test$edits_decrease * 100))
+               edit_decrease_sd_control <<- c(edit_decrease_sd_control, 
sd(merged_control$edits_decrease * 100))
+               
+               data_counts_test <<- c(data_counts_test, 
length(merged_test$edits_decrease))    
+               data_counts_control <<- c(data_counts_control, 
length(merged_control$edits_decrease))
+       }
+       
+       # PLOT DATA             
+       
+       plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") - 
Decrease in Editor Activity", sep="")
+       
+       df <- data.frame(x=1:length(edit_decrease_means_test), 
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control, 
y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control)        
+       p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + 
geom_line(aes(y=y_ctrl,colour="Control")) 
+       
+       if (error_bars)
+               p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax = 
y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin = 
y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2)
+       
+       p <- p + scale_x_continuous('Minimum Edits before Template Posting') + 
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = 
plot_title, legend.title = theme_blank())
+       
+       if (save_plot)
+               
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_short_1_2_',reg_str,'.png',sep=""),width=8)
 }
 
-# PLOT DATA
-
-# plot.control.vs.test("Huggle Short 2 Experiment - Decrease in Editor 
Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit 
Activity", edit_decrease_means_test, edit_decrease_means_control)
-
-# ggplot 
-
-plot_title = "Huggle 3 - % decrease of Edit Actitivity after Posting"
-
-df <- data.frame(x=1:length(edit_decrease_means_test), 
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control)    
-p = ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + 
geom_line(aes(y=y_ctrl,colour="Control")) 
-p + scale_x_continuous('Minimum Edits before Template Posting') + 
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = 
plot_title, legend.title = theme_blank())
-ggsave('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_3.png',width=8)
-


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to