[MediaWiki-CVS] SVN: [106108] trunk/tools/wsor/newbie_warnings/R

halfak Tue, 13 Dec 2011 13:53:29 -0800

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106108


Revision: 106108
Author:   halfak
Date:     2011-12-13 21:53:08 +0000 (Tue, 13 Dec 2011)
Log Message:
-----------
Added CHI^2 tests to hugglings

Added Paths:
-----------
    trunk/tools/wsor/newbie_warnings/R/chi2_tests.R
    trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R

Added: trunk/tools/wsor/newbie_warnings/R/chi2_tests.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/chi2_tests.R                             
(rev 0)
+++ trunk/tools/wsor/newbie_warnings/R/chi2_tests.R     2011-12-13 21:53:08 UTC 
(rev 106108)
@@ -0,0 +1,188 @@
+source("loader/load_hugglings.R")
+source("loader/load_huggling_codings.R")
+library(doBy)
+hugglings = load_hugglings()
+
+hugglingCounts = summaryBy(
+       recipient ~ recipient,
+       data = hugglings,
+       FUN=length
+)
+hugglingCounts$count = hugglingCounts$recipient.length
+hugglingCounts$recipient.length = NULL
+
+hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
+
+huggling_codings = load_huggling_codings(reload=T)
+messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
+
+messaged_codings$retailates   = messaged_codings$retaliates > 0
+messaged_codings$contact      = !is.na(messaged_codings$contacts_huggler) & 
(messaged_codings$contacts_huggler > 0 | messaged_codings$retaliates > 0)
+messaged_codings$quality_work = messaged_codings$after_rating >= 3.0
+messaged_codings$stay         = !is.na(messaged_codings$after_rating)
+messaged_codings$improves     = messaged_codings$after_rating > 
messaged_codings$before_rating
+messaged_codings$anon         = messaged_codings$is_anon > 0
+messaged_codings$talk_edits_before_msg = with(
+       messaged_codings,
+       user_talk_edits_after_msg + article_talk_edits_before_msg
+)
+messaged_codings$ntalk_edits_before_msg = with(
+       messaged_codings,
+       edits_before_msg - talk_edits_before_msg
+)
+messaged_codings$good_contact = mapply(
+       function(contact, retaliates){
+               if(!is.na(contact) & contact){
+                       retaliates <= 0 
+               }else{
+                       NA
+               }
+       },
+       messaged_codings$contact,
+       messaged_codings$retaliates
+)
+messaged_codings$good_outcome = with(
+       messaged_codings,
+       (
+               before_rating <= 2 &
+               (
+                       is.na(after_rating) |
+                       after_rating > 2
+               )
+       ) |
+       (
+               !is.na(good_contact) & 
+               good_contact
+       ) | 
+       (
+               !is.na(quality_work) &
+               quality_work
+       )
+)
+
+##
+# Groups
+#
+# - < 2          at least one of us thought "no hope"
+# - >= 2 & <= 3  possibles
+# - > 3          at least one of us thought "golden"
+#
+# For each group:
+#  - contact
+#    - contact huggler + retaliate
+#    - talk? (wait for staeiou)
+#  - continue editing
+#    - did they actually
+#    - quality
+#      - improve
+#      - was it good
+#      - degrade
+#
+#
+# Predictors:
+#  - number of edilts before message
+#    - number deleted
+#  - makes edits to talk (before/after)
+
+messaged_codings$group = as.factor(sapply(
+       messaged_codings$before_rating,
+       function(rating){
+               if(is.na(rating)){
+                       NA
+               }else if(rating < 2){
+                       "unlikely"
+               }else if(rating <= 3){
+                       "possible"
+               }else{
+                       "golden"
+               }
+       }
+))
+
+formatNum = function(num){
+       if(!is.numeric(num) | is.nan(num)){
+               " ---  "
+       }
+       else if(num >= 0){
+               paste(" ", format(round(num, 3), nsmall=3), sep="")
+       }else{
+               format(round(num, 3), nsmall=3)
+       }
+}
+
+for(group in c("unlikely", "possible", "golden")){
+       group_codings = messaged_codings[messaged_codings$group == group,]
+       
+       
+       cat("Result's for ", length(group_codings$group), " '", group, "' 
editors:\n", sep='')
+       cat("============================================================\n")
+       
+       control      = group_codings[!group_codings$personal & 
!group_codings$teaching,]
+       personal     = group_codings[group_codings$personal & 
!group_codings$teaching,]
+       teaching     = group_codings[group_codings$teaching & 
!group_codings$personal,]
+       both         = group_codings[group_codings$teaching & 
group_codings$personal,]
+       
+       experiments = list(
+               list(name="Personal           ", data=personal),
+               list(name="Teaching           ", data=teaching),
+               list(name="Personal & Teaching", data=teaching)
+       )
+       
+       outcomes = list(
+               list(name="Good outcome", field="good_outcome"),
+               list(name="Improves", field="improves"),
+               list(name="Contact", field="contact"),
+               list(name="Stays", field="stay"),
+               list(name="Good contact", field="good_contact")
+       )
+               
+       for(outcome in outcomes){
+               cat(outcome$name, ": \n", sep="")
+               
+               controlLen = length(na.omit(control[[outcome$field]]))
+               controlSuccess = sum(control[[outcome$field]], na.rm=T)
+               cat(
+                       "\tControl            ",
+                       ": prop=", formatNum(controlSuccess/controlLen),
+                       ", n=", controlLen, "\n", 
+                       sep=""
+               )
+               for(experiment in experiments){
+                       expSuccess = sum(experiment$data[[outcome$field]], 
na.rm=T)
+                       expLen  = 
length(na.omit(experiment$data[[outcome$field]]))
+                       if(controlLen > 0 & expLen > 0){
+                               t = prop.test(
+                                       c(
+                                               expSuccess, 
+                                               controlSuccess
+                                       ),
+                                       c(
+                                               expLen,
+                                               controlLen
+                                       )
+                               )
+                       }else{
+                               t = list(
+                                       p.value=NA,
+                                       conf.int=c(NA, NA)
+                               )
+                       }
+                       
+                       propDiff = mean(experiment$data[[outcome$field]], 
na.rm=T)-mean(control[[outcome$field]], na.rm=T)
+                       cat(
+                               "\t", experiment$name, 
+                               ": prop=", formatNum(expSuccess/expLen),
+                               ", diff=", formatNum(propDiff),
+                               ", p-value=", formatNum(t$p.value),
+                               ", conf.int=(", formatNum(t$conf.int[1]), ", ", 
formatNum(t$conf.int[2]), ")", 
+                               ", n=", expLen, "\n",
+                               sep=""
+                       )
+               }
+               cat("\n")
+       }
+       
+       
+       cat("\n\n\n")
+}
+

Added: trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R                         
(rev 0)
+++ trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R 2011-12-13 21:53:08 UTC 
(rev 106108)
@@ -0,0 +1,377 @@
+source("loader/load_huggling_codings_mk2.R")
+library(doBy)
+hugglings = load_huggling_codings_mk2()
+
+#hugglingCounts = summaryBy(
+#      recipient ~ recipient,
+#      data = hugglings,
+#      FUN=length
+#)
+#hugglingCounts$count = hugglingCounts$recipient.length
+#hugglingCounts$recipient.length = NULL
+#
+#hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
+
+#huggling_codings = load_huggling_codings(reload=T)
+#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
+ifNA = function(val, naThen){
+       if(is.na(val)){
+               naThen
+       }else{
+               val
+       }
+}
+
+hugglings$contact = with(
+       hugglings,
+       mapply(
+               ifNA,
+               responds_hugglers_talk | 
+               responds_own_talk | 
+               responds_elsewhere | 
+               retaliates,
+               F
+       )
+)
+hugglings$good_contact = mapply(
+       function(contact, retaliates){
+               if(!contact){
+                       NA
+               }else{
+                       !retaliates
+               }
+       },
+       hugglings$contact,
+       hugglings$retaliates
+)
+hugglings$stay         = !is.na(hugglings$after_rating)
+hugglings$improves     = hugglings$after_rating > hugglings$before_rating
+hugglings$talk_edits_before_msg = with(
+       hugglings,
+       user_talk_edits_after_msg + article_talk_edits_before_msg
+)
+# Can't do it
+#messaged_codings$ntalk_edits_before_msg = with(
+#      messaged_codings,
+#      edits_before_msg - talk_edits_before_msg
+#)
+
+hugglings$good_outcome = with(
+       hugglings,
+       ( #Suckas leave or get better
+               before_rating <= 2 &
+               (
+                       is.na(after_rating) |
+                       after_rating > 2
+               )
+       ) |
+       ( #Good contact was made 
+               !is.na(good_contact) & 
+               good_contact
+       ) | 
+       ( #Edits are good after receiving message
+               !is.na(after_rating) &
+               after_rating > 3
+       )
+)
+
+##
+# Groups
+#
+# - <= 1        "vandal": We all agreed that the editor was a blatant vandal
+# - > 1 & <= 2  "bad faith": We all agreed that the editor was bad faith
+# - > 2 & < 4   "test": A test edit, but not good faith
+# - >= 4        "good faith": Good faith to excellent
+#
+# For each group:
+#  - contact
+#    - contact huggler + retaliate
+#    - talk? (wait for staeiou)
+#  - continue editing
+#    - did they actually
+#    - quality
+#      - improve
+#      - was it good
+#      - degrade
+#
+#
+# Predictors:
+#  - number of edilts before message
+#    - number deleted
+#  - makes edits to talk (before/after)
+
+hugglings$group = as.factor(sapply(
+       hugglings$before_rating,
+       function(rating){
+               if(is.na(rating)){
+                       NA
+               }else if(rating <= 1){
+                       "vandal"
+               }else if(rating > 1 & rating <= 2){
+                       "bad faith"
+               }else if(rating > 2 & rating < 4){
+                       "test"
+               }else if(rating >= 4){
+                       "good faith"
+               }else{
+                       NA
+               }
+       }
+))
+formatNum = function(num){
+       if(num >= 0){
+               paste(" ", format(round(num, 3), nsmall=3), sep="")
+       }else{
+               format(round(num, 3), nsmall=3)
+       }
+}
+
+for(group in c("vandal", "bad faith", "test", "good faith")){
+       group_codings = hugglings[hugglings$group == group,]
+       
+       
+       cat("Result's for ", length(group_codings$group), " '", group, "' 
editors:\n", sep='')
+       cat("============================================================\n")
+       
+       control      = group_codings[group_codings$def,]
+       personal     = group_codings[group_codings$personal,]
+       nodirectives = group_codings[group_codings$nodirectives,]
+       
+       experiments = list(
+               list(name="Personal     ", data=personal),
+               list(name="No Directives", data=nodirectives)
+       )
+       
+       outcomes = list(
+               list(name="Good outcome", field="good_outcome"),
+               list(name="Improves", field="improves"),
+               list(name="Contact", field="contact"),
+               list(name="Stays", field="stay"),
+               list(name="Good contact", field="good_contact")
+       )
+       for(outcome in outcomes){
+               cat(outcome$name, ": \n", sep="")
+               
+               controlLen = length(na.omit(control[[outcome$field]]))
+               controlSuccess = sum(control[[outcome$field]], na.rm=T)
+               cat(
+                       "\tControl      ",
+                       ": prop=", formatNum(controlSuccess/controlLen),
+                       ", n=", controlLen, "\n", 
+                       sep=""
+               )
+               for(experiment in experiments){
+                       expSuccess = sum(experiment$data[[outcome$field]], 
na.rm=T)
+                       expLen  = 
length(na.omit(experiment$data[[outcome$field]]))
+                       t = prop.test(
+                               c(
+                                       expSuccess, 
+                                       controlSuccess
+                               ),
+                               c(
+                                       expLen,
+                                       controlLen
+                               )
+                       )
+                       
+                       propDiff = mean(experiment$data[[outcome$field]], 
na.rm=T)-mean(control[[outcome$field]], na.rm=T)
+                       cat(
+                               "\t", experiment$name, 
+                               ": prop=", formatNum(expSuccess/expLen),
+                               ", diff=", formatNum(propDiff),
+                               ", p-value=", formatNum(t$p.value),
+                               ", conf.int=(", formatNum(t$conf.int[1]), ", ", 
formatNum(t$conf.int[2]), ")", 
+                               ", n=", expLen, "\n",
+                               sep=""
+                       )
+               }
+               cat("\n")
+       }
+       
+       
+       cat("\n\n\n")
+}
+
+meanNoNA = function(x){
+       mean(x, na.rm=T)
+}
+lengthNoNA = function(x){
+       length(na.omit(x))
+}
+
+library(lattice)
+outcomeNames = list(
+       good_outcome = "with a \"good outcome\"",
+       improves     = "who show improvement",
+       contact      = "who contact the reverting editor",
+       good_contact = "who contact the reverting editor nicely",
+       stay         = "who make at least one edit after reading the message"
+)
+for(outcomeName in c("good_outcome", "improves", "contact", "good_contact", 
"stay")){
+       f = with(
+               summaryBy(
+                       outcome ~ group + teaching + personal,
+                       data = data.frame(
+                               outcome  = messaged_codings[[outcomeName]],
+                               teaching = messaged_codings$teaching,
+                               personal = messaged_codings$personal,
+                               group    = messaged_codings$group
+                       ),
+                       FUN=c(meanNoNA, lengthNoNA)
+               ),
+               data.frame(
+                       group    = group,
+                       message  = mapply(
+                               function(personal, teaching){
+                                       if(personal & teaching){
+                                               "personal & teaching"
+                                       }else if(personal){
+                                               "personal"
+                                       }else if(teaching){
+                                               "teaching"
+                                       }else{
+                                               "control"
+                                       }
+                               },
+                               personal,
+                               teaching
+                       ),
+                       #teaching = teaching,
+                       #personal = personal,
+                       prop     = outcome.meanNoNA,
+                       n        = outcome.lengthNoNA
+               )
+       )
+       cat(outcomeName, "\n")
+       cat(f$prop, "\n\n")
+       svg(paste("plots/outcome", outcomeName, "all_groups.svg", sep="."), 
height=4, width=8)
+       print(barchart(
+               prop ~ group | message,
+               data = f,
+               layout=c(4,1),
+               xlab="Pre-message rating",
+               lab="Proportion of editors",
+               main=paste("Proportion of editors", outcomeNames[[outcomeName]])
+       ))
+       dev.off()
+}
+
+messaged_codings$default = !messaged_codings$personal & 
!messaged_codings$teaching
+messaged_codings$teaching_only = messaged_codings$teaching & 
!messaged_codings$personal
+messaged_codings$personal_only = !messaged_codings$teaching & 
messaged_codings$personal
+messaged_codings$teaching_and_personal = messaged_codings$teaching & 
messaged_codings$personal
+
+s = scale
+
+for(condition in c("teaching_only", "personal_only", "teaching_and_personal")){
+       cat("-----------------------------------------------------------\n")
+       cat("-----------", condition, "\n")
+       cat("-----------------------------------------------------------\n")
+       exp_codings = messaged_codings[
+               messaged_codings[[condition]] | 
+               messaged_codings$default,
+       ]
+       
+       exp_codings$condition = exp_codings[[condition]]
+       
+       print(summary(glm(
+               good_outcome ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[exp_codings$image,]
+       )))
+       print(summary(glm(
+               good_outcome ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[!exp_codings$image,]
+       )))
+       
+       
+       print(summary(glm(
+               improves ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[exp_codings$image,]
+       )))
+       print(summary(glm(
+               improves ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[!exp_codings$image,]
+       )))
+       
+       
+       print(summary(glm(
+               contact ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[exp_codings$image,]
+       )))
+       print(summary(glm(
+               contact ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[!exp_codings$image,]
+       )))
+       
+       
+       print(summary(glm(
+               good_contact ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[exp_codings$image,]
+       )))
+       print(summary(glm(
+               good_contact ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[!exp_codings$image,]
+       )))
+       
+       
+       print(summary(glm(
+               stay ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[exp_codings$image,]
+       )))
+       print(summary(glm(
+               stay ~ 
+               anon + 
+               s(ntalk_edits_before_msg) + 
+               s(talk_edits_before_msg) + 
+               s(before_rating) * 
+               condition,
+               data = exp_codings[!exp_codings$image,]
+       )))
+}
+
+


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [106108] trunk/tools/wsor/newbie_warnings/R

Reply via email to