https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106108
Revision: 106108 Author: halfak Date: 2011-12-13 21:53:08 +0000 (Tue, 13 Dec 2011) Log Message: ----------- Added CHI^2 tests to hugglings Added Paths: ----------- trunk/tools/wsor/newbie_warnings/R/chi2_tests.R trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R Added: trunk/tools/wsor/newbie_warnings/R/chi2_tests.R =================================================================== --- trunk/tools/wsor/newbie_warnings/R/chi2_tests.R (rev 0) +++ trunk/tools/wsor/newbie_warnings/R/chi2_tests.R 2011-12-13 21:53:08 UTC (rev 106108) @@ -0,0 +1,188 @@ +source("loader/load_hugglings.R") +source("loader/load_huggling_codings.R") +library(doBy) +hugglings = load_hugglings() + +hugglingCounts = summaryBy( + recipient ~ recipient, + data = hugglings, + FUN=length +) +hugglingCounts$count = hugglingCounts$recipient.length +hugglingCounts$recipient.length = NULL + +hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) + +huggling_codings = load_huggling_codings(reload=T) +messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),] + +messaged_codings$retailates = messaged_codings$retaliates > 0 +messaged_codings$contact = !is.na(messaged_codings$contacts_huggler) & (messaged_codings$contacts_huggler > 0 | messaged_codings$retaliates > 0) +messaged_codings$quality_work = messaged_codings$after_rating >= 3.0 +messaged_codings$stay = !is.na(messaged_codings$after_rating) +messaged_codings$improves = messaged_codings$after_rating > messaged_codings$before_rating +messaged_codings$anon = messaged_codings$is_anon > 0 +messaged_codings$talk_edits_before_msg = with( + messaged_codings, + user_talk_edits_after_msg + article_talk_edits_before_msg +) +messaged_codings$ntalk_edits_before_msg = with( + messaged_codings, + edits_before_msg - talk_edits_before_msg +) +messaged_codings$good_contact = mapply( + function(contact, retaliates){ + if(!is.na(contact) & contact){ + retaliates <= 0 + }else{ + NA + } + }, + messaged_codings$contact, + messaged_codings$retaliates +) +messaged_codings$good_outcome = with( + messaged_codings, + ( + before_rating <= 2 & + ( + is.na(after_rating) | + after_rating > 2 + ) + ) | + ( + !is.na(good_contact) & + good_contact + ) | + ( + !is.na(quality_work) & + quality_work + ) +) + +## +# Groups +# +# - < 2 at least one of us thought "no hope" +# - >= 2 & <= 3 possibles +# - > 3 at least one of us thought "golden" +# +# For each group: +# - contact +# - contact huggler + retaliate +# - talk? (wait for staeiou) +# - continue editing +# - did they actually +# - quality +# - improve +# - was it good +# - degrade +# +# +# Predictors: +# - number of edilts before message +# - number deleted +# - makes edits to talk (before/after) + +messaged_codings$group = as.factor(sapply( + messaged_codings$before_rating, + function(rating){ + if(is.na(rating)){ + NA + }else if(rating < 2){ + "unlikely" + }else if(rating <= 3){ + "possible" + }else{ + "golden" + } + } +)) + +formatNum = function(num){ + if(!is.numeric(num) | is.nan(num)){ + " --- " + } + else if(num >= 0){ + paste(" ", format(round(num, 3), nsmall=3), sep="") + }else{ + format(round(num, 3), nsmall=3) + } +} + +for(group in c("unlikely", "possible", "golden")){ + group_codings = messaged_codings[messaged_codings$group == group,] + + + cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='') + cat("============================================================\n") + + control = group_codings[!group_codings$personal & !group_codings$teaching,] + personal = group_codings[group_codings$personal & !group_codings$teaching,] + teaching = group_codings[group_codings$teaching & !group_codings$personal,] + both = group_codings[group_codings$teaching & group_codings$personal,] + + experiments = list( + list(name="Personal ", data=personal), + list(name="Teaching ", data=teaching), + list(name="Personal & Teaching", data=teaching) + ) + + outcomes = list( + list(name="Good outcome", field="good_outcome"), + list(name="Improves", field="improves"), + list(name="Contact", field="contact"), + list(name="Stays", field="stay"), + list(name="Good contact", field="good_contact") + ) + + for(outcome in outcomes){ + cat(outcome$name, ": \n", sep="") + + controlLen = length(na.omit(control[[outcome$field]])) + controlSuccess = sum(control[[outcome$field]], na.rm=T) + cat( + "\tControl ", + ": prop=", formatNum(controlSuccess/controlLen), + ", n=", controlLen, "\n", + sep="" + ) + for(experiment in experiments){ + expSuccess = sum(experiment$data[[outcome$field]], na.rm=T) + expLen = length(na.omit(experiment$data[[outcome$field]])) + if(controlLen > 0 & expLen > 0){ + t = prop.test( + c( + expSuccess, + controlSuccess + ), + c( + expLen, + controlLen + ) + ) + }else{ + t = list( + p.value=NA, + conf.int=c(NA, NA) + ) + } + + propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T) + cat( + "\t", experiment$name, + ": prop=", formatNum(expSuccess/expLen), + ", diff=", formatNum(propDiff), + ", p-value=", formatNum(t$p.value), + ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")", + ", n=", expLen, "\n", + sep="" + ) + } + cat("\n") + } + + + cat("\n\n\n") +} + Added: trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R =================================================================== --- trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R (rev 0) +++ trunk/tools/wsor/newbie_warnings/R/chi2_tests_mk2.R 2011-12-13 21:53:08 UTC (rev 106108) @@ -0,0 +1,377 @@ +source("loader/load_huggling_codings_mk2.R") +library(doBy) +hugglings = load_huggling_codings_mk2() + +#hugglingCounts = summaryBy( +# recipient ~ recipient, +# data = hugglings, +# FUN=length +#) +#hugglingCounts$count = hugglingCounts$recipient.length +#hugglingCounts$recipient.length = NULL +# +#hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) + +#huggling_codings = load_huggling_codings(reload=T) +#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),] +ifNA = function(val, naThen){ + if(is.na(val)){ + naThen + }else{ + val + } +} + +hugglings$contact = with( + hugglings, + mapply( + ifNA, + responds_hugglers_talk | + responds_own_talk | + responds_elsewhere | + retaliates, + F + ) +) +hugglings$good_contact = mapply( + function(contact, retaliates){ + if(!contact){ + NA + }else{ + !retaliates + } + }, + hugglings$contact, + hugglings$retaliates +) +hugglings$stay = !is.na(hugglings$after_rating) +hugglings$improves = hugglings$after_rating > hugglings$before_rating +hugglings$talk_edits_before_msg = with( + hugglings, + user_talk_edits_after_msg + article_talk_edits_before_msg +) +# Can't do it +#messaged_codings$ntalk_edits_before_msg = with( +# messaged_codings, +# edits_before_msg - talk_edits_before_msg +#) + +hugglings$good_outcome = with( + hugglings, + ( #Suckas leave or get better + before_rating <= 2 & + ( + is.na(after_rating) | + after_rating > 2 + ) + ) | + ( #Good contact was made + !is.na(good_contact) & + good_contact + ) | + ( #Edits are good after receiving message + !is.na(after_rating) & + after_rating > 3 + ) +) + +## +# Groups +# +# - <= 1 "vandal": We all agreed that the editor was a blatant vandal +# - > 1 & <= 2 "bad faith": We all agreed that the editor was bad faith +# - > 2 & < 4 "test": A test edit, but not good faith +# - >= 4 "good faith": Good faith to excellent +# +# For each group: +# - contact +# - contact huggler + retaliate +# - talk? (wait for staeiou) +# - continue editing +# - did they actually +# - quality +# - improve +# - was it good +# - degrade +# +# +# Predictors: +# - number of edilts before message +# - number deleted +# - makes edits to talk (before/after) + +hugglings$group = as.factor(sapply( + hugglings$before_rating, + function(rating){ + if(is.na(rating)){ + NA + }else if(rating <= 1){ + "vandal" + }else if(rating > 1 & rating <= 2){ + "bad faith" + }else if(rating > 2 & rating < 4){ + "test" + }else if(rating >= 4){ + "good faith" + }else{ + NA + } + } +)) +formatNum = function(num){ + if(num >= 0){ + paste(" ", format(round(num, 3), nsmall=3), sep="") + }else{ + format(round(num, 3), nsmall=3) + } +} + +for(group in c("vandal", "bad faith", "test", "good faith")){ + group_codings = hugglings[hugglings$group == group,] + + + cat("Result's for ", length(group_codings$group), " '", group, "' editors:\n", sep='') + cat("============================================================\n") + + control = group_codings[group_codings$def,] + personal = group_codings[group_codings$personal,] + nodirectives = group_codings[group_codings$nodirectives,] + + experiments = list( + list(name="Personal ", data=personal), + list(name="No Directives", data=nodirectives) + ) + + outcomes = list( + list(name="Good outcome", field="good_outcome"), + list(name="Improves", field="improves"), + list(name="Contact", field="contact"), + list(name="Stays", field="stay"), + list(name="Good contact", field="good_contact") + ) + for(outcome in outcomes){ + cat(outcome$name, ": \n", sep="") + + controlLen = length(na.omit(control[[outcome$field]])) + controlSuccess = sum(control[[outcome$field]], na.rm=T) + cat( + "\tControl ", + ": prop=", formatNum(controlSuccess/controlLen), + ", n=", controlLen, "\n", + sep="" + ) + for(experiment in experiments){ + expSuccess = sum(experiment$data[[outcome$field]], na.rm=T) + expLen = length(na.omit(experiment$data[[outcome$field]])) + t = prop.test( + c( + expSuccess, + controlSuccess + ), + c( + expLen, + controlLen + ) + ) + + propDiff = mean(experiment$data[[outcome$field]], na.rm=T)-mean(control[[outcome$field]], na.rm=T) + cat( + "\t", experiment$name, + ": prop=", formatNum(expSuccess/expLen), + ", diff=", formatNum(propDiff), + ", p-value=", formatNum(t$p.value), + ", conf.int=(", formatNum(t$conf.int[1]), ", ", formatNum(t$conf.int[2]), ")", + ", n=", expLen, "\n", + sep="" + ) + } + cat("\n") + } + + + cat("\n\n\n") +} + +meanNoNA = function(x){ + mean(x, na.rm=T) +} +lengthNoNA = function(x){ + length(na.omit(x)) +} + +library(lattice) +outcomeNames = list( + good_outcome = "with a \"good outcome\"", + improves = "who show improvement", + contact = "who contact the reverting editor", + good_contact = "who contact the reverting editor nicely", + stay = "who make at least one edit after reading the message" +) +for(outcomeName in c("good_outcome", "improves", "contact", "good_contact", "stay")){ + f = with( + summaryBy( + outcome ~ group + teaching + personal, + data = data.frame( + outcome = messaged_codings[[outcomeName]], + teaching = messaged_codings$teaching, + personal = messaged_codings$personal, + group = messaged_codings$group + ), + FUN=c(meanNoNA, lengthNoNA) + ), + data.frame( + group = group, + message = mapply( + function(personal, teaching){ + if(personal & teaching){ + "personal & teaching" + }else if(personal){ + "personal" + }else if(teaching){ + "teaching" + }else{ + "control" + } + }, + personal, + teaching + ), + #teaching = teaching, + #personal = personal, + prop = outcome.meanNoNA, + n = outcome.lengthNoNA + ) + ) + cat(outcomeName, "\n") + cat(f$prop, "\n\n") + svg(paste("plots/outcome", outcomeName, "all_groups.svg", sep="."), height=4, width=8) + print(barchart( + prop ~ group | message, + data = f, + layout=c(4,1), + xlab="Pre-message rating", + lab="Proportion of editors", + main=paste("Proportion of editors", outcomeNames[[outcomeName]]) + )) + dev.off() +} + +messaged_codings$default = !messaged_codings$personal & !messaged_codings$teaching +messaged_codings$teaching_only = messaged_codings$teaching & !messaged_codings$personal +messaged_codings$personal_only = !messaged_codings$teaching & messaged_codings$personal +messaged_codings$teaching_and_personal = messaged_codings$teaching & messaged_codings$personal + +s = scale + +for(condition in c("teaching_only", "personal_only", "teaching_and_personal")){ + cat("-----------------------------------------------------------\n") + cat("-----------", condition, "\n") + cat("-----------------------------------------------------------\n") + exp_codings = messaged_codings[ + messaged_codings[[condition]] | + messaged_codings$default, + ] + + exp_codings$condition = exp_codings[[condition]] + + print(summary(glm( + good_outcome ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[exp_codings$image,] + ))) + print(summary(glm( + good_outcome ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[!exp_codings$image,] + ))) + + + print(summary(glm( + improves ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[exp_codings$image,] + ))) + print(summary(glm( + improves ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[!exp_codings$image,] + ))) + + + print(summary(glm( + contact ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[exp_codings$image,] + ))) + print(summary(glm( + contact ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[!exp_codings$image,] + ))) + + + print(summary(glm( + good_contact ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[exp_codings$image,] + ))) + print(summary(glm( + good_contact ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[!exp_codings$image,] + ))) + + + print(summary(glm( + stay ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[exp_codings$image,] + ))) + print(summary(glm( + stay ~ + anon + + s(ntalk_edits_before_msg) + + s(talk_edits_before_msg) + + s(before_rating) * + condition, + data = exp_codings[!exp_codings$image,] + ))) +} + + _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs