Bearloga has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/391063 )

Change subject: db1047 => db1108
......................................................................

db1047 => db1108

Bug: T156844
Change-Id: I81f0f93a97f7467e1fcf30e20c252fc044bbbd31
---
M DESCRIPTION
M NEWS.md
M R/mysql.R
M man/mysql.Rd
A vignettes/interleaved.R
A vignettes/interleaved.html
6 files changed, 475 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/wmf 
refs/changes/63/391063/1

diff --git a/DESCRIPTION b/DESCRIPTION
index d0d3314..57e4d2f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: wmf
 Type: Package
 Title: R Code for Wikimedia Foundation Internal Usage
-Version: 0.3.0
-Date: 2017-11-01
+Version: 0.3.1
+Date: 2017-11-13
 Authors@R: c(
     person("Mikhail", "Popov", email = "mikh...@wikimedia.org", role = 
c("aut", "cre")),
     person("Oliver", "Keyes", role = "aut", comment = "No longer employed at 
the Foundation"),
diff --git a/NEWS.md b/NEWS.md
index c3927ce..dfd22b8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+wmf 0.3.1
+=========
+* Switched host name from db1047.eqiad.wmnet to db1108.eqiad.wmnet per 
[T156844](https://phabricator.wikimedia.org/T156844)
+
 wmf 0.3.0
 =========
 * C++-based `exact_binomial()` to quickly estimate sample size for exact 
binomial tests
diff --git a/R/mysql.R b/R/mysql.R
index 4594a36..725b8c4 100644
--- a/R/mysql.R
+++ b/R/mysql.R
@@ -33,7 +33,7 @@
 #' @export
 mysql_connect <- function(
   database, default_file = NULL,
-  hostname = ifelse(database == "log", "db1047.eqiad.wmnet", 
"analytics-store.eqiad.wmnet")
+  hostname = ifelse(database == "log", "db1108.eqiad.wmnet", 
"analytics-store.eqiad.wmnet")
 ) {
   # Begin Exclude Linting
   if (is.null(default_file)) {
diff --git a/man/mysql.Rd b/man/mysql.Rd
index 09c9606..2032aa8 100644
--- a/man/mysql.Rd
+++ b/man/mysql.Rd
@@ -11,7 +11,7 @@
 \title{Work with MySQL databases}
 \usage{
 mysql_connect(database, default_file = NULL, hostname = ifelse(database ==
-  "log", "db1047.eqiad.wmnet", "analytics-store.eqiad.wmnet"))
+  "log", "db1108.eqiad.wmnet", "analytics-store.eqiad.wmnet"))
 
 mysql_read(query, database, con = NULL)
 
diff --git a/vignettes/interleaved.R b/vignettes/interleaved.R
new file mode 100644
index 0000000..d903b20
--- /dev/null
+++ b/vignettes/interleaved.R
@@ -0,0 +1,38 @@
+## ---- echo=FALSE---------------------------------------------------------
+set.seed(0)
+
+## ------------------------------------------------------------------------
+data(interleaved_data, package = "wmf") # no preference
+data(interleaved_data_a, package = "wmf") # preference for A
+data(interleaved_data_b, package = "wmf") # preference for B
+
+## ---- results='asis'-----------------------------------------------------
+knitr::kable(head(interleaved_data_b))
+
+## ------------------------------------------------------------------------
+library(wmf)
+
+## ----no_pref-------------------------------------------------------------
+x <- interleaved_data[interleaved_data$event == "click", ]
+x <- x[order(x$session_id, x$timestamp), ]
+boot_x <- interleaved_bootstraps(x$session_id, x$ranking_function)
+hist(boot_x, col = "gray70", border = NA, main = "No preference", xlab = 
"Bootstrapped preferences")
+abline(v = quantile(boot_x, c(0.025, 0.975)), lty = "dashed")
+abline(v = interleaved_preference(x$session_id, x$ranking_function), lwd = 2)
+
+## ----a_pref--------------------------------------------------------------
+y <- interleaved_data_a[interleaved_data_a$event == "click", ]
+y <- y[order(y$session_id, y$timestamp), ]
+boot_y <- interleaved_bootstraps(y$session_id, y$ranking_function)
+hist(boot_y, col = "gray70", border = NA, main = "Preference for A", xlab = 
"Bootstrapped preferences")
+abline(v = quantile(boot_y, c(0.025, 0.975)), lty = "dashed")
+abline(v = interleaved_preference(y$session_id, y$ranking_function), lwd = 2)
+
+## ----b_pref--------------------------------------------------------------
+z <- interleaved_data_b[interleaved_data_b$event == "click", ]
+z <- z[order(z$session_id, z$timestamp), ]
+boot_z <- interleaved_bootstraps(z$session_id, z$ranking_function)
+hist(boot_z, col = "gray70", border = NA, main = "Preference for B", xlab = 
"Bootstrapped preferences")
+abline(v = quantile(boot_z, c(0.025, 0.975)), lty = "dashed")
+abline(v = interleaved_preference(z$session_id, z$ranking_function), lwd = 2)
+
diff --git a/vignettes/interleaved.html b/vignettes/interleaved.html
new file mode 100644
index 0000000..2e4d725
--- /dev/null
+++ b/vignettes/interleaved.html
@@ -0,0 +1,429 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8" />
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="Mikhail Popov" />
+
+<meta name="date" content="2017-11-13" />
+
+<title>Estimating Preference For Ranking Functions With Clicks On Interleaved 
Search Results</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceLine, a.sourceLine { display: inline-block; min-height: 1.25em; }
+a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; 
}
+.sourceCode { overflow: visible; }
+code.sourceCode { white-space: pre; }
+@media print {
+code.sourceCode { white-space: pre-wrap; }
+div.sourceLine, a.sourceLine { text-indent: -1em; padding-left: 1em; }
+}
+pre.numberSource div.sourceLine, .numberSource a.sourceLine
+  { position: relative; }
+pre.numberSource div.sourceLine::before, .numberSource a.sourceLine::before
+  { content: attr(data-line-number);
+    position: absolute; left: -5em; text-align: right; vertical-align: 
baseline;
+    border: none; pointer-events: all;
+    -webkit-touch-callout: none; -webkit-user-select: none;
+    -khtml-user-select: none; -moz-user-select: none;
+    -ms-user-select: none; user-select: none;
+    padding: 0 4px; width: 4em; }
+pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; color: 
#aaaaaa;  padding-left: 4px; }
+@media screen {
+a.sourceLine::before { text-decoration: underline; color: initial; }
+}
+code span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code span.dt { color: #902000; } /* DataType */
+code span.dv { color: #40a070; } /* DecVal */
+code span.bn { color: #40a070; } /* BaseN */
+code span.fl { color: #40a070; } /* Float */
+code span.ch { color: #4070a0; } /* Char */
+code span.st { color: #4070a0; } /* String */
+code span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code span.ot { color: #007020; } /* Other */
+code span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code span.fu { color: #06287e; } /* Function */
+code span.er { color: #ff0000; font-weight: bold; } /* Error */
+code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* 
Warning */
+code span.cn { color: #880000; } /* Constant */
+code span.sc { color: #4070a0; } /* SpecialChar */
+code span.vs { color: #4070a0; } /* VerbatimString */
+code span.ss { color: #bb6688; } /* SpecialString */
+code span.im { } /* Import */
+code span.va { color: #19177c; } /* Variable */
+code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code span.op { color: #666666; } /* Operator */
+code span.bu { } /* BuiltIn */
+code span.ex { } /* Extension */
+code span.pp { color: #bc7a00; } /* Preprocessor */
+code span.at { color: #7d9029; } /* Attribute */
+code span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* 
Annotation */
+code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* 
CommentVar */
+code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* 
Information */
+</style>
+
+
+
+<style type="text/css">body {
+background-color: #fff;
+margin: 1em auto;
+max-width: 700px;
+overflow: visible;
+padding-left: 2em;
+padding-right: 2em;
+font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
+font-size: 14px;
+line-height: 1.35;
+}
+#header {
+text-align: center;
+}
+#TOC {
+clear: both;
+margin: 0 0 10px 10px;
+padding: 4px;
+width: 400px;
+border: 1px solid #CCCCCC;
+border-radius: 5px;
+background-color: #f6f6f6;
+font-size: 13px;
+line-height: 1.3;
+}
+#TOC .toctitle {
+font-weight: bold;
+font-size: 15px;
+margin-left: 5px;
+}
+#TOC ul {
+padding-left: 40px;
+margin-left: -1.5em;
+margin-top: 5px;
+margin-bottom: 5px;
+}
+#TOC ul ul {
+margin-left: -2em;
+}
+#TOC li {
+line-height: 16px;
+}
+table {
+margin: 1em auto;
+border-width: 1px;
+border-color: #DDDDDD;
+border-style: outset;
+border-collapse: collapse;
+}
+table th {
+border-width: 2px;
+padding: 5px;
+border-style: inset;
+}
+table td {
+border-width: 1px;
+border-style: inset;
+line-height: 18px;
+padding: 5px 5px;
+}
+table, table th, table td {
+border-left-style: none;
+border-right-style: none;
+}
+table thead, table tr.even {
+background-color: #f7f7f7;
+}
+p {
+margin: 0.5em 0;
+}
+blockquote {
+background-color: #f6f6f6;
+padding: 0.25em 0.75em;
+}
+hr {
+border-style: solid;
+border: none;
+border-top: 1px solid #777;
+margin: 28px 0;
+}
+dl {
+margin-left: 0;
+}
+dl dd {
+margin-bottom: 13px;
+margin-left: 13px;
+}
+dl dt {
+font-weight: bold;
+}
+ul {
+margin-top: 0;
+}
+ul li {
+list-style: circle outside;
+}
+ul ul {
+margin-bottom: 0;
+}
+pre, code {
+background-color: #f7f7f7;
+border-radius: 3px;
+color: #333;
+white-space: pre-wrap; 
+}
+pre {
+border-radius: 3px;
+margin: 5px 0px 10px 0px;
+padding: 10px;
+}
+pre:not([class]) {
+background-color: #f7f7f7;
+}
+code {
+font-family: Consolas, Monaco, 'Courier New', monospace;
+font-size: 85%;
+}
+p > code, li > code {
+padding: 2px 0px;
+}
+div.figure {
+text-align: center;
+}
+img {
+background-color: #FFFFFF;
+padding: 2px;
+border: 1px solid #DDDDDD;
+border-radius: 3px;
+border: 1px solid #CCCCCC;
+margin: 0 5px;
+}
+h1 {
+margin-top: 0;
+font-size: 35px;
+line-height: 40px;
+}
+h2 {
+border-bottom: 4px solid #f7f7f7;
+padding-top: 10px;
+padding-bottom: 2px;
+font-size: 145%;
+}
+h3 {
+border-bottom: 2px solid #f7f7f7;
+padding-top: 10px;
+font-size: 120%;
+}
+h4 {
+border-bottom: 1px solid #f7f7f7;
+margin-left: 8px;
+font-size: 105%;
+}
+h5, h6 {
+border-bottom: 1px solid #ccc;
+font-size: 105%;
+}
+a {
+color: #0033dd;
+text-decoration: none;
+}
+a:hover {
+color: #6666ff; }
+a:visited {
+color: #800080; }
+a:visited:hover {
+color: #BB00BB; }
+a[href^="http:"] {
+text-decoration: underline; }
+a[href^="https:"] {
+text-decoration: underline; }
+
+code > span.kw { color: #555; font-weight: bold; } 
+code > span.dt { color: #902000; } 
+code > span.dv { color: #40a070; } 
+code > span.bn { color: #d14; } 
+code > span.fl { color: #d14; } 
+code > span.ch { color: #d14; } 
+code > span.st { color: #d14; } 
+code > span.co { color: #888888; font-style: italic; } 
+code > span.ot { color: #007020; } 
+code > span.al { color: #ff0000; font-weight: bold; } 
+code > span.fu { color: #900; font-weight: bold; }  code > span.er { color: 
#a61717; background-color: #e3d2d2; } 
+</style>
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Estimating Preference For Ranking Functions With 
Clicks On Interleaved Search Results</h1>
+<h4 class="author"><em>Mikhail Popov</em></h4>
+<h4 class="date"><em>2017-11-13</em></h4>
+
+
+
+<section id="introduction" class="level2">
+<h2>Introduction</h2>
+<p>The way Search and Discovery’s Analysts have been assessing changes to 
search has traditionally relied on A/B testing, wherein the control group 
receives results using the latest configuration and the test group (or groups) 
receives results using the experimental configuration. Another way to evaluate 
the user-perceived relevance of search results from the experimental 
configuration relies on a technique called <em>interleaving</em>. In it, each 
user is their own baseline – we perform two searches behind the scenes and then 
interleave them together into a single set of results using the team draft 
algorithm described by Chapelle et al. (2012):</p>
+<ol type="1">
+<li><strong>Input</strong>: result sets <span class="math inline">\(A\)</span> 
and <span class="math inline">\(B\)</span>.</li>
+<li><strong>Initialize</strong>: an empty interleaved result set <span 
class="math inline">\(I\)</span> and drafts <span class="math inline">\(T_A, 
T_B\)</span> for keeping track of which results belong to which team.</li>
+<li>For each round of picking:
+<ol type="a">
+<li>Randomly decide whether we first pick from <span class="math 
inline">\(A\)</span> or from <span class="math inline">\(B\)</span>.</li>
+<li>Without loss of generality, if <span class="math inline">\(A\)</span> is 
randomly chosen to go first, grab top result <span class="math inline">\(a \in 
A\)</span>, append it to <span class="math inline">\(I\)</span> and <span 
class="math inline">\(T_A\)</span>: <span class="math inline">\(I \gets a, T_A 
\gets a\)</span>.</li>
+<li>Take the top result <span class="math inline">\(b \in B\)</span> such that 
<span class="math inline">\(b \neq a\)</span> and append it to <span 
class="math inline">\(I\)</span> after <span class="math inline">\(a\)</span> 
and to <span class="math inline">\(T_B\)</span>: <span class="math inline">\(I 
\gets b, T_B \gets b\)</span>.</li>
+<li>Update <span class="math inline">\(A = A \setminus \{a, b\}\)</span> and 
<span class="math inline">\(B = B \setminus \{a, b\}\)</span> so the two results 
that were just appended to <span class="math inline">\(I\)</span> are not 
considered again.</li>
+<li>Stop when <span class="math inline">\(|I| = \text{maximum per 
page}\)</span>, so only the first page contains interleaved results.</li>
+</ol></li>
+<li><strong>Output</strong>: interleaved results <span class="math 
inline">\(I\)</span> and team drafts <span class="math inline">\(T_A, 
T_B\)</span>.</li>
+</ol>
+<p>By keeping track of which results belong to which ranking function when the 
user clicks on them, we can estimate a preference for one ranker over the 
other. The preference statistic <span class="math 
inline">\(\Delta_{AB}\)</span> is described by Chapelle et al. as</p>
+<p><span class="math display">\[
+\Delta_{AB} = \frac{\text{wins}_A + \frac{1}{2} \text{ties}}{\text{wins}_A + 
\text{wins}_B + \text{ties}} - 0.5,
+\]</span></p>
+<p>where wins are calculated by counting clicks on the results from teams “A” 
and “B”. A positive value of <span class="math inline">\(\Delta_{AB}\)</span> 
indicates that <span class="math inline">\(A \succ B\)</span>, a negative value 
indicates that <span class="math inline">\(B \succ A\)</span>. We performed two 
types of calculations: per-session and per-search. In 
<strong>per-session</strong>, “A” has won if there are more clicks on team “A” 
results than team “B” results and <span class="math 
inline">\(\text{wins}_A\)</span> is incremented by one for each such session. 
In <strong>per-search</strong>, “A” has won if there are more clicks on team 
“A” results in each search, thus any one session can contribute multiple points 
to the overall <span class="math inline">\(\text{wins}_A\)</span>.</p>
+<p>In order to obtain confidence intervals for the preference statistic, we 
utilize <a 
href="https://en.wikipedia.org/wiki/Bootstrapping_(statistics)">bootstrapping</a>
 with <span class="math inline">\(m\)</span> iterations.</p>
+<ol type="1">
+<li>For bootstrap iteration <span class="math inline">\(i = 1, \ldots, 
m\)</span>:
+<ol type="a">
+<li>Sample unique IDs with replacement.</li>
+<li>Calculate <span class="math inline">\(\Delta_{AB}^{(i)}\)</span> from new 
data.</li>
+</ol></li>
+<li>The confidence intervals (CIs) are calculated by finding percentiles of 
the distribution of bootstrapped preferences <span class="math 
inline">\(\{\Delta_{AB}^{(1)}, \ldots, \Delta_{AB}^{(m)}\}\)</span> – e.g. the 
2.5th and 97.5th percentiles for a 95% CI.</li>
+</ol>
+</section>
+<section id="simulated-data" class="level2">
+<h2>Simulated Data</h2>
+<p>This package provides simulated search and click data. The three built-in 
datasets have simulated users that (1) exhibit no preference, (2) exhibit 
preference for the ranking function “A”, and (3) exhibit preference for the 
ranking function “B”.</p>
+<pre class="sourceCode r" id="cb1"><code class="sourceCode r"><div 
class="sourceLine" id="cb1-1" data-line-number="1"><span 
class="kw">data</span>(interleaved_data, <span class="dt">package =</span> 
<span class="st">&quot;wmf&quot;</span>) <span class="co"># no 
preference</span></div>
+<div class="sourceLine" id="cb1-2" data-line-number="2"><span 
class="kw">data</span>(interleaved_data_a, <span class="dt">package =</span> 
<span class="st">&quot;wmf&quot;</span>) <span class="co"># preference for 
A</span></div>
+<div class="sourceLine" id="cb1-3" data-line-number="3"><span 
class="kw">data</span>(interleaved_data_b, <span class="dt">package =</span> 
<span class="st">&quot;wmf&quot;</span>) <span class="co"># preference for 
B</span></div></code></pre>
+<p>Here are the first few rows of the third dataset:</p>
+<pre class="sourceCode r" id="cb2"><code class="sourceCode r"><div 
class="sourceLine" id="cb2-1" data-line-number="1">knitr<span 
class="op">::</span><span class="kw">kable</span>(<span 
class="kw">head</span>(interleaved_data_b))</div></code></pre>
+<table>
+<thead>
+<tr class="header">
+<th style="text-align: left;">session_id</th>
+<th style="text-align: left;">timestamp</th>
+<th style="text-align: left;">event</th>
+<th style="text-align: right;">position</th>
+<th style="text-align: left;">ranking_function</th>
+</tr>
+</thead>
+<tbody>
+<tr class="odd">
+<td style="text-align: left;">rusjmp29aw</td>
+<td style="text-align: left;">2018-08-01 00:00:47</td>
+<td style="text-align: left;">serp</td>
+<td style="text-align: right;">NA</td>
+<td style="text-align: left;">NA</td>
+</tr>
+<tr class="even">
+<td style="text-align: left;">rusjmp29aw</td>
+<td style="text-align: left;">2018-08-01 00:05:18</td>
+<td style="text-align: left;">click</td>
+<td style="text-align: right;">18</td>
+<td style="text-align: left;">B</td>
+</tr>
+<tr class="odd">
+<td style="text-align: left;">gdkhnfo5ts</td>
+<td style="text-align: left;">2018-08-01 00:01:14</td>
+<td style="text-align: left;">serp</td>
+<td style="text-align: right;">NA</td>
+<td style="text-align: left;">NA</td>
+</tr>
+<tr class="even">
+<td style="text-align: left;">gdkhnfo5ts</td>
+<td style="text-align: left;">2018-08-01 00:02:44</td>
+<td style="text-align: left;">click</td>
+<td style="text-align: right;">7</td>
+<td style="text-align: left;">B</td>
+</tr>
+<tr class="odd">
+<td style="text-align: left;">gdkhnfo5ts</td>
+<td style="text-align: left;">2018-08-01 00:03:13</td>
+<td style="text-align: left;">click</td>
+<td style="text-align: right;">11</td>
+<td style="text-align: left;">B</td>
+</tr>
+<tr class="even">
+<td style="text-align: left;">gdkhnfo5ts</td>
+<td style="text-align: left;">2018-08-01 00:03:42</td>
+<td style="text-align: left;">click</td>
+<td style="text-align: right;">14</td>
+<td style="text-align: left;">A</td>
+</tr>
+</tbody>
+</table>
+</section>
+<section id="estimation" class="level2">
+<h2>Estimation</h2>
+<pre class="sourceCode r" id="cb3"><code class="sourceCode r"><div 
class="sourceLine" id="cb3-1" data-line-number="1"><span 
class="kw">library</span>(wmf)</div></code></pre>
+<p>To calculate <span class="math inline">\(\Delta_{AB}\)</span> with 
<code>interleaved_preference</code>, we will need to use the clicks. We also 
use bootstrapping via <code>interleaved_bootstraps</code> which resamples 
sessions (with replacement) to obtain a distribution of the preference 
statistic <span class="math inline">\(\Delta_{AB}\)</span>. After we plot each 
bootstrapped sample, we mark the 95% confidence interval bounds. 
<strong>Note</strong> that <code>interleaved_confint</code> outputs the 
<code>quantile</code>-based CI and uses the same bootstrap function 
internally.</p>
+<section id="no-preference" class="level3">
+<h3>No preference</h3>
+<p>When users click on the interleaved results <em>without</em> a preference, 
the resulting preference statistic is close to 0 and the confidence interval 
covers 0:</p>
+<pre class="sourceCode r" id="cb4"><code class="sourceCode r"><div 
class="sourceLine" id="cb4-1" data-line-number="1">x &lt;-<span class="st"> 
</span>interleaved_data[interleaved_data<span class="op">$</span>event <span 
class="op">==</span><span class="st"> &quot;click&quot;</span>, ]</div>
+<div class="sourceLine" id="cb4-2" data-line-number="2">x &lt;-<span 
class="st"> </span>x[<span class="kw">order</span>(x<span 
class="op">$</span>session_id, x<span class="op">$</span>timestamp), ]</div>
+<div class="sourceLine" id="cb4-3" data-line-number="3">boot_x &lt;-<span 
class="st"> </span><span class="kw">interleaved_bootstraps</span>(x<span 
class="op">$</span>session_id, x<span class="op">$</span>ranking_function)</div>
+<div class="sourceLine" id="cb4-4" data-line-number="4"><span 
class="kw">hist</span>(boot_x, <span class="dt">col =</span> <span 
class="st">&quot;gray70&quot;</span>, <span class="dt">border =</span> <span 
class="ot">NA</span>, <span class="dt">main =</span> <span class="st">&quot;No 
preference&quot;</span>, <span class="dt">xlab =</span> <span 
class="st">&quot;Bootstrapped preferences&quot;</span>)</div>
+<div class="sourceLine" id="cb4-5" data-line-number="5"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">quantile</span>(boot_x, <span class="kw">c</span>(<span 
class="fl">0.025</span>, <span class="fl">0.975</span>)), <span class="dt">lty 
=</span> <span class="st">&quot;dashed&quot;</span>)</div>
+<div class="sourceLine" id="cb4-6" data-line-number="6"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">interleaved_preference</span>(x<span class="op">$</span>session_id, 
x<span class="op">$</span>ranking_function), <span class="dt">lwd =</span> 
<span class="dv">2</span>)</div></code></pre>
+<p><img 
src=""
 /><!-- --></p>
+</section>
+<section id="preference-for-a" class="level3">
+<h3>Preference for A</h3>
+<p>When users click on the interleaved results <em>with</em> a preference for 
A, the resulting preference statistic is <em>positive</em> and the confidence 
interval does <em>not</em> cover 0:</p>
+<pre class="sourceCode r" id="cb5"><code class="sourceCode r"><div 
class="sourceLine" id="cb5-1" data-line-number="1">y &lt;-<span class="st"> 
</span>interleaved_data_a[interleaved_data_a<span class="op">$</span>event 
<span class="op">==</span><span class="st"> &quot;click&quot;</span>, ]</div>
+<div class="sourceLine" id="cb5-2" data-line-number="2">y &lt;-<span 
class="st"> </span>y[<span class="kw">order</span>(y<span 
class="op">$</span>session_id, y<span class="op">$</span>timestamp), ]</div>
+<div class="sourceLine" id="cb5-3" data-line-number="3">boot_y &lt;-<span 
class="st"> </span><span class="kw">interleaved_bootstraps</span>(y<span 
class="op">$</span>session_id, y<span class="op">$</span>ranking_function)</div>
+<div class="sourceLine" id="cb5-4" data-line-number="4"><span 
class="kw">hist</span>(boot_y, <span class="dt">col =</span> <span 
class="st">&quot;gray70&quot;</span>, <span class="dt">border =</span> <span 
class="ot">NA</span>, <span class="dt">main =</span> <span 
class="st">&quot;Preference for A&quot;</span>, <span class="dt">xlab =</span> 
<span class="st">&quot;Bootstrapped preferences&quot;</span>)</div>
+<div class="sourceLine" id="cb5-5" data-line-number="5"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">quantile</span>(boot_y, <span class="kw">c</span>(<span 
class="fl">0.025</span>, <span class="fl">0.975</span>)), <span class="dt">lty 
=</span> <span class="st">&quot;dashed&quot;</span>)</div>
+<div class="sourceLine" id="cb5-6" data-line-number="6"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">interleaved_preference</span>(y<span class="op">$</span>session_id, 
y<span class="op">$</span>ranking_function), <span class="dt">lwd =</span> 
<span class="dv">2</span>)</div></code></pre>
+<p><img 
src=""
 /><!-- --></p>
+</section>
+<section id="preference-for-b" class="level3">
+<h3>Preference for B</h3>
+<p>When users click on the interleaved results <em>with</em> a preference for 
B, the resulting preference statistic is <em>negative</em> and the confidence 
interval does <em>not</em> cover 0:</p>
+<pre class="sourceCode r" id="cb6"><code class="sourceCode r"><div 
class="sourceLine" id="cb6-1" data-line-number="1">z &lt;-<span class="st"> 
</span>interleaved_data_b[interleaved_data_b<span class="op">$</span>event 
<span class="op">==</span><span class="st"> &quot;click&quot;</span>, ]</div>
+<div class="sourceLine" id="cb6-2" data-line-number="2">z &lt;-<span 
class="st"> </span>z[<span class="kw">order</span>(z<span 
class="op">$</span>session_id, z<span class="op">$</span>timestamp), ]</div>
+<div class="sourceLine" id="cb6-3" data-line-number="3">boot_z &lt;-<span 
class="st"> </span><span class="kw">interleaved_bootstraps</span>(z<span 
class="op">$</span>session_id, z<span class="op">$</span>ranking_function)</div>
+<div class="sourceLine" id="cb6-4" data-line-number="4"><span 
class="kw">hist</span>(boot_z, <span class="dt">col =</span> <span 
class="st">&quot;gray70&quot;</span>, <span class="dt">border =</span> <span 
class="ot">NA</span>, <span class="dt">main =</span> <span 
class="st">&quot;Preference for B&quot;</span>, <span class="dt">xlab =</span> 
<span class="st">&quot;Bootstrapped preferences&quot;</span>)</div>
+<div class="sourceLine" id="cb6-5" data-line-number="5"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">quantile</span>(boot_z, <span class="kw">c</span>(<span 
class="fl">0.025</span>, <span class="fl">0.975</span>)), <span class="dt">lty 
=</span> <span class="st">&quot;dashed&quot;</span>)</div>
+<div class="sourceLine" id="cb6-6" data-line-number="6"><span 
class="kw">abline</span>(<span class="dt">v =</span> <span 
class="kw">interleaved_preference</span>(z<span class="op">$</span>session_id, 
z<span class="op">$</span>ranking_function), <span class="dt">lwd =</span> 
<span class="dv">2</span>)</div></code></pre>
+<p><img 
src=""
 /><!-- --></p>
+</section>
+</section>
+<section id="references" class="level1">
+<h1>References</h1>
+<ul>
+<li>Chapelle, O., Joachims, T., Radlinski, F., &amp; Yue, Y. (2012). 
Large-scale validation and analysis of interleaved search evaluation. <em>ACM 
Transactions on Information Systems</em>, <strong>30</strong>(1), 1-41. <a 
href="https://doi.org/10.1145/2094072.2094078">doi:10.1145/2094072.2094078</a></li>
+<li>Radlinski, F. and Craswell, N. (2013). <a 
href="https://www.microsoft.com/en-us/research/publication/optimized-interleaving-for-online-retrieval-evaluation/">Optimized
 interleaving for online retrieval evaluation</a>. <em>ACM International 
Conference on Web Search and Data Mining (WSDM)</em>. <a 
href="https://doi.org/10.1145/2433396.2433429">doi:10.1145/2433396.2433429</a></li>
+</ul>
+</section>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = 
"https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";;
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>

-- 
To view, visit https://gerrit.wikimedia.org/r/391063
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I81f0f93a97f7467e1fcf30e20c252fc044bbbd31
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/wmf
Gerrit-Branch: master
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to