Joal has submitted this change and it was merged.

Change subject: Evaluating Pageview tagging only for apps requests
......................................................................


Evaluating Pageview tagging only for apps requests

Requests that come tagged with pageview=1 in x-analytics
header are considered pageviews if they are coming from an app
client. We look at the value of this header once
we have verified other attributes of the request that are needed
for it to be consider a pageview, like http status.

Bug: T128612

Change-Id: I5b74b9a2c9856fcbacda521fe5fb388e28307d40
---
M 
refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
M 
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
2 files changed, 141 insertions(+), 59 deletions(-)

Approvals:
  Joal: Looks good to me, approved
  jenkins-bot: Verified



diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
index 051e442..f14206a 100644
--- 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/PageviewDefinition.java
@@ -168,6 +168,10 @@
      * Please note that requests tagged as 'preview' are not counted
      * as pageviews.
      *
+     * We let apps decide whether a request is a pageview by tagging it as such
+     * on x-analytics header, if pageview=1 is present
+     * we do not look further at urls.
+     *
      * We use the raw xAnalytics header rather than x_analytics_map
      * to make sure this function can be applied
      * to raw data, where the parsing of x-Analytics header into
@@ -199,19 +203,20 @@
         if 
(wr.getXAnalyticsValue(rawXAnalyticsHeader,"preview").trim().equalsIgnoreCase("1"))
             return false;
 
-        if 
(wr.getXAnalyticsValue(rawXAnalyticsHeader,"pageview").trim().equalsIgnoreCase("1"))
-            return true;
+        boolean isTaggedPageview = 
(wr.getXAnalyticsValue(rawXAnalyticsHeader,"pageview").trim().equalsIgnoreCase("1"));
 
+        return (Utilities.stringContains(contentType, appContentType)
+                && Utilities.stringContains(userAgent,   appUserAgent)
 
-        return (
-               Utilities.stringContains(uriPath,     uriPathAPI)
-            && (
-                    Utilities.stringContains(uriQuery, appPageURIQuery)
-                    || (Utilities.stringContains(uriQuery, iosAppPageURIQuery) 
&& Utilities.stringContains(userAgent, iosUserAgent))
+                && (isTaggedPageview ||
+                (
+                    Utilities.stringContains(uriPath, uriPathAPI) &&
+                    (Utilities.stringContains(uriQuery, appPageURIQuery)
+                     || (Utilities.stringContains(uriQuery, iosAppPageURIQuery)
+                         && Utilities.stringContains(userAgent, iosUserAgent))
+                    )
                )
-            && Utilities.stringContains(contentType, appContentType)
-            && Utilities.stringContains(userAgent,   appUserAgent)
-        );
+            ));
     }
 
 
@@ -301,9 +306,6 @@
         if 
(wr.getXAnalyticsValue(rawXAnalyticsHeader,"preview").trim().equalsIgnoreCase("1"))
             return false;
 
-
-        if 
(wr.getXAnalyticsValue(rawXAnalyticsHeader,"pageview").trim().equalsIgnoreCase("1"))
-            return true;
 
         return (
             // All pageviews have a 200 or 304 HTTP status
diff --git 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
index f5b3a90..5b176dd 100644
--- 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
+++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestPageview.java
@@ -197,78 +197,158 @@
     public void testIsPageviewXAnalyticsPreview(
 
     ){
-        String uri_host = "en.wikipedia";
+        String uri_host = "en.wikipedia.org";
         String uri_path = "/wiki/Horseshoe%20crab#anchor"; ;
         String uri_query = "-";
         String http_status = "200";
         String content_type = "text/html";
-        String user_agent = "turnip/";
+        String user_agent = "Mozilla/4.0";
 
-        assertTrue("Preview requests are not pageviews", 
PageviewDefinition.getInstance().isPageview(
+        // first make sure this request is a pageview
+        assertTrue("Should be a pageview", 
PageviewDefinition.getInstance().isPageview(
             uri_host,
             uri_path,
             uri_query,
             http_status,
             content_type,
             user_agent,
-            "WMF-Last-Access=10-Jan-2016;preview=1"
+            "WMF-Last-Access=10-Jan-2016;"
+
+        ) == true);
+
+        //bad preview header value, still pageview
+        assertTrue("Bad preview header value, still pageview", 
PageviewDefinition.getInstance().isPageview(
+                uri_host,
+                uri_path,
+                uri_query,
+                http_status,
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;preview=jajaj"
+
+        ) == true);
+
+        // add preview header, we only accept "1"
+        assertTrue("Add preview header, still no pageview", 
PageviewDefinition.getInstance().isPageview(
+                uri_host,
+                uri_path,
+                uri_query,
+                http_status,
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;preview=1"
+
+        ) == false);
+
+
+    }
+
+
+
+    /**
+     * If a request comes tagged as pageview is not counted as such
+     * unless is a request for an app (pageview tagging is restricted to apps
+     * for now)
+     **/
+    @Test
+    public void testIsPageviewXAnalyticsPageviewTagged(
+
+    ){
+        String uri_host = "en.wikipedia.org";
+        String uri_path = "/wiki/Horseshoe%20crab#anchor"; ;
+        String uri_query = "-";
+        String http_status = "200";
+        String content_type = "text/html";
+        String user_agent = "Mozilla/4.0";
+
+       //first make sure request is a pageview
+        assertTrue("Should be a pageview", 
PageviewDefinition.getInstance().isPageview(
+                uri_host,
+                uri_path,
+                uri_query,
+                http_status,
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;"
+
+        ) == true);
+
+        //change something so it is not a pageview request
+        assertTrue("Should not be a pageview", 
PageviewDefinition.getInstance().isPageview(
+                uri_host,
+                uri_path,
+                uri_query,
+                "500",
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;"
+
+        ) == false);
+
+        //tagging it as pageview should not make this request a pageview, it 
is not
+        //an app request
+        assertTrue("Not a pageview,tagging it as pageview should not change 
anything", PageviewDefinition.getInstance().isPageview(
+            uri_host,
+            uri_path,
+            uri_query,
+                "500",
+            content_type,
+            user_agent,
+            "WMF-Last-Access=10-Jan-2016;pageview=1"
 
         ) == false);
 
     }
 
     /**
-     * We only accept value "1" for preview header
+     * Apps pageviews that come tagged as 'pageview' are counted as such
+     * regardless of url
      */
     @Test
-    public void testIsPageviewXAnalyticsPreviewBadHeaderValue(
+    public void testIsPageviewXAnalyticsPageviewTaggedAppsPageview(
 
     ){
-        String uri_host = "en.wikipedia";
-        String uri_path = "/wiki/Horseshoe%20crab#anchor"; ;
-        String uri_query = "-";
-        String http_status = "200";
-        String content_type = "text/html";
-        String user_agent = "turnip/";
+        String uri_path = "api.php"; ;
+        String uri_query = "sections=0";
+        String content_type = "application/json";
+        String user_agent = "WikipediaApp";
 
-        assertTrue("A bad value for preview request header should not be 
consider a preview", PageviewDefinition.getInstance().isPageview(
-            uri_host,
-            uri_path,
-            uri_query,
-            http_status,
-            content_type,
-            user_agent,
-            "WMF-Last-Access=10-Jan-2016;preview=BAD;pageview=1"
+
+
+        //first make sure this is a pageview
+        assertTrue("Should be an app pageview", 
PageviewDefinition.getInstance().isAppPageview(
+
+                uri_path,
+                uri_query,
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;"
+
+        ) == true);
+
+        //change something so it is not an app pageview
+        assertTrue("Not an app pageview", 
PageviewDefinition.getInstance().isAppPageview(
+
+                uri_path,
+                "garbage",
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;"
+
+        ) == false);
+
+        //tagging it as pageview should make it so
+        assertTrue("App request tagged as pageview in x-analytics should be a 
pageview regardless of url",
+                PageviewDefinition.getInstance().isAppPageview(
+                uri_path,
+                "garbage",
+                content_type,
+                user_agent,
+                "WMF-Last-Access=10-Jan-2016;pageview=1"
 
         ) == true);
 
     }
 
-    /**
-     * If a request comes tagged as pageview is counted as such
-     * regardless of uri_host, uri_path...
-     */
-    @Test
-    public void testIsPageviewXAnalyticsPageviewTagged(
 
-    ){
-        String uri_host = "en.wikipedia";
-        String uri_path = "blah"; ;
-        String uri_query = "-";
-        String http_status = "200";
-        String content_type = "blah";
-        String user_agent = "blah/";
-
-        assertTrue("Request tagged as pageview in x-analytics should be 
consider pageviews", PageviewDefinition.getInstance().isPageview(
-            uri_host,
-            uri_path,
-            uri_query,
-            http_status,
-            content_type,
-            user_agent,
-            "WMF-Last-Access=10-Jan-2016;pageview=1"
-
-        ) == true);
-
-    }
 }
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/279447
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5b74b9a2c9856fcbacda521fe5fb388e28307d40
Gerrit-PatchSet: 6
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Joal <j...@wikimedia.org>
Gerrit-Reviewer: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to