jenkins-bot has submitted this change and it was merged.

Change subject: Modify check_ip to consider internal proxied requests as valid
......................................................................


Modify check_ip to consider internal proxied requests as valid

We would like to run webstatscollector using the varnish logs
in Kafka.  Unlike udp2log, this stream does not contain requests
from the nginx SSL and IPv6 proxies.  Instead of throwing out
all varnish requests that have internal RemoteAddresses, we now
consider if the X-Forwarded-For header is set.  If it is, we
(naively) assume that the request is a valid external request
that has been proxied by a WMF internal node.

Change-Id: Id54308de10d95caf2b5acaa1022385ff430c1856
---
M filter.c
A tests/entry-internal-not-proxied.txt
A tests/entry-internal-proxied.txt
M tests/test.sh
4 files changed, 59 insertions(+), 10 deletions(-)

Approvals:
  QChris: Verified; Looks good to me, approved
  jenkins-bot: Verified



diff --git a/filter.c b/filter.c
index ad0eb61..cd12ae2 100644
--- a/filter.c
+++ b/filter.c
@@ -52,14 +52,36 @@
                 "91.198.174.",
                 NULL};
 
-bool check_ip(char *ip) {
+/**
+ * Returns true if this request should be
+ * counted based on request IP and X-Forwarded-For.
+ * Returns false if the request should be
+ * discarded.
+ */
+bool check_ip(char *ip, char *xff) {
        char **prefix=dupes;
-       for (;*prefix;prefix++) {
-               if (!strncmp(*prefix,ip,strlen(*prefix)))
-                       return false;
+
+       bool ip_is_internal = false;
+       bool xff_is_set = (xff != NULL && strncmp("-",xff,1) != 0);
+
+       // Check if ip is a WMF public internal IP.
+       for (; *prefix; prefix++) {
+               if (strncmp(*prefix, ip, strlen(*prefix)) == 0) {
+                       ip_is_internal = true;
+                       break;
+               }
        }
-       return true;
+
+       /* Throw away anything that is internal
+          and does not have XFF set.  Internal
+          requests with XFF set are most likely
+          externally proxied requests (SSL or IPv6) */
+       if (ip_is_internal && !xff_is_set)
+               return false;
+       else
+               return true;
 }
+
 
 const struct project {
        char *full;
@@ -169,28 +191,35 @@
        setgroups(1,gidlist);
        setuid(65534);
 
-       char *undef,*ip,*url, *size;
+       char *undef, *ip, *url, *size, *xff;
        while (fgets(line,LINESIZE-1,stdin)) {
                bzero(&info,sizeof(info));
                /* Tokenize the log line */
                TOKENIZE(line,"\t"); /* server */
                                FIELD; /* id? */
                                FIELD; /* timestamp */
-                               FIELD; /* ??? */
+                               FIELD; /* time-to-first-byte */
                info.ip=        FIELD; /* IP address! */
-                               FIELD; /* status */
+                               FIELD; /* HTTP status */
                info.size=      FIELD; /* object size */
+                               FIELD; /* HTTP method */
+               url=            FIELD; /* request URI */
                                FIELD;
-               url=            FIELD;
+                               FIELD; /* content_type */
+                               FIELD; /* referer */
+               xff=            FIELD; /* x-forwarded-for */
+
                if (!url || !info.ip || !info.size)
                        continue;
+
                replace_space(url);
-               if (!check_ip(info.ip))
+               if (!check_ip(info.ip,xff))
                        continue;
                if (!parse_url(url,&info))
                        continue;
                if (!check_project(&info))
                        continue;
+
                printf("%s%s 1 %s %s\n",info.language, info.suffix, info.size, 
info.title);
        }
 }
diff --git a/tests/entry-internal-not-proxied.txt 
b/tests/entry-internal-not-proxied.txt
new file mode 100644
index 0000000..37ed132
--- /dev/null
+++ b/tests/entry-internal-not-proxied.txt
@@ -0,0 +1 @@
+cp1063.eqiad.wmnet     108502994009    2014-08-21T19:52:03     0.000062466     
208.80.154.14   -/200   0       GET     http://en.wikipedia.org/wiki/Neutron    
-       -       -       -       check_http/v1.4.15 (nagios-plugins 1.4.15)      
-       -
diff --git a/tests/entry-internal-proxied.txt b/tests/entry-internal-proxied.txt
new file mode 100644
index 0000000..2307ad3
--- /dev/null
+++ b/tests/entry-internal-proxied.txt
@@ -0,0 +1 @@
+cp1066.eqiad.wmnet     25134406711     2014-08-21T19:34:07     0.746407509     
208.80.154.133  miss/200        1064    GET     
http://en.wikipedia.org/wiki/Neutron    -       application/json; charset=utf-8 
http://en.wikipedia.org/wiki/Main_Page  5.4.3.2 UserAgent WooWoo        
en-US,en;q=0.8  -
diff --git a/tests/test.sh b/tests/test.sh
index 258c95e..68c6ab6 100755
--- a/tests/test.sh
+++ b/tests/test.sh
@@ -136,6 +136,24 @@
 assert_counted     'http://en.wikipedia.org/wiki/Robinson_Can\xC3\xB3' 'en' 
'Robinson_Can\xC3\xB3'
 assert_counted     'http://en.wikipedia.org/wiki/Robinson_Canó' 'en' 
'Robinson_Canó'
 
+start_test "Internal RemoteAddr - proxied"
+TEST_INTERNAL_PROXIED=`cat entry-internal-proxied.txt | "$FILTER" | wc -l`;
+
+if [ $? -ne 139 -a $TEST_INTERNAL_PROXIED -eq 1 ]; then
+  mark_test_passed
+else
+  mark_test_failed
+fi
+
+start_test "Internal RemoteAddr - not proxied"
+TEST_INTERNAL_NOT_PROXIED=`cat entry-internal-not-proxied.txt | "$FILTER" | wc 
-l`;
+
+if [ $? -ne 139 -a $TEST_INTERNAL_NOT_PROXIED -eq 0 ]; then
+  mark_test_passed
+else
+  mark_test_failed
+fi
+
 # -- printing statistics 
-------------------------------------------------------
 
 TESTS_FAILED=$((TESTS-TESTS_GOOD))

-- 
To view, visit https://gerrit.wikimedia.org/r/155709
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Id54308de10d95caf2b5acaa1022385ff430c1856
Gerrit-PatchSet: 3
Gerrit-Project: analytics/webstatscollector
Gerrit-Branch: kafka
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: QChris <christ...@quelltextlich.at>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to