Ananthrk has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/183551

Change subject: [WIP] UDF to get country code from IP address; UDF to determine 
client IP address given values from remote_addr and XFF headers (Change-Id: 
I6dd39300173cd378b1836649f926b21ec27cc032)
......................................................................

[WIP]
UDF to get country code from IP address
UDF to determine client IP address given values from remote_addr and XFF headers
Change-Id: I6dd39300173cd378b1836649f926b21ec27cc032
---
M refinery-core/pom.xml
A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
A 
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
A 
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
A 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
A 
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
A 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
A 
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
9 files changed, 591 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source 
refs/changes/51/183551/1

diff --git a/refinery-core/pom.xml b/refinery-core/pom.xml
index 233394f..a863e7e 100644
--- a/refinery-core/pom.xml
+++ b/refinery-core/pom.xml
@@ -25,6 +25,12 @@
         </dependency>
 
         <dependency>
+            <groupId>com.maxmind.geoip2</groupId>
+            <artifactId>geoip2</artifactId>
+            <version>2.1.0</version>
+        </dependency>
+
+        <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
             <scope>test</scope>
diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
new file mode 100644
index 0000000..69af46d
--- /dev/null
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
@@ -0,0 +1,169 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.Map;
+import java.util.HashMap;
+
+import com.maxmind.geoip2.DatabaseReader;
+import com.maxmind.geoip2.exception.GeoIp2Exception;
+import com.maxmind.geoip2.model.CityResponse;
+import com.maxmind.geoip2.record.*;
+
+/**
+ * Contains functions to find geo information of an IP address using Maxmind's 
GeoIP2
+ */
+public class Geocode {
+
+    private DatabaseReader databaseReader;
+
+    //Constants to hold the keys to use in geo-coded data map
+    private static final String CONTINENT = "continent";
+    private static final String COUNTRY_CODE = "country_code";
+    private static final String COUNTRY = "country";
+    private static final String TIME_ZONE = "timezone";
+    private static final String CITY = "city";
+    private static final String POSTAL_CODE = "postal_code";
+    private static final String LATITUDE = "latitude";
+    private static final String LONGITUDE = "longitude";
+
+    private static final String UNKNOWN_CODE = "--";
+    private static final String UNKNOWN_VALUE = "Unknown";
+
+    // Default path to Maxmind database
+    private final String defaultDatabasePath = 
"/usr/share/GeoIP/GeoLite2-City.mmdb";
+
+    /**
+     * Creates a new Geocode object using the specified Maxmind database.
+     *
+     * @param   databasePath
+     *              String path to Maxmind GeoLite2 database
+     * @throws IOException
+     *             if the database file cannot be opened or read
+     */
+    public Geocode(String databasePath) throws IOException {
+        if (databasePath == null || databasePath.isEmpty()) {
+            databasePath = defaultDatabasePath;
+        }
+
+        databaseReader = new DatabaseReader.Builder(new 
File(databasePath)).build();
+    }
+
+    /**
+     * Gets the country code for the given IP
+     * @param ip
+     *      String IP address
+     * @return
+     *      String
+     */
+    public final String getCountryCode(final String ip) {
+        try {
+            InetAddress ipAddress = InetAddress.getByName(ip);
+            CityResponse response = databaseReader.city(ipAddress);
+            Country country = response.getCountry();
+            return country.getIsoCode();
+        } catch (UnknownHostException hEx) {
+            return UNKNOWN_CODE;
+        } catch (IOException iEx) {
+            return UNKNOWN_CODE;
+        } catch (GeoIp2Exception gEx) {
+            return UNKNOWN_CODE;
+        }
+    }
+
+    /**
+     * Gets a map with geo-code fields for the given IP
+     * @param ip
+     *      String Ip address
+     * @return
+     *      Map
+     */
+    public final Map<String, Object> getGeocodedData(final String ip) {
+
+        InetAddress ipAddress = null;
+        //Initialize map with default values
+        Map<String, Object> geoData = getDefaultMap();
+
+        try {
+            ipAddress = InetAddress.getByName(ip);
+        } catch (UnknownHostException hEx) {
+            return geoData;
+        }
+
+        CityResponse response = null;
+        try {
+            response = databaseReader.city(ipAddress);
+        } catch (IOException iEx) {
+            return geoData;
+        } catch (GeoIp2Exception gEx) {
+            return geoData;
+        }
+
+        if (response == null)
+            return geoData;
+
+        Continent continent = response.getContinent();
+        if (continent != null && continent.getName() != null)
+            geoData.put(CONTINENT, continent.getName());
+
+        Country country = response.getCountry();
+        if (country != null && country.getIsoCode() != null) {
+            geoData.put(COUNTRY_CODE, country.getIsoCode());
+            geoData.put(COUNTRY, country.getName());
+        }
+
+        City city = response.getCity();
+        if (city != null && city.getName() != null)
+            geoData.put(CITY, city.getName());
+
+        Postal postal = response.getPostal();
+        if (postal != null && postal.getCode() != null)
+            geoData.put(POSTAL_CODE, postal.getCode());
+
+        Location location = response.getLocation();
+        if (location != null) {
+            geoData.put(LATITUDE, location.getLatitude());
+            geoData.put(LONGITUDE, location.getLongitude());
+            if (location.getTimeZone() != null)
+                geoData.put(TIME_ZONE, location.getTimeZone());
+        }
+
+        return geoData;
+    }
+
+    /**
+     * Creates a new geo data map with default values for all fields
+     * @return Map
+     */
+    private Map<String, Object> getDefaultMap() {
+        Map<String, Object> defaultGeoData = new HashMap<String, Object>();
+        defaultGeoData.put(CONTINENT, UNKNOWN_VALUE);
+        defaultGeoData.put(COUNTRY_CODE, UNKNOWN_CODE);
+        defaultGeoData.put(COUNTRY, UNKNOWN_VALUE);
+        defaultGeoData.put(CITY, UNKNOWN_VALUE);
+        defaultGeoData.put(POSTAL_CODE, UNKNOWN_VALUE);
+        defaultGeoData.put(LATITUDE, -1);
+        defaultGeoData.put(LONGITUDE, -1);
+        defaultGeoData.put(TIME_ZONE, UNKNOWN_VALUE);
+
+        return defaultGeoData;
+    }
+}
diff --git 
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
new file mode 100644
index 0000000..d637638
--- /dev/null
+++ 
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class IpUtil {
+
+    private static final Pattern IP4PATTERN = 
Pattern.compile("^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}$");
+    private static final Pattern IP6PATTERN = 
Pattern.compile("^(?:[0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}(?::(?:[0-9]{1,3}\\.){3}[0-9]{1,3})*$");
+
+    /**
+     *
+     * @param ip   the IP address in the RemoteHost header
+     * @param xff  A comma-separated list of IP addresses where the leftmost 
is the IP address of the originating
+     *             client. For more details see 
http://en.wikipedia.org/wiki/X-Forwarded-For
+     * @return String
+     */
+    public static String getClientIp(final String ip, final String xff) {
+        if (xff == null || "-".equals(xff) || "".equals(xff)) {
+            return ip;
+        }
+        else {
+            String[] proxiedIps = xff.split(",");
+            try {
+                // Iterate through each proxied IP address
+                // in XFF.  Return the first one that is a real IP.
+                // TODO: Make this smarter!
+                for (String proxiedIp : proxiedIps) {
+                    Matcher ip4 = IP4PATTERN.matcher(proxiedIp);
+                    Matcher ip6 = IP6PATTERN.matcher(proxiedIp);
+
+                    if ((!(
+                            proxiedIp.startsWith("127.0")    ||
+                                    proxiedIp.startsWith("192.168")  ||
+                                    proxiedIp.startsWith("10.")
+                            ))
+                            &&
+                            (
+                                    ip4.matches() || ip6.matches()
+                            )
+                            ) {
+                        return proxiedIp;
+                    }
+                }
+
+                // If we get this far, no usable address was found in XFF.  Just return ip.
+                return ip;
+            }
+            catch (ArrayIndexOutOfBoundsException e) {
+                return ip;
+            }
+        }
+    }
+}
diff --git 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
new file mode 100644
index 0000000..b9ef149
--- /dev/null
+++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
@@ -0,0 +1,123 @@
+// Copyright 2014 Wikimedia Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.io.IOException;
+import java.util.Map;
+
+import junit.framework.TestCase;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+public class TestGeocode extends TestCase {
+
+    private static String defaultDatabasePath = 
"/usr/share/GeoIP/GeoLite2-City.mmdb";
+
+    public void testDoGeoLookupIpv4US() throws IOException {
+        Geocode geocode = new Geocode(defaultDatabasePath);
+
+        // Arbitrary IPv4 address in the US
+        String ip = "96.56.123.2";
+        assertEquals("US", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("North America", geoData.get("continent"));
+        assertEquals("US", geoData.get("country_code"));
+        assertEquals("United States", geoData.get("country"));
+        assertEquals("Bohemia", geoData.get("city"));
+        assertEquals("11716", geoData.get("postal_code"));
+        assertEquals(40.7699, geoData.get("latitude"));
+        assertEquals(-73.1263, geoData.get("longitude"));
+        assertEquals("America/New_York", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpv6US() throws IOException {
+        Geocode geocode = new Geocode(defaultDatabasePath);
+
+        // Arbitrary IPv6 address in the US
+        String ip = "2600:1011:b103:9999:6864:ac5e:1686:c20f";
+        assertEquals("US", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("North America", geoData.get("continent"));
+        assertEquals("US", geoData.get("country_code"));
+        assertEquals("United States", geoData.get("country"));
+        assertEquals("Phoenix", geoData.get("city"));
+        assertEquals("85001", geoData.get("postal_code"));
+        assertEquals(33.4484, geoData.get("latitude"));
+        assertEquals(-112.074, geoData.get("longitude"));
+        assertEquals("America/Phoenix", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpv6Europe() throws IOException {
+        Geocode geocode = new Geocode(defaultDatabasePath);
+
+        // Arbitrary IPv6 address in Germany
+        String ip = "2001:aa8:abcd:1234::2222";
+        assertEquals("DE", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("Europe", geoData.get("continent"));
+        assertEquals("DE", geoData.get("country_code"));
+        assertEquals("Germany", geoData.get("country"));
+        assertEquals("Unknown", geoData.get("city"));
+        assertEquals("Unknown", geoData.get("postal_code"));
+        assertEquals(51.5, geoData.get("latitude"));
+        assertEquals(10.5, geoData.get("longitude"));
+        assertEquals("Unknown", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpUnknown() throws IOException {
+        Geocode geocode = new Geocode(defaultDatabasePath);
+
+        // Invalid or unknown IP address
+        String ip = "-";
+        assertEquals("--", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("--", geoData.get("country_code"));
+        assertEquals("Unknown", geoData.get("continent"));
+        assertEquals("--", geoData.get("country_code"));
+        assertEquals("Unknown", geoData.get("country"));
+        assertEquals("Unknown", geoData.get("city"));
+        assertEquals("Unknown", geoData.get("postal_code"));
+        assertEquals(-1, geoData.get("latitude"));
+        assertEquals(-1, geoData.get("longitude"));
+        assertEquals("Unknown", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupWithNull() throws IOException {
+        Geocode geocode = new Geocode(defaultDatabasePath);
+
+        // Invalid IP address
+        String ip = null;
+        assertEquals("--", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("Unknown", geoData.get("continent"));
+        assertEquals("--", geoData.get("country_code"));
+        assertEquals("Unknown", geoData.get("country"));
+        assertEquals("Unknown", geoData.get("city"));
+        assertEquals("Unknown", geoData.get("postal_code"));
+        assertEquals(-1, geoData.get("latitude"));
+        assertEquals(-1, geoData.get("longitude"));
+        assertEquals("Unknown", geoData.get("timezone"));
+    }
+}
\ No newline at end of file
diff --git 
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
new file mode 100644
index 0000000..2c76336
--- /dev/null
+++ 
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import junit.framework.TestCase;
+
+public class TestIpUtil extends TestCase {
+
+    public void testGetClientIp() {
+        String clientIp = IpUtil.getClientIp(
+                "37.228.105.17",                     // this ip address is an 
Opera proxy server
+                "101.209.27.230,%20101.209.27.230"   // these are random ip 
addresses
+        );
+
+        assertEquals("Return IP address is not the client's IP in XFF", 
"101.209.27.230", clientIp);
+    }
+
+    public void testGetClientIpWithInvalidXFF() {
+        String clientIp = IpUtil.getClientIp(
+                "101.209.27.230",
+                "127.0.0.1,%20101.209.27.230"   // invalid XFF ip addresses
+        );
+
+        assertEquals("Return IP address is not the client's IP", 
"101.209.27.230", clientIp);
+    }
+}
\ No newline at end of file
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
new file mode 100644
index 0000000..fe2d373
--- /dev/null
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.hive;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+
+import org.wikimedia.analytics.refinery.core.IpUtil;
+
+/**
+ * A Hive UDF to extract client IP given the values from remote_addr and XFF 
headers.
+ * <p>
+ * Hive Usage:
+ *   ADD JAR /path/to/refinery-hive.jar;
+ *   CREATE TEMPORARY FUNCTION client_ip as 
'org.wikimedia.analytics.refinery.hive.ClientIpUDF';
+ *   SELECT client_ip(ip, xff) from webrequest where year = 2014 limit 10;
+ */
+public class ClientIpUDF extends UDF {
+
+    public Text evaluate(Text ip, Text xff) {
+        return new Text(IpUtil.getClientIp(ip.toString(), xff.toString()));
+    }
+}
\ No newline at end of file
diff --git 
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
new file mode 100644
index 0000000..a533d09
--- /dev/null
+++ 
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.hive;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+import org.wikimedia.analytics.refinery.core.Geocode;
+
+import java.io.IOException;
+
+/**
+ * A Hive UDF to lookup country codes from IP addresses.
+ * <p>
+ * Hive Usage:
+ *   ADD JAR /path/to/refinery-hive.jar;
+ *   ADD FILE /usr/share/GeoIP/GeoLite2-City.mmdb;
+ *   CREATE TEMPORARY FUNCTION geocode_country as 
'org.wikimedia.analytics.refinery.hive.GeocodedCountryUDF';
+ *   -- Specify CWD relative paths for the database files.  When you run the 
'ADD FILE' above, Hive
+ *   -- will put the file in the job's working directory, so you need to 
reference it relative to that.
+ *   SELECT geocode_country(ip, 'GeoLite2-City.mmdb') from webrequest where year 
= 2014 limit 10;
+ *
+ * TODO: Figure out a nice way to make default database paths just work in the 
Analytics Cluster.
+ */
+public class GeocodedCountryUDF extends UDF {
+
+    private Geocode geocode;
+
+    private final Text emptyString = new Text("");
+    private Text currentPath = emptyString;
+
+    public void init(Text databasePath) throws IOException {
+        geocode = new Geocode(databasePath.toString());
+    }
+
+    public Text evaluate(Text ip, Text databasePath) throws IOException {
+        if (geocode == null || !databasePath.equals(currentPath)) {
+            /*
+            either geocode has not been initialized or the user has passed a 
different database path than the one
+            we have already initialized with
+             */
+            init(databasePath);
+            currentPath = databasePath;
+        }
+        return new Text(geocode.getCountryCode(ip.toString()));
+    }
+
+    public Text evaluate(Text ip) throws IOException {
+        return evaluate(ip, emptyString);
+    }
+}
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
new file mode 100644
index 0000000..e383e8e
--- /dev/null
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.Text;
+
+public class TestClientIpUDF extends TestCase {
+
+    public void testEvaluate() {
+        ClientIpUDF clientIpUDF = new ClientIpUDF();
+        Text ip  = new Text("208.80.154.133");
+        Text xff = new Text("127.0.0.1,96.56.123.2");
+        assertEquals("96.56.123.2", clientIpUDF.evaluate(ip, xff).toString());
+    }
+
+    public void testEvaluateWithInvalidXFF() {
+        ClientIpUDF clientIpUDF = new ClientIpUDF();
+        Text ip  = new Text("208.80.154.133");
+        Text xff = new Text("127.0.0.1,%20101.209.27.230");
+        assertEquals("208.80.154.133", clientIpUDF.evaluate(ip, 
xff).toString());
+    }
+}
diff --git 
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
new file mode 100644
index 0000000..83fbfdd
--- /dev/null
+++ 
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (C) 2014  Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+
+public class TestGeocodedCountryUDF extends TestCase {
+
+    public void testEvaluate() throws IOException {
+        GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
+        Text result = geocodedCountryUDF.evaluate(new Text("96.56.123.2"));
+        assertEquals("US", result.toString());
+    }
+
+    public void testEvaluateWithValidDatabase() throws IOException {
+        GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
+        Text result = geocodedCountryUDF.evaluate(new Text("96.56.123.2"),
+                new Text("/usr/share/GeoIP/GeoLite2-City.mmdb"));
+        assertEquals("US", result.toString());
+    }
+
+    public void testEvaluateWithInvalidDatabase() {
+        GeocodedCountryUDF geocodedCountryUDF = new GeocodedCountryUDF();
+        try {
+            geocodedCountryUDF.evaluate(new Text("96.56.123.2"), new 
Text("/usr/share/GeoIP/non-existent.mmdb"));
+            fail("Should throw an IOException for non-existent database");
+        } catch (IOException ex) {
+        }
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/183551
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6dd39300173cd378b1836649f926b21ec27cc032
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Ananthrk <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to