Ananthrk has uploaded a new change for review.
https://gerrit.wikimedia.org/r/183551
Change subject: [WIP] UDF to get country code from IP address UDF to determine
client IP address given values from remote_addr and XFF headers Change-Id:
I6dd39300173cd378b1836649f926b21ec27cc032
......................................................................
[WIP]
UDF to get country code from IP address
UDF to determine client IP address given values from remote_addr and XFF headers
Change-Id: I6dd39300173cd378b1836649f926b21ec27cc032
---
M refinery-core/pom.xml
A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
A refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
A
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
A
refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
A
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
A
refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
A
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
A
refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
9 files changed, 591 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery/source
refs/changes/51/183551/1
diff --git a/refinery-core/pom.xml b/refinery-core/pom.xml
index 233394f..a863e7e 100644
--- a/refinery-core/pom.xml
+++ b/refinery-core/pom.xml
@@ -25,6 +25,12 @@
</dependency>
<dependency>
+ <groupId>com.maxmind.geoip2</groupId>
+ <artifactId>geoip2</artifactId>
+ <version>2.1.0</version>
+ </dependency>
+
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
diff --git
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
new file mode 100644
index 0000000..69af46d
--- /dev/null
+++
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Geocode.java
@@ -0,0 +1,169 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.Map;
+import java.util.HashMap;
+
+import com.maxmind.geoip2.DatabaseReader;
+import com.maxmind.geoip2.exception.GeoIp2Exception;
+import com.maxmind.geoip2.model.CityResponse;
+import com.maxmind.geoip2.record.*;
+
+/**
+ * Looks up geographic information for an IP address using a Maxmind GeoIP2
+ * database.
+ */
+public class Geocode {
+
+    // Keys used in the geo-coded data map
+    private static final String CONTINENT = "continent";
+    private static final String COUNTRY_CODE = "country_code";
+    private static final String COUNTRY = "country";
+    private static final String TIME_ZONE = "timezone";
+    private static final String CITY = "city";
+    private static final String POSTAL_CODE = "postal_code";
+    private static final String LATITUDE = "latitude";
+    private static final String LONGITUDE = "longitude";
+
+    // Placeholders reported when a field cannot be determined
+    private static final String UNKNOWN_CODE = "--";
+    private static final String UNKNOWN_VALUE = "Unknown";
+
+    // Default path to the Maxmind database
+    private static final String DEFAULT_DATABASE_PATH = "/usr/share/GeoIP/GeoLite2-City.mmdb";
+
+    private final DatabaseReader databaseReader;
+
+    /**
+     * Creates a new Geocode object using the specified Maxmind database.
+     *
+     * @param databasePath
+     *            String path to the Maxmind GeoLite2 database; when null or
+     *            empty, the default path is used
+     * @throws IOException
+     *             if the database cannot be opened or read
+     */
+    public Geocode(String databasePath) throws IOException {
+        String path = databasePath;
+        if (path == null || path.isEmpty()) {
+            path = DEFAULT_DATABASE_PATH;
+        }
+
+        databaseReader = new DatabaseReader.Builder(new File(path)).build();
+    }
+
+    /**
+     * Gets the ISO country code for the given IP.
+     *
+     * @param ip
+     *            String IP address
+     * @return the ISO country code, or "--" when the address is null, empty,
+     *         cannot be parsed/looked up, or has no country code in the database
+     */
+    public final String getCountryCode(final String ip) {
+        CityResponse response = lookup(ip);
+        if (response == null) {
+            return UNKNOWN_CODE;
+        }
+
+        Country country = response.getCountry();
+        if (country == null || country.getIsoCode() == null) {
+            // Same placeholder as getGeocodedData() instead of leaking null
+            return UNKNOWN_CODE;
+        }
+        return country.getIsoCode();
+    }
+
+    /**
+     * Gets a map with geo-code fields for the given IP.
+     *
+     * @param ip
+     *            String IP address
+     * @return a map containing the keys continent, country_code, country, city,
+     *         postal_code, latitude, longitude and timezone; any field that
+     *         cannot be determined keeps its default value
+     */
+    public final Map<String, Object> getGeocodedData(final String ip) {
+        // Start from the defaults and overwrite only what the database knows
+        Map<String, Object> geoData = getDefaultMap();
+
+        CityResponse response = lookup(ip);
+        if (response == null) {
+            return geoData;
+        }
+
+        Continent continent = response.getContinent();
+        if (continent != null && continent.getName() != null) {
+            geoData.put(CONTINENT, continent.getName());
+        }
+
+        Country country = response.getCountry();
+        if (country != null && country.getIsoCode() != null) {
+            geoData.put(COUNTRY_CODE, country.getIsoCode());
+            // Keep the "Unknown" default rather than storing a null name
+            if (country.getName() != null) {
+                geoData.put(COUNTRY, country.getName());
+            }
+        }
+
+        City city = response.getCity();
+        if (city != null && city.getName() != null) {
+            geoData.put(CITY, city.getName());
+        }
+
+        Postal postal = response.getPostal();
+        if (postal != null && postal.getCode() != null) {
+            geoData.put(POSTAL_CODE, postal.getCode());
+        }
+
+        Location location = response.getLocation();
+        if (location != null) {
+            // Coordinates are boxed and may be absent; do not overwrite defaults with null
+            if (location.getLatitude() != null) {
+                geoData.put(LATITUDE, location.getLatitude());
+            }
+            if (location.getLongitude() != null) {
+                geoData.put(LONGITUDE, location.getLongitude());
+            }
+            if (location.getTimeZone() != null) {
+                geoData.put(TIME_ZONE, location.getTimeZone());
+            }
+        }
+
+        return geoData;
+    }
+
+    /**
+     * Resolves the given IP string and queries the database for it.
+     *
+     * @param ip
+     *            String IP address
+     * @return the database response, or null when the address is null, empty,
+     *         unresolvable, not present in the database, or the lookup fails
+     */
+    private CityResponse lookup(final String ip) {
+        if (ip == null || ip.isEmpty()) {
+            // InetAddress.getByName() would map these to the loopback address,
+            // which is never in the database; skip the pointless lookup.
+            return null;
+        }
+
+        try {
+            // NOTE: getByName() resolves host names, so a value that is not an
+            // IP literal may trigger a DNS lookup.
+            InetAddress ipAddress = InetAddress.getByName(ip);
+            return databaseReader.city(ipAddress);
+        } catch (UnknownHostException hEx) {
+            return null;
+        } catch (IOException iEx) {
+            return null;
+        } catch (GeoIp2Exception gEx) {
+            return null;
+        }
+    }
+
+    /**
+     * Creates a new geo data map with default values for all fields.
+     *
+     * @return Map
+     */
+    private Map<String, Object> getDefaultMap() {
+        Map<String, Object> defaultGeoData = new HashMap<String, Object>();
+        defaultGeoData.put(CONTINENT, UNKNOWN_VALUE);
+        defaultGeoData.put(COUNTRY_CODE, UNKNOWN_CODE);
+        defaultGeoData.put(COUNTRY, UNKNOWN_VALUE);
+        defaultGeoData.put(CITY, UNKNOWN_VALUE);
+        defaultGeoData.put(POSTAL_CODE, UNKNOWN_VALUE);
+        defaultGeoData.put(LATITUDE, -1);
+        defaultGeoData.put(LONGITUDE, -1);
+        defaultGeoData.put(TIME_ZONE, UNKNOWN_VALUE);
+
+        return defaultGeoData;
+    }
+}
diff --git
a/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
new file mode 100644
index 0000000..d637638
--- /dev/null
+++
b/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/IpUtil.java
@@ -0,0 +1,70 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Helpers for working with client IP addresses found in request headers.
+ */
+public class IpUtil {
+
+    private static final Pattern IP4PATTERN = Pattern.compile("^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}$");
+    private static final Pattern IP6PATTERN = Pattern.compile("^(?:[0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}(?::(?:[0-9]{1,3}\\.){3}[0-9]{1,3})*$");
+
+    /**
+     * Determines the client IP address from the remote address and the
+     * X-Forwarded-For header.
+     *
+     * @param ip the IP address in the RemoteHost header
+     * @param xff A comma-separated list of ip addresses where the left most
+     *            is the ip address of the originating client. See for more
+     *            details http://en.wikipedia.org/wiki/X-Forwarded-For
+     * @return the first entry of {@code xff} that looks like an IPv4 or IPv6
+     *         address and is not a loopback/private IPv4 address; {@code ip}
+     *         when {@code xff} is null, empty, "-" or has no such entry
+     */
+    public static String getClientIp(final String ip, final String xff) {
+        if (xff == null || "-".equals(xff) || "".equals(xff)) {
+            return ip;
+        }
+
+        // Iterate through each proxied IP address in XFF.
+        // Return the first one that is a real IP.
+        // TODO: Make this smarter!
+        for (String entry : xff.split(",")) {
+            // XFF entries are conventionally separated by ", "
+            String proxiedIp = entry.trim();
+
+            if (IP4PATTERN.matcher(proxiedIp).matches()) {
+                if (!isLoopbackOrPrivateIp4(proxiedIp)) {
+                    return proxiedIp;
+                }
+            } else if (IP6PATTERN.matcher(proxiedIp).matches()) {
+                return proxiedIp;
+            }
+        }
+
+        // No usable entry in XFF; fall back to the remote address.
+        return ip;
+    }
+
+    /**
+     * Tells whether a dotted-quad IPv4 address is in the loopback range
+     * (127.0.0.0/8) or one of the private ranges 10.0.0.0/8, 172.16.0.0/12
+     * and 192.168.0.0/16.
+     *
+     * @param ip4 a string that already matched IP4PATTERN
+     * @return true when the address is loopback or private
+     */
+    private static boolean isLoopbackOrPrivateIp4(final String ip4) {
+        if (ip4.startsWith("127.") || ip4.startsWith("10.") || ip4.startsWith("192.168.")) {
+            return true;
+        }
+        if (ip4.startsWith("172.")) {
+            // Safe to parse: IP4PATTERN guarantees 1-3 digits followed by a dot
+            int secondOctet = Integer.parseInt(ip4.substring(4, ip4.indexOf('.', 4)));
+            return secondOctet >= 16 && secondOctet <= 31;
+        }
+        return false;
+    }
+}
diff --git
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
new file mode 100644
index 0000000..b9ef149
--- /dev/null
+++
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestGeocode.java
@@ -0,0 +1,123 @@
+// Copyright 2014 Wikimedia Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package org.wikimedia.analytics.refinery.core;
+
+import java.io.IOException;
+import java.util.Map;
+
+import junit.framework.TestCase;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Tests for Geocode. These tests read the Maxmind database at
+ * DATABASE_PATH, so that file must exist where they run.
+ */
+public class TestGeocode extends TestCase {
+
+    private static final String DATABASE_PATH = "/usr/share/GeoIP/GeoLite2-City.mmdb";
+
+    // Fresh instance per test, created by setUp()
+    private Geocode geocode;
+
+    @Override
+    protected void setUp() throws Exception {
+        super.setUp();
+        geocode = new Geocode(DATABASE_PATH);
+    }
+
+    public void testDoGeoLookupIpv4US() throws IOException {
+        // Arbitrary IPv4 address in the US
+        String ip = "96.56.123.2";
+        assertEquals("US", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("North America", geoData.get("continent"));
+        assertEquals("US", geoData.get("country_code"));
+        assertEquals("United States", geoData.get("country"));
+        assertEquals("Bohemia", geoData.get("city"));
+        assertEquals("11716", geoData.get("postal_code"));
+        assertEquals(40.7699, geoData.get("latitude"));
+        assertEquals(-73.1263, geoData.get("longitude"));
+        assertEquals("America/New_York", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpv6US() throws IOException {
+        // Arbitrary IPv6 address in the US
+        String ip = "2600:1011:b103:9999:6864:ac5e:1686:c20f";
+        assertEquals("US", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("North America", geoData.get("continent"));
+        assertEquals("US", geoData.get("country_code"));
+        assertEquals("United States", geoData.get("country"));
+        assertEquals("Phoenix", geoData.get("city"));
+        assertEquals("85001", geoData.get("postal_code"));
+        assertEquals(33.4484, geoData.get("latitude"));
+        assertEquals(-112.074, geoData.get("longitude"));
+        assertEquals("America/Phoenix", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpv6Europe() throws IOException {
+        // Arbitrary IPv6 address in Germany
+        String ip = "2001:aa8:abcd:1234::2222";
+        assertEquals("DE", geocode.getCountryCode(ip));
+
+        Map<String, Object> geoData = geocode.getGeocodedData(ip);
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("Europe", geoData.get("continent"));
+        assertEquals("DE", geoData.get("country_code"));
+        assertEquals("Germany", geoData.get("country"));
+        assertEquals("Unknown", geoData.get("city"));
+        assertEquals("Unknown", geoData.get("postal_code"));
+        assertEquals(51.5, geoData.get("latitude"));
+        assertEquals(10.5, geoData.get("longitude"));
+        assertEquals("Unknown", geoData.get("timezone"));
+    }
+
+    public void testDoGeoLookupIpUnknown() throws IOException {
+        // Invalid or unknown IP address
+        String ip = "-";
+        assertEquals("--", geocode.getCountryCode(ip));
+        assertAllDefaults(geocode.getGeocodedData(ip));
+    }
+
+    public void testDoGeoLookupWithNull() throws IOException {
+        // Invalid IP address
+        String ip = null;
+        assertEquals("--", geocode.getCountryCode(ip));
+        assertAllDefaults(geocode.getGeocodedData(ip));
+    }
+
+    /**
+     * Asserts that every field of the given geo data map holds the value
+     * expected when nothing could be determined for the address.
+     */
+    private static void assertAllDefaults(Map<String, Object> geoData) {
+        assertNotNull("Geo data cannot be null", geoData);
+        assertEquals("Unknown", geoData.get("continent"));
+        assertEquals("--", geoData.get("country_code"));
+        assertEquals("Unknown", geoData.get("country"));
+        assertEquals("Unknown", geoData.get("city"));
+        assertEquals("Unknown", geoData.get("postal_code"));
+        assertEquals(-1, geoData.get("latitude"));
+        assertEquals(-1, geoData.get("longitude"));
+        assertEquals("Unknown", geoData.get("timezone"));
+    }
+}
\ No newline at end of file
diff --git
a/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
new file mode 100644
index 0000000..2c76336
--- /dev/null
+++
b/refinery-core/src/test/java/org/wikimedia/analytics/refinery/core/TestIpUtil.java
@@ -0,0 +1,40 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.core;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for IpUtil.getClientIp.
+ */
+public class TestIpUtil extends TestCase {
+
+    public void testGetClientIp() {
+        String clientIp = IpUtil.getClientIp(
+                "37.228.105.17",                    // this ip address is an Opera proxy server
+                "101.209.27.230,%20101.209.27.230"  // these are random ip addresses
+        );
+
+        assertEquals("Return IP address is not the client's IP in XFF", "101.209.27.230", clientIp);
+    }
+
+    public void testGetClientIpWithInvalidXFF() {
+        // The remote address deliberately differs from every XFF entry, so the
+        // assertion can only pass when the fallback to the remote address is taken.
+        String clientIp = IpUtil.getClientIp(
+                "37.228.105.17",
+                "127.0.0.1,%20101.209.27.230"       // invalid XFF ip addresses
+        );
+
+        assertEquals("Return IP address is not the client's IP", "37.228.105.17", clientIp);
+    }
+}
\ No newline at end of file
diff --git
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
new file mode 100644
index 0000000..fe2d373
--- /dev/null
+++
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/ClientIpUDF.java
@@ -0,0 +1,37 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.hive;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+
+import org.wikimedia.analytics.refinery.core.IpUtil;
+
+/**
+ * A Hive UDF to extract client IP given the values from remote_addr and XFF
+ * headers.
+ * <p>
+ * Hive Usage:
+ *   ADD JAR /path/to/refinery-hive.jar;
+ *   CREATE TEMPORARY FUNCTION client_ip as 'org.wikimedia.analytics.refinery.hive.ClientIpUDF';
+ *   SELECT client_ip(ip, xff) from webrequest where year = 2014 limit 10;
+ */
+public class ClientIpUDF extends UDF {
+
+    /**
+     * Determines the client IP for one row.
+     *
+     * @param ip  value of the remote address column; may be null
+     * @param xff value of the X-Forwarded-For column; may be null
+     * @return the client IP as computed by IpUtil.getClientIp, or null when
+     *         no address could be determined (for example both inputs are null)
+     */
+    public Text evaluate(Text ip, Text xff) {
+        // Either argument may be null for NULL column values; convert without dereferencing
+        String remoteAddr = (ip == null) ? null : ip.toString();
+        String forwardedFor = (xff == null) ? null : xff.toString();
+
+        String clientIp = IpUtil.getClientIp(remoteAddr, forwardedFor);
+        return (clientIp == null) ? null : new Text(clientIp);
+    }
+}
\ No newline at end of file
diff --git
a/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
new file mode 100644
index 0000000..a533d09
--- /dev/null
+++
b/refinery-hive/src/main/java/org/wikimedia/analytics/refinery/hive/GeocodedCountryUDF.java
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.wikimedia.analytics.refinery.hive;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.Text;
+import org.wikimedia.analytics.refinery.core.Geocode;
+
+import java.io.IOException;
+
+/**
+ * A Hive UDF to lookup country codes from IP addresses.
+ * <p>
+ * Hive Usage:
+ *   ADD JAR /path/to/refinery-hive.jar;
+ *   ADD FILE /usr/share/GeoIP/GeoLite2-City.mmdb;
+ *   CREATE TEMPORARY FUNCTION geocode_country as 'org.wikimedia.analytics.refinery.hive.GeocodedCountryUDF';
+ *   -- Specify CWD relative paths for the database files. When you do the
+ *   -- above 'ADD FILE', Hive will put these files in the job's working
+ *   -- directory and you need to reference them relative to that.
+ *   SELECT geocode_country(ip, 'GeoLite2-City.mmdb') from webrequest where year = 2014 limit 10;
+ *
+ * TODO: Figure out a nice way to make default database paths just work in the
+ * Analytics Cluster.
+ */
+public class GeocodedCountryUDF extends UDF {
+
+    private Geocode geocode;
+
+    // An empty path makes Geocode fall back to its default database location
+    private final Text emptyString = new Text("");
+
+    // Private copy of the path the current geocode instance was built with
+    private Text currentPath = new Text(emptyString);
+
+    /**
+     * (Re)creates the Geocode instance for the given database path.
+     *
+     * @param databasePath path to the Maxmind database; null or empty selects
+     *                     the default database
+     * @throws IOException if the database cannot be opened
+     */
+    public void init(Text databasePath) throws IOException {
+        Text path = (databasePath == null) ? emptyString : databasePath;
+        geocode = new Geocode(path.toString());
+        // Text is mutable and the caller may reuse the instance, so keep a copy;
+        // holding the caller's object would make later path comparisons always succeed.
+        currentPath = new Text(path);
+    }
+
+    /**
+     * Looks up the country code of an IP address in the given database.
+     *
+     * @param ip           IP address to look up; may be null
+     * @param databasePath path to the Maxmind database; null or empty selects
+     *                     the default database
+     * @return the country code reported by Geocode.getCountryCode, or null if
+     *         that method returns null
+     * @throws IOException if the database cannot be opened
+     */
+    public Text evaluate(Text ip, Text databasePath) throws IOException {
+        Text requestedPath = (databasePath == null) ? emptyString : databasePath;
+        if (geocode == null || !requestedPath.equals(currentPath)) {
+            // Either geocode has not been initialized or the user has passed a
+            // different database path than the one we have already initialized with
+            init(requestedPath);
+        }
+
+        String countryCode = geocode.getCountryCode((ip == null) ? null : ip.toString());
+        return (countryCode == null) ? null : new Text(countryCode);
+    }
+
+    /**
+     * Looks up the country code of an IP address in the default database.
+     *
+     * @param ip IP address to look up; may be null
+     * @return the country code reported by Geocode.getCountryCode
+     * @throws IOException if the database cannot be opened
+     */
+    public Text evaluate(Text ip) throws IOException {
+        return evaluate(ip, emptyString);
+    }
+}
diff --git
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
new file mode 100644
index 0000000..e383e8e
--- /dev/null
+++
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestClientIpUDF.java
@@ -0,0 +1,36 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Tests for ClientIpUDF.evaluate.
+ */
+public class TestClientIpUDF extends TestCase {
+
+    /** XFF starts with a loopback entry followed by a well-formed address. */
+    public void testEvaluate() {
+        Text remoteAddr = new Text("208.80.154.133");
+        Text forwardedFor = new Text("127.0.0.1,96.56.123.2");
+
+        Text clientIp = new ClientIpUDF().evaluate(remoteAddr, forwardedFor);
+
+        assertEquals("96.56.123.2", clientIp.toString());
+    }
+
+    /** XFF holds a loopback entry and an entry prefixed with a literal "%20". */
+    public void testEvaluateWithInvalidXFF() {
+        Text remoteAddr = new Text("208.80.154.133");
+        Text forwardedFor = new Text("127.0.0.1,%20101.209.27.230");
+
+        Text clientIp = new ClientIpUDF().evaluate(remoteAddr, forwardedFor);
+
+        assertEquals("208.80.154.133", clientIp.toString());
+    }
+}
diff --git
a/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
new file mode 100644
index 0000000..83fbfdd
--- /dev/null
+++
b/refinery-hive/src/test/java/org/wikimedia/analytics/refinery/hive/TestGeocodedCountryUDF.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (C) 2014 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.wikimedia.analytics.refinery.hive;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+
+/**
+ * Tests for GeocodedCountryUDF.evaluate, with and without an explicit
+ * database path.
+ */
+public class TestGeocodedCountryUDF extends TestCase {
+
+    /** Single-argument form: no database path is passed. */
+    public void testEvaluate() throws IOException {
+        Text ip = new Text("96.56.123.2");
+        Text countryCode = new GeocodedCountryUDF().evaluate(ip);
+        assertEquals("US", countryCode.toString());
+    }
+
+    /** Two-argument form with an explicit database path. */
+    public void testEvaluateWithValidDatabase() throws IOException {
+        Text ip = new Text("96.56.123.2");
+        Text databasePath = new Text("/usr/share/GeoIP/GeoLite2-City.mmdb");
+        Text countryCode = new GeocodedCountryUDF().evaluate(ip, databasePath);
+        assertEquals("US", countryCode.toString());
+    }
+
+    /** A path to a non-existent database must surface as an IOException. */
+    public void testEvaluateWithInvalidDatabase() {
+        Text ip = new Text("96.56.123.2");
+        Text databasePath = new Text("/usr/share/GeoIP/non-existent.mmdb");
+        GeocodedCountryUDF udf = new GeocodedCountryUDF();
+
+        boolean ioExceptionThrown = false;
+        try {
+            udf.evaluate(ip, databasePath);
+        } catch (IOException expected) {
+            // This is the outcome under test
+            ioExceptionThrown = true;
+        }
+
+        if (!ioExceptionThrown) {
+            fail("Should throw an IOException for non-existent database");
+        }
+    }
+}
--
To view, visit https://gerrit.wikimedia.org/r/183551
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I6dd39300173cd378b1836649f926b21ec27cc032
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery/source
Gerrit-Branch: master
Gerrit-Owner: Ananthrk <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits