This is an automated email from the ASF dual-hosted git repository.

vbalaji pushed a commit to branch hackathon-0619
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/hackathon-0619 by this push:
     new ae76c0f  fixing pom dependencies and cleaning other stuff
ae76c0f is described below

commit ae76c0f4dcfc33d2758cdf680e6a32f0262ee014
Author: Nishith Agarwal <nagar...@uber.com>
AuthorDate: Tue Jun 18 23:31:32 2019 -0700

    fixing pom dependencies and cleaning other stuff
---
 hoodie-cli/pom.xml                                 |  12 -
 hoodie-client/pom.xml                              |   1 +
 .../com/uber/hoodie/io/HoodieAppendHandle.java     |   2 +-
 .../io/strategy/TestHoodieCompactionStrategy.java  |   4 +-
 hoodie-common/pom.xml                              |  35 +-
 .../hoodie/common/util/DefaultSizeEstimator.java   |   2 -
 .../common/util/HoodieRecordSizeEstimator.java     |   1 -
 .../hoodie/common/util/ObjectSizeCalculator.java   | 478 +++++++++++++++++++++
 .../util/collection/ExternalSpillableMap.java      |   2 +-
 hoodie-hadoop-mr/pom.xml                           |  11 +-
 hoodie-hive/pom.xml                                |  19 +-
 hoodie-spark/pom.xml                               |   9 +-
 hoodie-timeline-service/pom.xml                    |   5 -
 hoodie-utilities/pom.xml                           |  55 +--
 pom.xml                                            |  13 +
 15 files changed, 524 insertions(+), 125 deletions(-)

diff --git a/hoodie-cli/pom.xml b/hoodie-cli/pom.xml
index 6c93edc..77d385c 100644
--- a/hoodie-cli/pom.xml
+++ b/hoodie-cli/pom.xml
@@ -30,7 +30,6 @@
     <spring.shell.version>1.2.0.RELEASE</spring.shell.version>
     <jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
     <log4j.version>1.2.17</log4j.version>
-    <junit.version>4.10</junit.version>
     <notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
   </properties>
 
@@ -146,11 +145,6 @@
       <version>${spring.shell.version}</version>
     </dependency>
     <dependency>
-      <groupId>de.vandermeer</groupId>
-      <artifactId>asciitable</artifactId>
-      <version>0.2.5</version>
-    </dependency>
-    <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_2.11</artifactId>
     </dependency>
@@ -203,12 +197,6 @@
       <artifactId>hoodie-common</artifactId>
       <version>${project.version}</version>
     </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit-dep</artifactId>
-      <version>${junit.version}</version>
-      <scope>test</scope>
-    </dependency>
 
     <dependency>
       <groupId>commons-dbcp</groupId>
diff --git a/hoodie-client/pom.xml b/hoodie-client/pom.xml
index 2995d38..39ff4ea 100644
--- a/hoodie-client/pom.xml
+++ b/hoodie-client/pom.xml
@@ -143,6 +143,7 @@
       <groupId>com.beust</groupId>
       <artifactId>jcommander</artifactId>
       <version>1.48</version>
+      <scope>test</scope>
     </dependency>
 
     <!-- Parent dependencies -->
diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java
index cc98f56..faf1887 100644
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java
@@ -18,7 +18,7 @@
 
 package com.uber.hoodie.io;
 
-import com.beust.jcommander.internal.Maps;
+import com.google.common.collect.Maps;
 import com.uber.hoodie.WriteStatus;
 import com.uber.hoodie.common.model.FileSlice;
 import com.uber.hoodie.common.model.HoodieDeltaWriteStat;
diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java
index 0962bd0..428eae3 100644
--- a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java
+++ b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java
@@ -21,8 +21,8 @@ package com.uber.hoodie.io.strategy;
 
 import static org.junit.Assert.assertEquals;
 import static
org.junit.Assert.assertTrue; -import com.beust.jcommander.internal.Lists; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.uber.hoodie.avro.model.HoodieCompactionOperation; import com.uber.hoodie.common.model.HoodieDataFile; @@ -248,7 +248,7 @@ public class TestHoodieCompactionStrategy { private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config, Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) { - List<HoodieCompactionOperation> operations = Lists.newArrayList(sizesMap.size()); + List<HoodieCompactionOperation> operations = new ArrayList<>(); sizesMap.forEach((k, v) -> { HoodieDataFile df = TestHoodieDataFile.newDataFile(k); diff --git a/hoodie-common/pom.xml b/hoodie-common/pom.xml index e9ece6b..79d14d1 100644 --- a/hoodie-common/pom.xml +++ b/hoodie-common/pom.xml @@ -84,10 +84,6 @@ <artifactId>rocksdbjni</artifactId> </dependency> <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - </dependency> - <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <exclusions> @@ -100,13 +96,11 @@ <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>${junit.version}</version> <scope>test</scope> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-annotations</artifactId> - <version>${fasterxml.version}</version> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> @@ -136,30 +130,14 @@ <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> - <version>4.5.4</version> - </dependency> - <dependency> - <groupId>commons-codec</groupId> - <artifactId>commons-codec</artifactId> </dependency> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>fluent-hc</artifactId> - <version>4.5.4</version> - </dependency> - <dependency> - <groupId>com.esotericsoftware</groupId> - <artifactId>kryo</artifactId> </dependency> <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-mapred</artifactId> - <exclusions> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> </dependency> <dependency> <groupId>com.github.stefanbirkner</groupId> @@ -168,9 +146,12 @@ <scope>test</scope> </dependency> <dependency> - <groupId>com.twitter.common</groupId> - <artifactId>objectsize</artifactId> - <version>0.0.12</version> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-mapred</artifactId> </dependency> <dependency> <groupId>com.esotericsoftware</groupId> diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/DefaultSizeEstimator.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/DefaultSizeEstimator.java index b56ff23..66b0954 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/DefaultSizeEstimator.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/DefaultSizeEstimator.java @@ -18,8 +18,6 @@ package com.uber.hoodie.common.util; -import com.twitter.common.objectsize.ObjectSizeCalculator; - /** * Default implementation of size-estimator that uses Twitter's ObjectSizeCalculator * @param <T> diff --git 
a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java index ed024ed..9ccf152 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java @@ -18,7 +18,6 @@ package com.uber.hoodie.common.util; -import com.twitter.common.objectsize.ObjectSizeCalculator; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; import org.apache.avro.Schema; diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/ObjectSizeCalculator.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ObjectSizeCalculator.java new file mode 100644 index 0000000..2c12502 --- /dev/null +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ObjectSizeCalculator.java @@ -0,0 +1,478 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// COPIED FROM https://github.com/twitter/commons/blob/master/src/java/com/twitter/common/objectsize/ +// ObjectSizeCalculator.java +// ================================================================================================= +// Copyright 2011 Twitter, Inc. +// ------------------------------------------------------------------------------------------------- +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this work except in compliance with the License. +// You may obtain a copy of the License in the LICENSE file, or at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ================================================================================================= + +package com.uber.hoodie.common.util; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.collect.Sets; +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryPoolMXBean; +import java.lang.reflect.Array; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +/** + * Contains utility methods for calculating the memory usage of objects. It + * only works on the HotSpot JVM, and infers the actual memory layout (32 bit + * vs. 64 bit word size, compressed object pointers vs. uncompressed) from + * best available indicators. It can reliably detect a 32 bit vs. 64 bit JVM. + * It can only make an educated guess at whether compressed OOPs are used, + * though; specifically, it knows what the JVM's default choice of OOP + * compression would be based on HotSpot version and maximum heap sizes, but if + * the choice is explicitly overridden with the <tt>-XX:{+|-}UseCompressedOops</tt> command line + * switch, it can not detect + * this fact and will report incorrect sizes, as it will presume the default JVM + * behavior. + * + * @author Attila Szegedi + */ +public class ObjectSizeCalculator { + + /** + * Describes constant memory overheads for various constructs in a JVM implementation. + */ + public interface MemoryLayoutSpecification { + + /** + * Returns the fixed overhead of an array of any type or length in this JVM. + * + * @return the fixed overhead of an array. + */ + int getArrayHeaderSize(); + + /** + * Returns the fixed overhead of for any {@link Object} subclass in this JVM. + * + * @return the fixed overhead of any object. + */ + int getObjectHeaderSize(); + + /** + * Returns the quantum field size for a field owned by an object in this JVM. + * + * @return the quantum field size for an object. + */ + int getObjectPadding(); + + /** + * Returns the fixed size of an object reference in this JVM. + * + * @return the size of all object references. + */ + int getReferenceSize(); + + /** + * Returns the quantum field size for a field owned by one of an object's ancestor superclasses + * in this JVM. + * + * @return the quantum field size for a superclass field. + */ + int getSuperclassFieldPadding(); + } + + private static class CurrentLayout { + + private static final MemoryLayoutSpecification SPEC = + getEffectiveMemoryLayoutSpecification(); + } + + /** + * Given an object, returns the total allocated size, in bytes, of the object + * and all other objects reachable from it. Attempts to to detect the current JVM memory layout, + * but may fail with {@link UnsupportedOperationException}; + * + * @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do + * anything special, it measures the size of all objects + * reachable through it (which will include its class loader, and by + * extension, all other Class objects loaded by + * the same loader, and all the parent class loaders). It doesn't provide the + * size of the static fields in the JVM class that the Class object + * represents. 
+ * @return the total allocated size of the object and all other objects it + * retains. + * @throws UnsupportedOperationException if the current vm memory layout cannot be detected. + */ + public static long getObjectSize(Object obj) throws UnsupportedOperationException { + return obj == null ? 0 : new ObjectSizeCalculator(CurrentLayout.SPEC).calculateObjectSize(obj); + } + + // Fixed object header size for arrays. + private final int arrayHeaderSize; + // Fixed object header size for non-array objects. + private final int objectHeaderSize; + // Padding for the object size - if the object size is not an exact multiple + // of this, it is padded to the next multiple. + private final int objectPadding; + // Size of reference (pointer) fields. + private final int referenceSize; + // Padding for the fields of superclass before fields of subclasses are + // added. + private final int superclassFieldPadding; + + private final LoadingCache<Class<?>, ClassSizeInfo> classSizeInfos = + CacheBuilder.newBuilder().build(new CacheLoader<Class<?>, ClassSizeInfo>() { + public ClassSizeInfo load(Class<?> clazz) { + return new ClassSizeInfo(clazz); + } + }); + + + private final Set<Object> alreadyVisited = Sets.newIdentityHashSet(); + private final Deque<Object> pending = new ArrayDeque<Object>(16 * 1024); + private long size; + + /** + * Creates an object size calculator that can calculate object sizes for a given + * {@code memoryLayoutSpecification}. + * + * @param memoryLayoutSpecification a description of the JVM memory layout. + */ + public ObjectSizeCalculator(MemoryLayoutSpecification memoryLayoutSpecification) { + Preconditions.checkNotNull(memoryLayoutSpecification); + arrayHeaderSize = memoryLayoutSpecification.getArrayHeaderSize(); + objectHeaderSize = memoryLayoutSpecification.getObjectHeaderSize(); + objectPadding = memoryLayoutSpecification.getObjectPadding(); + referenceSize = memoryLayoutSpecification.getReferenceSize(); + superclassFieldPadding = memoryLayoutSpecification.getSuperclassFieldPadding(); + } + + /** + * Given an object, returns the total allocated size, in bytes, of the object + * and all other objects reachable from it. + * + * @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do + * anything special, it measures the size of all objects + * reachable through it (which will include its class loader, and by + * extension, all other Class objects loaded by + * the same loader, and all the parent class loaders). It doesn't provide the + * size of the static fields in the JVM class that the Class object + * represents. + * @return the total allocated size of the object and all other objects it + * retains. + */ + public synchronized long calculateObjectSize(Object obj) { + // Breadth-first traversal instead of naive depth-first with recursive + // implementation, so we don't blow the stack traversing long linked lists. 
+ try { + for (; ; ) { + visit(obj); + if (pending.isEmpty()) { + return size; + } + obj = pending.removeFirst(); + } + } finally { + alreadyVisited.clear(); + pending.clear(); + size = 0; + } + } + + private void visit(Object obj) { + if (alreadyVisited.contains(obj)) { + return; + } + final Class<?> clazz = obj.getClass(); + if (clazz == ArrayElementsVisitor.class) { + ((ArrayElementsVisitor) obj).visit(this); + } else { + alreadyVisited.add(obj); + if (clazz.isArray()) { + visitArray(obj); + } else { + classSizeInfos.getUnchecked(clazz).visit(obj, this); + } + } + } + + private void visitArray(Object array) { + final Class<?> componentType = array.getClass().getComponentType(); + final int length = Array.getLength(array); + if (componentType.isPrimitive()) { + increaseByArraySize(length, getPrimitiveFieldSize(componentType)); + } else { + increaseByArraySize(length, referenceSize); + // If we didn't use an ArrayElementsVisitor, we would be enqueueing every + // element of the array here instead. For large arrays, it would + // tremendously enlarge the queue. In essence, we're compressing it into + // a small command object instead. This is different than immediately + // visiting the elements, as their visiting is scheduled for the end of + // the current queue. + switch (length) { + case 0: { + break; + } + case 1: { + enqueue(Array.get(array, 0)); + break; + } + default: { + enqueue(new ArrayElementsVisitor((Object[]) array)); + } + } + } + } + + private void increaseByArraySize(int length, long elementSize) { + increaseSize(roundTo(arrayHeaderSize + length * elementSize, objectPadding)); + } + + private static class ArrayElementsVisitor { + + private final Object[] array; + + ArrayElementsVisitor(Object[] array) { + this.array = array; + } + + public void visit(ObjectSizeCalculator calc) { + for (Object elem : array) { + if (elem != null) { + calc.visit(elem); + } + } + } + } + + void enqueue(Object obj) { + if (obj != null) { + pending.addLast(obj); + } + } + + void increaseSize(long objectSize) { + size += objectSize; + } + + @VisibleForTesting + static long roundTo(long x, int multiple) { + return ((x + multiple - 1) / multiple) * multiple; + } + + private class ClassSizeInfo { + + // Padded fields + header size + private final long objectSize; + // Only the fields size - used to calculate the subclasses' memory + // footprint. 
+ private final long fieldsSize; + private final Field[] referenceFields; + + public ClassSizeInfo(Class<?> clazz) { + long fieldsSize = 0; + final List<Field> referenceFields = new LinkedList<Field>(); + for (Field f : clazz.getDeclaredFields()) { + if (Modifier.isStatic(f.getModifiers())) { + continue; + } + final Class<?> type = f.getType(); + if (type.isPrimitive()) { + fieldsSize += getPrimitiveFieldSize(type); + } else { + f.setAccessible(true); + referenceFields.add(f); + fieldsSize += referenceSize; + } + } + final Class<?> superClass = clazz.getSuperclass(); + if (superClass != null) { + final ClassSizeInfo superClassInfo = classSizeInfos.getUnchecked(superClass); + fieldsSize += roundTo(superClassInfo.fieldsSize, superclassFieldPadding); + referenceFields.addAll(Arrays.asList(superClassInfo.referenceFields)); + } + this.fieldsSize = fieldsSize; + this.objectSize = roundTo(objectHeaderSize + fieldsSize, objectPadding); + this.referenceFields = referenceFields.toArray( + new Field[referenceFields.size()]); + } + + void visit(Object obj, ObjectSizeCalculator calc) { + calc.increaseSize(objectSize); + enqueueReferencedObjects(obj, calc); + } + + public void enqueueReferencedObjects(Object obj, ObjectSizeCalculator calc) { + for (Field f : referenceFields) { + try { + calc.enqueue(f.get(obj)); + } catch (IllegalAccessException e) { + final AssertionError ae = new AssertionError( + "Unexpected denial of access to " + f); + ae.initCause(e); + throw ae; + } + } + } + } + + private static long getPrimitiveFieldSize(Class<?> type) { + if (type == boolean.class || type == byte.class) { + return 1; + } + if (type == char.class || type == short.class) { + return 2; + } + if (type == int.class || type == float.class) { + return 4; + } + if (type == long.class || type == double.class) { + return 8; + } + throw new AssertionError("Encountered unexpected primitive type " + + type.getName()); + } + + @VisibleForTesting + static MemoryLayoutSpecification getEffectiveMemoryLayoutSpecification() { + final String vmName = System.getProperty("java.vm.name"); + if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ") + || vmName.startsWith("OpenJDK") || vmName.startsWith("TwitterJDK"))) { + throw new UnsupportedOperationException( + "ObjectSizeCalculator only supported on HotSpot VM"); + } + + final String dataModel = System.getProperty("sun.arch.data.model"); + if ("32".equals(dataModel)) { + // Running with 32-bit data model + return new MemoryLayoutSpecification() { + @Override + public int getArrayHeaderSize() { + return 12; + } + + @Override + public int getObjectHeaderSize() { + return 8; + } + + @Override + public int getObjectPadding() { + return 8; + } + + @Override + public int getReferenceSize() { + return 4; + } + + @Override + public int getSuperclassFieldPadding() { + return 4; + } + }; + } else if (!"64".equals(dataModel)) { + throw new UnsupportedOperationException("Unrecognized value '" + + dataModel + "' of sun.arch.data.model system property"); + } + + final String strVmVersion = System.getProperty("java.vm.version"); + final int vmVersion = Integer.parseInt(strVmVersion.substring(0, + strVmVersion.indexOf('.'))); + if (vmVersion >= 17) { + long maxMemory = 0; + for (MemoryPoolMXBean mp : ManagementFactory.getMemoryPoolMXBeans()) { + maxMemory += mp.getUsage().getMax(); + } + if (maxMemory < 30L * 1024 * 1024 * 1024) { + // HotSpot 17.0 and above use compressed OOPs below 30GB of RAM total + // for all memory pools (yes, including code cache). 
+ return new MemoryLayoutSpecification() { + @Override + public int getArrayHeaderSize() { + return 16; + } + + @Override + public int getObjectHeaderSize() { + return 12; + } + + @Override + public int getObjectPadding() { + return 8; + } + + @Override + public int getReferenceSize() { + return 4; + } + + @Override + public int getSuperclassFieldPadding() { + return 4; + } + }; + } + } + + // In other cases, it's a 64-bit uncompressed OOPs object model + return new MemoryLayoutSpecification() { + @Override + public int getArrayHeaderSize() { + return 24; + } + + @Override + public int getObjectHeaderSize() { + return 16; + } + + @Override + public int getObjectPadding() { + return 8; + } + + @Override + public int getReferenceSize() { + return 8; + } + + @Override + public int getSuperclassFieldPadding() { + return 8; + } + }; + } +} \ No newline at end of file diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java index 91095e2..4487e97 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java @@ -18,7 +18,7 @@ package com.uber.hoodie.common.util.collection; -import com.twitter.common.objectsize.ObjectSizeCalculator; +import com.uber.hoodie.common.util.ObjectSizeCalculator; import com.uber.hoodie.common.util.SizeEstimator; import com.uber.hoodie.exception.HoodieIOException; import java.io.IOException; diff --git a/hoodie-hadoop-mr/pom.xml b/hoodie-hadoop-mr/pom.xml index 0c866ff..94e7964 100644 --- a/hoodie-hadoop-mr/pom.xml +++ b/hoodie-hadoop-mr/pom.xml @@ -34,6 +34,7 @@ <groupId>com.uber.hoodie</groupId> <artifactId>hoodie-common</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>com.uber.hoodie</groupId> @@ -89,20 +90,10 @@ <artifactId>parquet-avro</artifactId> </dependency> <dependency> - <groupId>com.twitter.common</groupId> - <artifactId>objectsize</artifactId> - <version>0.0.12</version> - </dependency> - <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> </dependency> <dependency> - <groupId>com.esotericsoftware</groupId> - <artifactId>kryo</artifactId> - <scope>test</scope> - </dependency> - <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <scope>test</scope> diff --git a/hoodie-hive/pom.xml b/hoodie-hive/pom.xml index f6622b3..6fc16b2 100644 --- a/hoodie-hive/pom.xml +++ b/hoodie-hive/pom.xml @@ -55,6 +55,7 @@ <groupId>org.apache.thrift</groupId> <artifactId>libthrift</artifactId> <version>${thrift.version}</version> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.parquet</groupId> @@ -94,16 +95,6 @@ <artifactId>jcommander</artifactId> </dependency> - <dependency> - <groupId>org.apache.httpcomponents</groupId> - <artifactId>httpcore</artifactId> - </dependency> - - <dependency> - <groupId>org.apache.httpcomponents</groupId> - <artifactId>httpclient</artifactId> - </dependency> - <!-- Hadoop Testing --> <dependency> <groupId>junit</groupId> @@ -144,11 +135,13 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <classifier>tests</classifier> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <classifier>tests</classifier> + <scope>test</scope> </dependency> 
<dependency> <groupId>org.apache.hadoop</groupId> @@ -183,12 +176,6 @@ <classifier>tests</classifier> <scope>test</scope> </dependency> - <dependency> - <groupId>com.esotericsoftware.kryo</groupId> - <artifactId>kryo</artifactId> - <version>2.21</version> - <scope>test</scope> - </dependency> </dependencies> diff --git a/hoodie-spark/pom.xml b/hoodie-spark/pom.xml index 1e238c5..3da1122 100644 --- a/hoodie-spark/pom.xml +++ b/hoodie-spark/pom.xml @@ -179,6 +179,7 @@ <groupId>com.databricks</groupId> <artifactId>spark-avro_2.11</artifactId> <version>4.0.0</version> + <scope>provided</scope> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> @@ -217,11 +218,6 @@ </dependency> <dependency> - <groupId>org.apache.commons</groupId> - <artifactId>commons-configuration2</artifactId> - </dependency> - - <dependency> <groupId>${hive.groupid}</groupId> <artifactId>hive-service</artifactId> <version>${hive.version}</version> @@ -271,8 +267,7 @@ </dependency> <dependency> <groupId>junit</groupId> - <artifactId>junit-dep</artifactId> - <version>${junit.version}</version> + <artifactId>junit</artifactId> <scope>test</scope> </dependency> <dependency> diff --git a/hoodie-timeline-service/pom.xml b/hoodie-timeline-service/pom.xml index acafc32..321fd34 100644 --- a/hoodie-timeline-service/pom.xml +++ b/hoodie-timeline-service/pom.xml @@ -171,11 +171,6 @@ </exclusions> </dependency> <dependency> - <groupId>com.esotericsoftware</groupId> - <artifactId>kryo</artifactId> - <scope>test</scope> - </dependency> - <dependency> <groupId>org.mockito</groupId> <artifactId>mockito-all</artifactId> <version>1.10.19</version> diff --git a/hoodie-utilities/pom.xml b/hoodie-utilities/pom.xml index f1fc092..470e5b2 100644 --- a/hoodie-utilities/pom.xml +++ b/hoodie-utilities/pom.xml @@ -64,17 +64,6 @@ </repositories> <dependencies> - <dependency> - <groupId>io.javalin</groupId> - <artifactId>javalin</artifactId> - <version>2.4.0</version> - <exclusions> - <exclusion> - <groupId>org.eclipse.jetty</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> - </dependency> <dependency> <groupId>io.dropwizard.metrics</groupId> @@ -123,20 +112,16 @@ </dependency> <dependency> - <groupId>org.eclipse.jetty</groupId> - <artifactId>jetty-server</artifactId> - <version>7.6.0.v20120127</version> - </dependency> - - <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <classifier>tests</classifier> + <scope>test</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <classifier>tests</classifier> + <scope>test</scope> <exclusions> <exclusion> <groupId>org.mortbay.jetty</groupId> @@ -166,6 +151,10 @@ <groupId>javax.servlet</groupId> <artifactId>servlet-api</artifactId> </exclusion> + <exclusion> + <groupId>org.eclipse.jetty.orbit</groupId> + <artifactId>javax.servlet</artifactId> + </exclusion> </exclusions> </dependency> @@ -210,19 +199,6 @@ </dependency> <dependency> - <groupId>commons-codec</groupId> - <artifactId>commons-codec</artifactId> - </dependency> - <dependency> - <groupId>commons-dbcp</groupId> - <artifactId>commons-dbcp</artifactId> - </dependency> - <dependency> - <groupId>commons-pool</groupId> - <artifactId>commons-pool</artifactId> - </dependency> - - <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpcore</artifactId> </dependency> @@ -231,10 +207,6 @@ <groupId>log4j</groupId> <artifactId>log4j</artifactId> </dependency> - <dependency> - <groupId>org.slf4j</groupId> - 
<artifactId>slf4j-api</artifactId> - </dependency> <dependency> <groupId>org.apache.hadoop</groupId> @@ -275,12 +247,6 @@ </dependency> <dependency> - <groupId>com.yammer.metrics</groupId> - <artifactId>metrics-core</artifactId> - <version>2.2.0</version> - </dependency> - - <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming_2.11</artifactId> <version>${spark.version}</version> @@ -291,6 +257,14 @@ <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka-0-8_2.11</artifactId> <version>${spark.version}</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>com.databricks</groupId> + <artifactId>spark-avro_2.11</artifactId> + <version>4.0.0</version> + <scope>provided</scope> </dependency> <!-- Used for SQL templating --> @@ -314,7 +288,6 @@ <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro-mapred</artifactId> - <version>1.7.7</version> </dependency> <dependency> diff --git a/pom.xml b/pom.xml index 7648a74..43933a3 100644 --- a/pom.xml +++ b/pom.xml @@ -514,8 +514,21 @@ <groupId>org.apache.avro</groupId> <artifactId>avro-mapred</artifactId> <version>${avro.version}</version> + <scope>provided</scope> + <exclusions> + <exclusion> + <groupId>org.mortbay.jetty</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>${avro.version}</version> + <scope>provided</scope> + </dependency> <!-- we have to stay at <= 16.0, due to issues with HBase client --> <dependency> <groupId>com.google.guava</groupId>
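
For context on the main code change in this patch: com.uber.hoodie.common.util.ObjectSizeCalculator is a copy of Twitter's HotSpot-only size estimator, added so the com.twitter.common:objectsize dependency could be dropped, and DefaultSizeEstimator/ExternalSpillableMap now resolve it from hoodie-common instead of the Twitter commons jar. Below is a minimal usage sketch in Java; the example class, the sample list, and the main method are illustrative assumptions, while the static getObjectSize(Object) entry point and its UnsupportedOperationException on unsupported JVMs come from the patch itself.

import com.uber.hoodie.common.util.ObjectSizeCalculator;

import java.util.ArrayList;
import java.util.List;

public class ObjectSizeCalculatorExample {

  public static void main(String[] args) {
    // Illustrative object graph to measure; any reachable graph works.
    List<String> records = new ArrayList<>();
    for (int i = 0; i < 1000; i++) {
      records.add("record-" + i);
    }

    try {
      // Deep, retained size in bytes of the list plus everything reachable from it.
      long sizeInBytes = ObjectSizeCalculator.getObjectSize(records);
      System.out.println("Estimated in-memory size: " + sizeInBytes + " bytes");
    } catch (UnsupportedOperationException e) {
      // Thrown when the JVM memory layout cannot be detected (non-HotSpot VMs).
      System.err.println("Size estimation not supported on this JVM: " + e.getMessage());
    }
  }
}

This is the same call pattern ExternalSpillableMap uses to estimate record sizes, only now against the class bundled in hoodie-common.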