CRUNCH-124: Add a TableSourceTarget interface and use it to abstract away the specific implementations in the At factory methods
Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/8ce493ae Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/8ce493ae Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/8ce493ae Branch: refs/heads/master Commit: 8ce493aebbb0f5e724e62cb880ac1dd977356f75 Parents: a988ce2 Author: Josh Wills <[email protected]> Authored: Tue Dec 4 20:52:00 2012 -0800 Committer: Josh Wills <[email protected]> Committed: Tue Dec 4 20:52:00 2012 -0800 ---------------------------------------------------------------------- .../main/java/org/apache/crunch/SourceTarget.java | 2 +- .../java/org/apache/crunch/TableSourceTarget.java | 25 ++++++ crunch/src/main/java/org/apache/crunch/io/At.java | 65 ++++++++------- .../crunch/io/seq/SeqFileTableSourceTarget.java | 4 +- .../crunch/io/text/TextFileTableSourceTarget.java | 4 +- 5 files changed, 63 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/8ce493ae/crunch/src/main/java/org/apache/crunch/SourceTarget.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/SourceTarget.java b/crunch/src/main/java/org/apache/crunch/SourceTarget.java index 5d0cddc..09c03c6 100644 --- a/crunch/src/main/java/org/apache/crunch/SourceTarget.java +++ b/crunch/src/main/java/org/apache/crunch/SourceTarget.java @@ -20,7 +20,7 @@ package org.apache.crunch; /** * An interface for classes that implement both the {@code Source} and the * {@code Target} interfaces. 
- * + * */ public interface SourceTarget<T> extends Source<T>, Target { } http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/8ce493ae/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java new file mode 100644 index 0000000..9b1ed34 --- /dev/null +++ b/crunch/src/main/java/org/apache/crunch/TableSourceTarget.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch; + +/** + * An interface for classes that implement both the {@code TableSource} and the + * {@code Target} interfaces. 
+ */ +public interface TableSourceTarget<K, V> extends TableSource<K, V>, SourceTarget<Pair<K, V>> { +} http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/8ce493ae/crunch/src/main/java/org/apache/crunch/io/At.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/io/At.java b/crunch/src/main/java/org/apache/crunch/io/At.java index 702e3be..d91647b 100644 --- a/crunch/src/main/java/org/apache/crunch/io/At.java +++ b/crunch/src/main/java/org/apache/crunch/io/At.java @@ -19,6 +19,7 @@ package org.apache.crunch.io; import org.apache.avro.specific.SpecificRecord; import org.apache.crunch.SourceTarget; +import org.apache.crunch.TableSourceTarget; import org.apache.crunch.io.avro.AvroFileSourceTarget; import org.apache.crunch.io.seq.SeqFileSourceTarget; import org.apache.crunch.io.seq.SeqFileTableSourceTarget; @@ -42,9 +43,9 @@ public class At { * * @param pathName The name of the path to the data on the filesystem * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file - * @return A new {@code AvroFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T extends SpecificRecord> AvroFileSourceTarget<T> avroFile(String pathName, Class<T> avroClass) { + public static <T extends SpecificRecord> SourceTarget<T> avroFile(String pathName, Class<T> avroClass) { return avroFile(new Path(pathName), avroClass); } @@ -53,9 +54,9 @@ public class At { * * @param path The {@code Path} to the data * @param avroClass The subclass of {@code SpecificRecord} to use for the Avro file - * @return A new {@code AvroFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T extends SpecificRecord> AvroFileSourceTarget<T> avroFile(Path path, Class<T> avroClass) { + public static <T extends SpecificRecord> SourceTarget<T> avroFile(Path path, Class<T> avroClass) { return avroFile(path, 
Avros.specifics(avroClass)); } @@ -64,9 +65,9 @@ public class At { * * @param pathName The name of the path to the data on the filesystem * @param avroType The {@code AvroType} for the Avro records - * @return A new {@code AvroFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> AvroFileSourceTarget<T> avroFile(String pathName, AvroType<T> avroType) { + public static <T> SourceTarget<T> avroFile(String pathName, AvroType<T> avroType) { return avroFile(new Path(pathName), avroType); } @@ -75,9 +76,9 @@ public class At { * * @param path The {@code Path} to the data * @param avroType The {@code AvroType} for the Avro records - * @return A new {@code AvroFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> AvroFileSourceTarget<T> avroFile(Path path, AvroType<T> avroType) { + public static <T> SourceTarget<T> avroFile(Path path, AvroType<T> avroType) { return new AvroFileSourceTarget<T>(path, avroType); } @@ -87,9 +88,9 @@ public class At { * * @param pathName The name of the path to the data on the filesystem * @param valueClass The {@code Writable} type for the value of the SequenceFile entry - * @return A new {@code SeqFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T extends Writable> SeqFileSourceTarget<T> sequenceFile(String pathName, Class<T> valueClass) { + public static <T extends Writable> SourceTarget<T> sequenceFile(String pathName, Class<T> valueClass) { return sequenceFile(new Path(pathName), valueClass); } @@ -99,9 +100,9 @@ public class At { * * @param path The {@code Path} to the data * @param valueClass The {@code Writable} type for the value of the SequenceFile entry - * @return A new {@code SeqFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T extends Writable> SeqFileSourceTarget<T> sequenceFile(Path path, Class<T> valueClass) { + public static 
<T extends Writable> SourceTarget<T> sequenceFile(Path path, Class<T> valueClass) { return sequenceFile(path, Writables.writables(valueClass)); } @@ -111,9 +112,9 @@ public class At { * * @param pathName The name of the path to the data on the filesystem * @param ptype The {@code PType} for the value of the SequenceFile entry - * @return A new {@code SeqFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> SeqFileSourceTarget<T> sequenceFile(String pathName, PType<T> ptype) { + public static <T> SourceTarget<T> sequenceFile(String pathName, PType<T> ptype) { return sequenceFile(new Path(pathName), ptype); } @@ -123,9 +124,9 @@ public class At { * * @param path The {@code Path} to the data * @param ptype The {@code PType} for the value of the SequenceFile entry - * @return A new {@code SeqFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> SeqFileSourceTarget<T> sequenceFile(Path path, PType<T> ptype) { + public static <T> SourceTarget<T> sequenceFile(Path path, PType<T> ptype) { return new SeqFileSourceTarget<T>(path, ptype); } @@ -136,9 +137,9 @@ public class At { * @param pathName The name of the path to the data on the filesystem * @param keyClass The {@code Writable} type for the key of the SequenceFile entry * @param valueClass The {@code Writable} type for the value of the SequenceFile entry - * @return A new {@code SeqFileTableSourceTarget<K, V>} instance + * @return A new {@code TableSourceTarget<K, V>} instance */ - public static <K extends Writable, V extends Writable> SeqFileTableSourceTarget<K, V> sequenceFile( + public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile( String pathName, Class<K> keyClass, Class<V> valueClass) { return sequenceFile(new Path(pathName), keyClass, valueClass); } @@ -150,9 +151,9 @@ public class At { * @param path The {@code Path} to the data * @param keyClass The {@code Writable} type for 
the key of the SequenceFile entry * @param valueClass The {@code Writable} type for the value of the SequenceFile entry - * @return A new {@code SeqFileTableSourceTarget<K, V>} instance + * @return A new {@code TableSourceTarget<K, V>} instance */ - public static <K extends Writable, V extends Writable> SeqFileTableSourceTarget<K, V> sequenceFile( + public static <K extends Writable, V extends Writable> TableSourceTarget<K, V> sequenceFile( Path path, Class<K> keyClass, Class<V> valueClass) { return sequenceFile(path, Writables.writables(keyClass), Writables.writables(valueClass)); } @@ -164,9 +165,9 @@ public class At { * @param pathName The name of the path to the data on the filesystem * @param keyType The {@code PType} for the key of the SequenceFile entry * @param valueType The {@code PType} for the value of the SequenceFile entry - * @return A new {@code SeqFileTableSourceTarget<K, V>} instance + * @return A new {@code TableSourceTarget<K, V>} instance */ - public static <K, V> SeqFileTableSourceTarget<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) { + public static <K, V> TableSourceTarget<K, V> sequenceFile(String pathName, PType<K> keyType, PType<V> valueType) { return sequenceFile(new Path(pathName), keyType, valueType); } @@ -177,9 +178,9 @@ public class At { * @param path The {@code Path} to the data * @param keyType The {@code PType} for the key of the SequenceFile entry * @param valueType The {@code PType} for the value of the SequenceFile entry - * @return A new {@code SeqFileTableSourceTarget<K, V>} instance + * @return A new {@code TableSourceTarget<K, V>} instance */ - public static <K, V> SeqFileTableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) { + public static <K, V> TableSourceTarget<K, V> sequenceFile(Path path, PType<K> keyType, PType<V> valueType) { PTypeFamily ptf = keyType.getFamily(); return new SeqFileTableSourceTarget<K, V>(path, ptf.tableOf(keyType, valueType)); } @@ 
-188,9 +189,9 @@ public class At { * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given path name. * * @param pathName The name of the path to the data on the filesystem - * @return A new {@code TextFileSourceTarget<String>} instance + * @return A new {@code SourceTarget<String>} instance */ - public static TextFileSourceTarget<String> textFile(String pathName) { + public static SourceTarget<String> textFile(String pathName) { return textFile(new Path(pathName)); } @@ -198,9 +199,9 @@ public class At { * Creates a {@code SourceTarget<String>} instance for the text file(s) at the given {@code Path}. * * @param path The {@code Path} to the data - * @return A new {@code TextFileSourceTarget<String>} instance + * @return A new {@code SourceTarget<String>} instance */ - public static TextFileSourceTarget<String> textFile(Path path) { + public static SourceTarget<String> textFile(Path path) { return textFile(path, Writables.strings()); } @@ -210,9 +211,9 @@ public class At { * * @param pathName The name of the path to the data on the filesystem * @param ptype The {@code PType<T>} to use to process the input text - * @return A new {@code TextFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> TextFileSourceTarget<T> textFile(String pathName, PType<T> ptype) { + public static <T> SourceTarget<T> textFile(String pathName, PType<T> ptype) { return textFile(new Path(pathName), ptype); } @@ -222,9 +223,9 @@ public class At { * * @param path The {@code Path} to the data * @param ptype The {@code PType<T>} to use to process the input text - * @return A new {@code TextFileSourceTarget<T>} instance + * @return A new {@code SourceTarget<T>} instance */ - public static <T> TextFileSourceTarget<T> textFile(Path path, PType<T> ptype) { + public static <T> SourceTarget<T> textFile(Path path, PType<T> ptype) { return new TextFileSourceTarget<T>(path, ptype); } } 
http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/8ce493ae/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java index e13de1d..ebdf319 100644 --- a/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java +++ b/crunch/src/main/java/org/apache/crunch/io/seq/SeqFileTableSourceTarget.java @@ -18,7 +18,7 @@ package org.apache.crunch.io.seq; import org.apache.crunch.Pair; -import org.apache.crunch.TableSource; +import org.apache.crunch.TableSourceTarget; import org.apache.crunch.io.FileNamingScheme; import org.apache.crunch.io.SequentialFileNamingScheme; import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl; @@ -26,7 +26,7 @@ import org.apache.crunch.types.PTableType; import org.apache.hadoop.fs.Path; public class SeqFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements - TableSource<K, V> { + TableSourceTarget<K, V> { private final PTableType<K, V> tableType; public SeqFileTableSourceTarget(String path, PTableType<K, V> tableType) { http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/8ce493ae/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java b/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java index bdc83a1..dec97e5 100644 --- a/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java +++ b/crunch/src/main/java/org/apache/crunch/io/text/TextFileTableSourceTarget.java @@ -18,7 +18,7 @@ package org.apache.crunch.io.text; import org.apache.crunch.Pair; -import org.apache.crunch.TableSource; +import 
org.apache.crunch.TableSourceTarget; import org.apache.crunch.io.FileNamingScheme; import org.apache.crunch.io.SequentialFileNamingScheme; import org.apache.crunch.io.impl.ReadableSourcePathTargetImpl; @@ -32,7 +32,7 @@ import org.apache.hadoop.fs.Path; * the keys and the values. */ public class TextFileTableSourceTarget<K, V> extends ReadableSourcePathTargetImpl<Pair<K, V>> implements - TableSource<K, V> { + TableSourceTarget<K, V> { private final PTableType<K, V> tableType;
