hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)
Repository: hive Updated Branches: refs/heads/branch-1 92730ca5b -> 1a9ed419e HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a9ed419 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a9ed419 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a9ed419 Branch: refs/heads/branch-1 Commit: 1a9ed419e0997b1673662cb904159e3ae7de4468 Parents: 92730ca Author: Eugene Koifman Authored: Tue Mar 28 12:03:22 2017 -0700 Committer: Eugene Koifman Committed: Tue Mar 28 12:03:22 2017 -0700 -- .../hcatalog/streaming/StrictRegexWriter.java | 211 +++ .../hive/hcatalog/streaming/TestStreaming.java | 75 ++- 2 files changed, 283 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1a9ed419/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java -- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java new file mode 100644 index 000..cf495ab --- /dev/null +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java @@ -0,0 +1,211 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.hcatalog.streaming; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.RegexSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.Text; + +/** + * Streaming Writer handles text input data with regex. Uses + * org.apache.hadoop.hive.serde2.RegexSerDe + */ +public class StrictRegexWriter extends AbstractRecordWriter { + private RegexSerDe serde; + private final StructObjectInspector recordObjInspector; + private final ObjectInspector[] bucketObjInspectors; + private final StructField[] bucketStructFields; + + /** + * @deprecated As of release 1.3/2.1. 
Replaced by {@link #StrictRegexWriter(HiveEndPoint, HiveConf, StreamingConnection)} + */ + @Deprecated + public StrictRegexWriter(HiveEndPoint endPoint) throws ConnectionError, SerializationError, StreamingException { +this(endPoint, null, null); + } + + /** + * @deprecated As of release 1.3/2.1. Replaced by {@link #StrictRegexWriter(HiveEndPoint, HiveConf, StreamingConnection)} + */ + @Deprecated + public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf) throws StreamingException { +this(endPoint, conf, null); + } + + /** + * @deprecated As of release 1.3/2.1. Replaced by {@link #StrictRegexWriter(String, HiveEndPoint, HiveConf, StreamingConnection)} + */ + @Deprecated + public StrictRegexWriter(String regex, HiveEndPoint endPoint, HiveConf conf) throws StreamingException { +this(regex, endPoint, conf, null); + } + + /** + * @param endPoint the end point to write to + * @param conn connection this Writer is to be used with + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection
hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan, via Eugene Koifman)
Repository: hive Updated Branches: refs/heads/branch-1.2 07c86120e -> a3f718f7f HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan, via Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a3f718f7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a3f718f7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a3f718f7 Branch: refs/heads/branch-1.2 Commit: a3f718f7f7b1d850e67a3019cbf99d9699e09d7d Parents: 07c8612 Author: Eugene Koifman Authored: Tue Mar 28 08:15:57 2017 -0700 Committer: Eugene Koifman Committed: Tue Mar 28 08:15:57 2017 -0700 -- .../hcatalog/streaming/StrictRegexWriter.java | 156 +++ .../hive/hcatalog/streaming/TestStreaming.java | 86 -- 2 files changed, 230 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a3f718f7/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java -- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java new file mode 100644 index 000..c76f582 --- /dev/null +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.hcatalog.streaming; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.RegexSerDe; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.io.Text; + +/** + * Streaming Writer handles text input data with regex. Uses + * org.apache.hadoop.hive.serde2.RegexSerDe + */ +public class StrictRegexWriter extends AbstractRecordWriter { + private RegexSerDe serde; + private String regex; + + /** + * + * @param endPoint the end point to write to + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint) + throws ConnectionError, SerializationError, StreamingException { +super(endPoint, null); + } + + /** + * + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. 
+ * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf) + throws ConnectionError, SerializationError, StreamingException { +super(endPoint, conf); + } + + + /** + * + * @param regex to parse the data + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(String regex, HiveEndPoint endPoint, HiveConf conf) + throws ConnectionError, SerializationError, StreamingException { + super(endPoint, conf); + this.regex = regex; + } + + @Override + SerDe getSerde() throws SerializationError { +if(serde!=null) { + return serde; +} +serde = createSerde(tbl, conf, regex); +return serde; + } + + @Override + public void write(long transactionId, byte[] record) + throws StreamingIOFailure, SerializationError { +try
hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)
Repository: hive Updated Branches: refs/heads/master 8613ef200 -> ea3be9549 HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea3be954 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea3be954 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea3be954 Branch: refs/heads/master Commit: ea3be9549dca7eaed5e838bbcb69d2372817ce42 Parents: 8613ef2 Author: Eugene Koifman Authored: Wed Mar 22 13:22:08 2017 -0700 Committer: Eugene Koifman Committed: Wed Mar 22 13:22:08 2017 -0700 -- .../hcatalog/streaming/StrictRegexWriter.java | 188 +++ .../hive/hcatalog/streaming/TestStreaming.java | 81 +++- 2 files changed, 263 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ea3be954/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java -- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java new file mode 100644 index 000..78987ab --- /dev/null +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java @@ -0,0 +1,188 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.hcatalog.streaming; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.RegexSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.io.Text; + +/** + * Streaming Writer handles text input data with regex. 
Uses + * org.apache.hadoop.hive.serde2.RegexSerDe + */ +public class StrictRegexWriter extends AbstractRecordWriter { + private RegexSerDe serde; + private final StructObjectInspector recordObjInspector; + private final ObjectInspector[] bucketObjInspectors; + private final StructField[] bucketStructFields; + + /** + * @param endPoint the end point to write to + * @param conn connection this Writer is to be used with + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection conn) + throws ConnectionError, SerializationError, StreamingException { +this(null, endPoint, null, conn); + } + + /** + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. + * @param conn connection this Writer is to be used with + * @throws ConnectionError + * @throws SerializationError + * @throws StreamingException + */ + public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf, StreamingConnection conn) + throws ConnectionError, SerializationError, StreamingException { +this(null, endPoint, conf, conn); + } + + /** + * @param regex to parse the data + * @param endPoint the end point to write to + * @param conf a Hive conf object. Should be null if not using advanced Hive settings. + * @param conn connection this Writer is to be used with + * @throws ConnectionError + *