hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)

2017-03-28 Thread ekoifman
Repository: hive
Updated Branches:
  refs/heads/branch-1 92730ca5b -> 1a9ed419e


HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a9ed419
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a9ed419
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a9ed419

Branch: refs/heads/branch-1
Commit: 1a9ed419e0997b1673662cb904159e3ae7de4468
Parents: 92730ca
Author: Eugene Koifman 
Authored: Tue Mar 28 12:03:22 2017 -0700
Committer: Eugene Koifman 
Committed: Tue Mar 28 12:03:22 2017 -0700

--
 .../hcatalog/streaming/StrictRegexWriter.java   | 211 +++
 .../hive/hcatalog/streaming/TestStreaming.java  |  75 ++-
 2 files changed, 283 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1a9ed419/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
--
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
new file mode 100644
index 0000000..cf495ab
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
@@ -0,0 +1,211 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.RegexSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Streaming Writer handles text input data with regex. Uses
+ * org.apache.hadoop.hive.serde2.RegexSerDe
+ */
+public class StrictRegexWriter extends AbstractRecordWriter {
+  private RegexSerDe serde;
+  private final StructObjectInspector recordObjInspector;
+  private final ObjectInspector[] bucketObjInspectors;
+  private final StructField[] bucketStructFields;
+  
+  /**
+   * @deprecated As of release 1.3/2.1.  Replaced by {@link #StrictRegexWriter(HiveEndPoint, HiveConf, StreamingConnection)}
+   */
+  @Deprecated
+  public StrictRegexWriter(HiveEndPoint endPoint) throws ConnectionError, SerializationError, StreamingException {
+this(endPoint, null, null);
+  }
+
+  /**
+   * @deprecated As of release 1.3/2.1.  Replaced by {@link #StrictRegexWriter(HiveEndPoint, HiveConf, StreamingConnection)}
+   */
+  @Deprecated
+  public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf) throws StreamingException {
+this(endPoint, conf, null);
+  }
+  
+  /**
+   * @deprecated As of release 1.3/2.1.  Replaced by {@link #StrictRegexWriter(String, HiveEndPoint, HiveConf, StreamingConnection)}
+   */
+  @Deprecated
+  public StrictRegexWriter(String regex, HiveEndPoint endPoint, HiveConf conf) throws StreamingException {
+this(regex, endPoint, conf, null);
+  }
+  
+  /**
+   * @param endPoint the end point to write to
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection 

hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan, via Eugene Koifman)

2017-03-28 Thread ekoifman
Repository: hive
Updated Branches:
  refs/heads/branch-1.2 07c86120e -> a3f718f7f


HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan, via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a3f718f7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a3f718f7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a3f718f7

Branch: refs/heads/branch-1.2
Commit: a3f718f7f7b1d850e67a3019cbf99d9699e09d7d
Parents: 07c8612
Author: Eugene Koifman 
Authored: Tue Mar 28 08:15:57 2017 -0700
Committer: Eugene Koifman 
Committed: Tue Mar 28 08:15:57 2017 -0700

--
 .../hcatalog/streaming/StrictRegexWriter.java   | 156 +++
 .../hive/hcatalog/streaming/TestStreaming.java  |  86 --
 2 files changed, 230 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/a3f718f7/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
--
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
new file mode 100644
index 0000000..c76f582
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.RegexSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Streaming Writer handles text input data with regex. Uses
+ * org.apache.hadoop.hive.serde2.RegexSerDe
+ */
+public class StrictRegexWriter extends AbstractRecordWriter {
+  private RegexSerDe serde;
+  private String regex;
+
+  /**
+   *
+   * @param endPoint the end point to write to
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint)
+  throws ConnectionError, SerializationError, StreamingException {
+super(endPoint, null);
+  }
+
+  /**
+   *
+   * @param endPoint the end point to write to
+   * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf)
+  throws ConnectionError, SerializationError, StreamingException {
+super(endPoint, conf);
+  }
+  
+  
+  /**
+  *
+  * @param regex to parse the data
+  * @param endPoint the end point to write to
+  * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+  * @throws ConnectionError
+  * @throws SerializationError
+  * @throws StreamingException
+  */
+ public StrictRegexWriter(String regex, HiveEndPoint endPoint, HiveConf conf)
+ throws ConnectionError, SerializationError, StreamingException {
+   super(endPoint, conf);
+   this.regex = regex;
+ }
+
+  @Override
+  SerDe getSerde() throws SerializationError {
+if(serde!=null) {
+  return serde;
+}
+serde = createSerde(tbl, conf, regex);
+return serde;
+  }
+
+  @Override
+  public void write(long transactionId, byte[] record)
+  throws StreamingIOFailure, SerializationError {
+try 

hive git commit: HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)

2017-03-22 Thread ekoifman
Repository: hive
Updated Branches:
  refs/heads/master 8613ef200 -> ea3be9549


HIVE-15691 Create StrictRegexWriter to work with RegexSerializer for Flume Hive Sink (Kalyan via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea3be954
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea3be954
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea3be954

Branch: refs/heads/master
Commit: ea3be9549dca7eaed5e838bbcb69d2372817ce42
Parents: 8613ef2
Author: Eugene Koifman 
Authored: Wed Mar 22 13:22:08 2017 -0700
Committer: Eugene Koifman 
Committed: Wed Mar 22 13:22:08 2017 -0700

--
 .../hcatalog/streaming/StrictRegexWriter.java   | 188 +++
 .../hive/hcatalog/streaming/TestStreaming.java  |  81 +++-
 2 files changed, 263 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ea3be954/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
--
diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
new file mode 100644
index 0000000..78987ab
--- /dev/null
+++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/StrictRegexWriter.java
@@ -0,0 +1,188 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.hcatalog.streaming;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.RegexSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Streaming Writer handles text input data with regex. Uses
+ * org.apache.hadoop.hive.serde2.RegexSerDe
+ */
+public class StrictRegexWriter extends AbstractRecordWriter {
+  private RegexSerDe serde;
+  private final StructObjectInspector recordObjInspector;
+  private final ObjectInspector[] bucketObjInspectors;
+  private final StructField[] bucketStructFields;
+  
+  /**
+   * @param endPoint the end point to write to
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, StreamingConnection conn)
+  throws ConnectionError, SerializationError, StreamingException {
+this(null, endPoint, null, conn);
+  }
+  
+  /**
+   * @param endPoint the end point to write to
+   * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   * @throws SerializationError
+   * @throws StreamingException
+   */
+  public StrictRegexWriter(HiveEndPoint endPoint, HiveConf conf, StreamingConnection conn)
+  throws ConnectionError, SerializationError, StreamingException {
+this(null, endPoint, conf, conn);
+  }
+  
+  /**
+   * @param regex to parse the data
+   * @param endPoint the end point to write to
+   * @param conf a Hive conf object. Should be null if not using advanced Hive settings.
+   * @param conn connection this Writer is to be used with
+   * @throws ConnectionError
+   *