Here is the diff that let me compile Hive 3.0 against Hadoop 2.8.0 (and also
run it on Hadoop 2.7.x).
diff --git a/pom.xml b/pom.xml
index c57ff58..8445288 100644
--- a/pom.xml
+++ b/pom.xml
@@ -146,7 +146,7 @@
<guava.version>19.0</guava.version>
<groovy.version>2.4.11</groovy.version>
<h2database.version>1.3.166</h2database.version>
- <hadoop.version>3.1.0</hadoop.version>
+ <hadoop.version>2.8.0</hadoop.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
<hamcrest.version>1.3</hamcrest.version>
<hbase.version>2.0.0-alpha4</hbase.version>
@@ -1212,7 +1212,7 @@
<onlyWhenRelease>true</onlyWhenRelease>
</requireReleaseDeps>
</rules>
- <fail>true</fail>
+ <fail>false</fail>
</configuration>
</execution>
<execution>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index b13f73b..21d8541 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -277,7 +277,7 @@ protected void openInternal(String[] additionalFilesNotFromConf,
     } else {
       this.resources = new HiveResources(createTezDir(sessionId, "resources"));
       ensureLocalResources(conf, additionalFilesNotFromConf);
-      LOG.info("Created new resources: " + resources);
+      LOG.info("Created new resources: " + this.resources);
     }
     // unless already installed on all the cluster nodes, we'll have to
@@ -639,7 +639,6 @@ public void ensureLocalResources(Configuration conf, String[] newFilesNotFromConf
    * @throws Exception
    */
   void close(boolean keepDagFilesDir) throws Exception {
-    console = null;
     appJarLr = null;
     try {
@@ -665,6 +664,7 @@ void close(boolean keepDagFilesDir) throws Exception {
         }
       }
     } finally {
+      console = null;
       try {
         cleanupScratchDir();
       } finally {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 84ae157..be66787 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -160,7 +160,9 @@ public int execute(DriverContext driverContext) {
       if (userName == null) {
         userName = "anonymous";
       } else {
-        groups = UserGroupInformation.createRemoteUser(userName).getGroups();
+        groups = Arrays.asList(UserGroupInformation.createRemoteUser(userName).getGroupNames());
+        // TODO: for Hadoop 2.8.0+, just call getGroups():
+        // groups = UserGroupInformation.createRemoteUser(userName).getGroups();
       }
       MappingInput mi = new MappingInput(userName, groups,
           ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 1ae8194..aaf0c62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -472,7 +472,7 @@ static EventLogger getInstance(HiveConf conf) {
if (instance == null) {
synchronized (EventLogger.class) {
if (instance == null) {
- instance = new EventLogger(conf, SystemClock.getInstance());
+ instance = new EventLogger(conf, new SystemClock());
ShutdownHookManager.addShutdownHook(instance::shutdown);
}
}
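
Same pattern here: as far as I can tell, SystemClock.getInstance() is not
in the yarn-common that ships with Hadoop 2.7.x, so the patch falls back to
the no-arg constructor that both lines provide. A minimal sketch, as my own
illustration (ClockCompat is a made-up name, not part of the patch):

    import org.apache.hadoop.yarn.util.Clock;
    import org.apache.hadoop.yarn.util.SystemClock;

    public final class ClockCompat {
      private ClockCompat() {}

      // The no-arg constructor links against both old and new yarn-common
      // jars; getInstance() only resolves on the newer ones.
      static Clock clock() {
        return new SystemClock();
      }
    }
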
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 183515a..2f393c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -1051,7 +1051,9 @@ else if (prev != null && next.maxWriteId ==
prev.maxWriteId
*/
Collections.sort(original, (HdfsFileStatusWithId o1,
HdfsFileStatusWithId o2) -> {
//this does "Path.uri.compareTo(that.uri)"
- return o1.getFileStatus().compareTo(o2.getFileStatus());
+ return
o1.getFileStatus().getPath().compareTo(o2.getFileStatus().getPath());
+ // TODO: for Hadoop 2.8+
+ // return o1.getFileStatus().compareTo(o2.getFileStatus());
});
// Note: isRawFormat is invalid for non-ORC tables. It will always
return true, so we're good.
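
This hunk is the direct fix for the NoSuchMethodError quoted further down
in this thread: Hive 3.0 was compiled against a FileStatus whose
compareTo(FileStatus) overload does not exist on older Hadoop releases, so
the call fails to resolve at runtime. Comparing the underlying Paths
sidesteps the signature change. A minimal sketch of an equivalent
comparator, as my own illustration (FileStatusCompat is a made-up name, not
part of the patch):

    import java.util.Comparator;
    import org.apache.hadoop.fs.FileStatus;

    public final class FileStatusCompat {
      private FileStatusCompat() {}

      // Order by path; Path.compareTo() is available on every Hadoop 2.x
      // and 3.x release, so this binds cleanly everywhere.
      static final Comparator<FileStatus> BY_PATH =
          (a, b) -> a.getPath().compareTo(b.getPath());
    }
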
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java b/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
index 5e117fe..4367107 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
@@ -76,7 +76,7 @@ public void setup() throws Exception {
@Test
public void testPreEventLog() throws Exception {
context.setHookType(HookType.PRE_EXEC_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -105,7 +105,7 @@ public void testPreEventLog() throws Exception {
public void testPostEventLog() throws Exception {
context.setHookType(HookType.POST_EXEC_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -124,7 +124,7 @@ public void testPostEventLog() throws Exception {
public void testFailureEventLog() throws Exception {
context.setHookType(HookType.ON_FAILURE_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -149,7 +149,7 @@ private HiveHookEventProto loadEvent(HiveConf conf, String tmpFolder)
     Assert.assertEquals(1, status.length);
     DatePartitionedLogger<HiveHookEventProto> logger = new DatePartitionedLogger<>(
-        HiveHookEventProto.PARSER, path, conf, SystemClock.getInstance());
+        HiveHookEventProto.PARSER, path, conf, new SystemClock());
     ProtoMessageReader<HiveHookEventProto> reader = logger.getReader(status[0].getPath());
HiveHookEventProto event = reader.readEvent();
Assert.assertNotNull(event);
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index ec06a88..7009a09 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -1127,11 +1127,10 @@ public Boolean run() throws Exception {
   @Override
   public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
-    DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst)
-        .withSyncFolder(true)
-        .withCRC(true)
-        .preserve(FileAttribute.BLOCKSIZE)
-        .build();
+    DistCpOptions options = new DistCpOptions(srcPaths, dst);
+    options.setSyncFolder(true);
+    options.setSkipCRC(true);
+    options.preserve(FileAttribute.BLOCKSIZE);
     // Creates the command-line parameters for distcp
     List<String> params = constructDistCpParams(srcPaths, dst, conf);
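
The DistCpOptions change follows the same pattern: the fluent Builder used
by Hive 3.0's shim is a Hadoop 3 API, whereas Hadoop 2.x constructs the
options object directly and mutates it through setters. A minimal,
self-contained sketch of the 2.x-style invocation, as my own illustration
(the /src and /dst paths are made up, and DistCpCompatExample is not part
of the patch):

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

    public class DistCpCompatExample {
      public static void main(String[] args) throws Exception {
        // Hadoop 2.x style: constructor plus setters instead of a Builder.
        DistCpOptions options =
            new DistCpOptions(Arrays.asList(new Path("/src")), new Path("/dst"));
        options.setSyncFolder(true);               // Builder.withSyncFolder(true)
        options.setSkipCRC(true);                  // Builder.withCRC(true)
        options.preserve(FileAttribute.BLOCKSIZE); // Builder.preserve(...)

        // Kick off the copy; execute() throws if the underlying job fails.
        new DistCp(new Configuration(), options).execute();
      }
    }
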
On Sat, Jun 9, 2018 at 3:16 AM, Owen O'Malley <[email protected]> wrote:
> Mich,
> Try changing the hadoop.version in Hive's pom.xml to 2.7.3 and re-build.
> That should at least let you know where the problems are.
>
> .. Owen
>
> On Fri, Jun 8, 2018 at 9:05 AM, Mich Talebzadeh <[email protected]> wrote:
>
>> Hi Owen,
>>
>> It is 2.7.3
>>
>> hadoop version
>> Hadoop 2.7.3
>> Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r baa91f7c6bc9cb92be5982de4719c1c8af91ccff
>> Compiled by root on 2016-08-18T01:41Z
>>
>>
>> Dr Mich Talebzadeh
>>
>> On 8 June 2018 at 16:59, Owen O'Malley <[email protected]> wrote:
>>
>>> This looks like an API incompatibility between the version of Hadoop
>>> expected and the version used. Which version of Hadoop are you using?
>>>
>>> .. Owen
>>>
>>> On Jun 8, 2018, at 08:31, Mich Talebzadeh <[email protected]> wrote:
>>>
>>> Just installed and upgraded to Hive 3, where the fun and games started :)
>>>
>>> First I had to set hive.metastore.event.db.notification.api.auth to
>>> false in hive-site.xml, as per this JIRA
>>> <https://www.mail-archive.com/[email protected]/msg121026.html>, to
>>> make HiveServer2 start and stay up.
>>>
>>> Now when I connect via beeline I see this error:
>>>
>>>
>>> 0: jdbc:hive2://rhes75:10099/default> select * from sales limit 10;
>>> Error: java.io.IOException: java.lang.RuntimeException: ORC split
>>> generation failed with exception: java.lang.NoSuchMethodError:
>>> org.apache.hadoop.fs.FileStatus.compareTo(Lorg/apache/hadoop/fs/FileStatus;)I
>>> (state=,code=0)
>>>
>>> The table is an ORC table, as follows, and it used to work fine:
>>>
>>> 0: jdbc:hive2://rhes75:10099/default> desc formatted sales;
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> | col_name                      | data_type                                                     | comment                                       |
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> | # col_name                    | data_type                                                     | comment                                       |
>>> | prod_id                       | bigint                                                        |                                               |
>>> | cust_id                       | bigint                                                        |                                               |
>>> | time_id                       | timestamp                                                     |                                               |
>>> | channel_id                    | bigint                                                        |                                               |
>>> | promo_id                      | bigint                                                        |                                               |
>>> | quantity_sold                 | decimal(10,0)                                                 |                                               |
>>> | amount_sold                   | decimal(10,0)                                                 |                                               |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Partition Information       | NULL                                                          | NULL                                          |
>>> | # col_name                    | data_type                                                     | comment                                       |
>>> | year                          | int                                                           |                                               |
>>> | month                         | int                                                           |                                               |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Detailed Table Information  | NULL                                                          | NULL                                          |
>>> | Database:                     | oraclehadoop                                                  | NULL                                          |
>>> | OwnerType:                    | USER                                                          | NULL                                          |
>>> | Owner:                        | hduser                                                        | NULL                                          |
>>> | CreateTime:                   | Wed May 31 16:31:47 BST 2017                                  | NULL                                          |
>>> | LastAccessTime:               | UNKNOWN                                                       | NULL                                          |
>>> | Retention:                    | 0                                                             | NULL                                          |
>>> | Location:                     | hdfs://rhes75:9000/user/hive/warehouse/oraclehadoop.db/sales  | NULL                                          |
>>> | Table Type:                   | MANAGED_TABLE                                                 | NULL                                          |
>>> | Table Parameters:             | NULL                                                          | NULL                                          |
>>> |                               | COLUMN_STATS_ACCURATE                                         | {\"BASIC_STATS\":\"true\"}                    |
>>> |                               | numFiles                                                      | 12544                                         |
>>> |                               | numPartitions                                                 | 49                                            |
>>> |                               | numRows                                                       | 917359                                        |
>>> |                               | orc.bloom.filter.columns                                      | PROD_ID,CUST_ID,TIME_ID,CHANNEL_ID,PROMO_ID   |
>>> |                               | orc.bloom.filter.fpp                                          | 0.05                                          |
>>> |                               | orc.compress                                                  | SNAPPY                                        |
>>> |                               | orc.create.index                                              | true                                          |
>>> |                               | orc.row.index.stride                                          | 10000                                         |
>>> |                               | orc.stripe.size                                               | 268435456                                     |
>>> |                               | rawDataSize                                                   | 271538264                                     |
>>> |                               | totalSize                                                     | 77602053                                      |
>>> |                               | transient_lastDdlTime                                         | 1496244707                                    |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Storage Information         | NULL                                                          | NULL                                          |
>>> | SerDe Library:                | org.apache.hadoop.hive.ql.io.orc.OrcSerde                     | NULL                                          |
>>> | InputFormat:                  | org.apache.hadoop.hive.ql.io.orc.OrcInputFormat               | NULL                                          |
>>> | OutputFormat:                 | org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat              | NULL                                          |
>>> | Compressed:                   | No                                                            | NULL                                          |
>>> | Num Buckets:                  | 256                                                           | NULL                                          |
>>> | Bucket Columns:               | [prod_id, cust_id, time_id, channel_id, promo_id]             | NULL                                          |
>>> | Sort Columns:                 | []                                                            | NULL                                          |
>>> | Storage Desc Params:          | NULL                                                          | NULL                                          |
>>> |                               | serialization.format                                          | 1                                             |
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> 48 rows selected (0.561 seconds)
>>>
>>> Dr Mich Talebzadeh
>>>
>>
>