Here is the diff that let me compile Hive 3.0 against Hadoop 2.8.0 (and also
run it on Hadoop 2.7.x).
diff --git a/pom.xml b/pom.xml
index c57ff58..8445288 100644
--- a/pom.xml
+++ b/pom.xml
@@ -146,7 +146,7 @@
<guava.version>19.0</guava.version>
<groovy.version>2.4.11</groovy.version>
<h2database.version>1.3.166</h2database.version>
- <hadoop.version>3.1.0</hadoop.version>
+ <hadoop.version>2.8.0</hadoop.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
<hamcrest.version>1.3</hamcrest.version>
<hbase.version>2.0.0-alpha4</hbase.version>
@@ -1212,7 +1212,7 @@
<onlyWhenRelease>true</onlyWhenRelease>
</requireReleaseDeps>
</rules>
- <fail>true</fail>
+ <fail>false</fail>
</configuration>
</execution>
<execution>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index b13f73b..21d8541 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -277,7 +277,7 @@ protected void openInternal(String[] additionalFilesNotFromConf,
     } else {
       this.resources = new HiveResources(createTezDir(sessionId, "resources"));
       ensureLocalResources(conf, additionalFilesNotFromConf);
-      LOG.info("Created new resources: " + resources);
+      LOG.info("Created new resources: " + this.resources);
     }
     // unless already installed on all the cluster nodes, we'll have to
@@ -639,7 +639,6 @@ public void ensureLocalResources(Configuration conf, String[] newFilesNotFromConf
    * @throws Exception
    */
   void close(boolean keepDagFilesDir) throws Exception {
-    console = null;
     appJarLr = null;
     try {
@@ -665,6 +664,7 @@ void close(boolean keepDagFilesDir) throws Exception {
         }
       }
     } finally {
+      console = null;
       try {
         cleanupScratchDir();
       } finally {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
index 84ae157..be66787 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezTask.java
@@ -160,7 +160,9 @@ public int execute(DriverContext driverContext) {
       if (userName == null) {
         userName = "anonymous";
       } else {
-        groups = UserGroupInformation.createRemoteUser(userName).getGroups();
+        groups = Arrays.asList(UserGroupInformation.createRemoteUser(userName).getGroupNames());
+        // TODO: for Hadoop 2.8.0+, just call getGroups():
+        // groups = UserGroupInformation.createRemoteUser(userName).getGroups();
       }
       MappingInput mi = new MappingInput(userName, groups,
           ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
index 1ae8194..aaf0c62 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java
@@ -472,7 +472,7 @@ static EventLogger getInstance(HiveConf conf) {
if (instance == null) {
synchronized (EventLogger.class) {
if (instance == null) {
- instance = new EventLogger(conf, SystemClock.getInstance());
+ instance = new EventLogger(conf, new SystemClock());
ShutdownHookManager.addShutdownHook(instance::shutdown);
}
}
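
Same pattern here: as far as I can tell, SystemClock.getInstance() is not
in the yarn-common that ships with Hadoop 2.7.x, so the patch falls back to
the no-arg constructor that both lines provide. A minimal sketch, as my own
illustration (ClockCompat is a made-up name, not part of the patch):

    import org.apache.hadoop.yarn.util.Clock;
    import org.apache.hadoop.yarn.util.SystemClock;

    public final class ClockCompat {
      private ClockCompat() {}

      // The no-arg constructor links against both old and new yarn-common
      // jars; getInstance() only resolves on the newer ones.
      static Clock clock() {
        return new SystemClock();
      }
    }
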
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 183515a..2f393c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -1051,7 +1051,9 @@ else if (prev != null && next.maxWriteId ==
prev.maxWriteId
*/
Collections.sort(original, (HdfsFileStatusWithId o1,
HdfsFileStatusWithId o2) -> {
//this does "Path.uri.compareTo(that.uri)"
- return o1.getFileStatus().compareTo(o2.getFileStatus());
+ return
o1.getFileStatus().getPath().compareTo(o2.getFileStatus().getPath());
+ // TODO: for Hadoop 2.8+
+ // return o1.getFileStatus().compareTo(o2.getFileStatus());
});
// Note: isRawFormat is invalid for non-ORC tables. It will always
return true, so we're good.
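
This hunk is the direct fix for the NoSuchMethodError quoted further down
in this thread: Hive 3.0 was compiled against a FileStatus whose
compareTo(FileStatus) overload does not exist on older Hadoop releases, so
the call fails to resolve at runtime. Comparing the underlying Paths
sidesteps the signature change. A minimal sketch of an equivalent
comparator, as my own illustration (FileStatusCompat is a made-up name, not
part of the patch):

    import java.util.Comparator;
    import org.apache.hadoop.fs.FileStatus;

    public final class FileStatusCompat {
      private FileStatusCompat() {}

      // Order by path; Path.compareTo() is available on every Hadoop 2.x
      // and 3.x release, so this binds cleanly everywhere.
      static final Comparator<FileStatus> BY_PATH =
          (a, b) -> a.getPath().compareTo(b.getPath());
    }
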
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java b/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
index 5e117fe..4367107 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/hooks/TestHiveProtoLoggingHook.java
@@ -76,7 +76,7 @@ public void setup() throws Exception {
@Test
public void testPreEventLog() throws Exception {
context.setHookType(HookType.PRE_EXEC_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -105,7 +105,7 @@ public void testPreEventLog() throws Exception {
public void testPostEventLog() throws Exception {
context.setHookType(HookType.POST_EXEC_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -124,7 +124,7 @@ public void testPostEventLog() throws Exception {
public void testFailureEventLog() throws Exception {
context.setHookType(HookType.ON_FAILURE_HOOK);
-    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
+    EventLogger evtLogger = new EventLogger(conf, new SystemClock());
evtLogger.handle(context);
evtLogger.shutdown();
@@ -149,7 +149,7 @@ private HiveHookEventProto loadEvent(HiveConf conf, String tmpFolder)
     Assert.assertEquals(1, status.length);
     DatePartitionedLogger<HiveHookEventProto> logger = new DatePartitionedLogger<>(
-        HiveHookEventProto.PARSER, path, conf, SystemClock.getInstance());
+        HiveHookEventProto.PARSER, path, conf, new SystemClock());
     ProtoMessageReader<HiveHookEventProto> reader = logger.getReader(status[0].getPath());
HiveHookEventProto event = reader.readEvent();
Assert.assertNotNull(event);
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index ec06a88..7009a09 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -1127,11 +1127,10 @@ public Boolean run() throws Exception {
   @Override
   public boolean runDistCp(List<Path> srcPaths, Path dst, Configuration conf) throws IOException {
-    DistCpOptions options = new DistCpOptions.Builder(srcPaths, dst)
-        .withSyncFolder(true)
-        .withCRC(true)
-        .preserve(FileAttribute.BLOCKSIZE)
-        .build();
+    DistCpOptions options = new DistCpOptions(srcPaths, dst);
+    options.setSyncFolder(true);
+    options.setSkipCRC(true);
+    options.preserve(FileAttribute.BLOCKSIZE);
     // Creates the command-line parameters for distcp
     List<String> params = constructDistCpParams(srcPaths, dst, conf);
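
The DistCpOptions change follows the same pattern: the fluent Builder used
by Hive 3.0's shim is a Hadoop 3 API, whereas Hadoop 2.x constructs the
options object directly and mutates it through setters. A minimal,
self-contained sketch of the 2.x-style invocation, as my own illustration
(the /src and /dst paths are made up, and DistCpCompatExample is not part
of the patch):

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

    public class DistCpCompatExample {
      public static void main(String[] args) throws Exception {
        // Hadoop 2.x style: constructor plus setters instead of a Builder.
        DistCpOptions options =
            new DistCpOptions(Arrays.asList(new Path("/src")), new Path("/dst"));
        options.setSyncFolder(true);               // Builder.withSyncFolder(true)
        options.setSkipCRC(true);                  // Builder.withCRC(true)
        options.preserve(FileAttribute.BLOCKSIZE); // Builder.preserve(...)

        // Kick off the copy; execute() throws if the underlying job fails.
        new DistCp(new Configuration(), options).execute();
      }
    }
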
On Sat, Jun 9, 2018 at 3:16 AM, Owen O'Malley <[email protected]> wrote:
> Mich,
> Try changing the hadoop.version in Hive's pom.xml to 2.7.3 and re-build.
> That should at least let you know where the problems are.
>
> .. Owen
>
> On Fri, Jun 8, 2018 at 9:05 AM, Mich Talebzadeh <[email protected]> wrote:
>
>> Hi Owen,
>>
>> It is 2.7.3
>>
>> hadoop version
>> Hadoop 2.7.3
>> Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r baa91f7c6bc9cb92be5982de4719c1c8af91ccff
>> Compiled by root on 2016-08-18T01:41Z
>>
>>
>> Dr Mich Talebzadeh
>>
>> On 8 June 2018 at 16:59, Owen O'Malley <[email protected]> wrote:
>>
>>> This looks like an API incompatibility between the version of Hadoop
>>> expected and the version used. Which version of Hadoop are you using?
>>>
>>> .. Owen
>>>
>>> On Jun 8, 2018, at 08:31, Mich Talebzadeh <[email protected]> wrote:
>>>
>>> Just installed and upgraded to Hive 3, where the fun and games started :)
>>>
>>> First I had to set hive.metastore.event.db.notification.api.auth to
>>> false in hive-site.xml, as per this JIRA
>>> <https://www.mail-archive.com/[email protected]/msg121026.html>, to
>>> make HiveServer2 start and stay up.
>>>
>>> Now when I connect via beeline I see this error:
>>>
>>>
>>> 0: jdbc:hive2://rhes75:10099/default> select * from sales limit 10;
>>> Error: java.io.IOException: java.lang.RuntimeException: ORC split
>>> generation failed with exception: java.lang.NoSuchMethodError:
>>> org.apache.hadoop.fs.FileStatus.compareTo(Lorg/apache/hadoop/fs/FileStatus;)I
>>> (state=,code=0)
>>>
>>> The table is an ORC table, as follows, and it used to work fine:
>>>
>>> 0: jdbc:hive2://rhes75:10099/default> desc formatted sales;
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> | col_name                      | data_type                                                     | comment                                       |
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> | # col_name                    | data_type                                                     | comment                                       |
>>> | prod_id                       | bigint                                                        |                                               |
>>> | cust_id                       | bigint                                                        |                                               |
>>> | time_id                       | timestamp                                                     |                                               |
>>> | channel_id                    | bigint                                                        |                                               |
>>> | promo_id                      | bigint                                                        |                                               |
>>> | quantity_sold                 | decimal(10,0)                                                 |                                               |
>>> | amount_sold                   | decimal(10,0)                                                 |                                               |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Partition Information       | NULL                                                          | NULL                                          |
>>> | # col_name                    | data_type                                                     | comment                                       |
>>> | year                          | int                                                           |                                               |
>>> | month                         | int                                                           |                                               |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Detailed Table Information  | NULL                                                          | NULL                                          |
>>> | Database:                     | oraclehadoop                                                  | NULL                                          |
>>> | OwnerType:                    | USER                                                          | NULL                                          |
>>> | Owner:                        | hduser                                                        | NULL                                          |
>>> | CreateTime:                   | Wed May 31 16:31:47 BST 2017                                  | NULL                                          |
>>> | LastAccessTime:               | UNKNOWN                                                       | NULL                                          |
>>> | Retention:                    | 0                                                             | NULL                                          |
>>> | Location:                     | hdfs://rhes75:9000/user/hive/warehouse/oraclehadoop.db/sales  | NULL                                          |
>>> | Table Type:                   | MANAGED_TABLE                                                 | NULL                                          |
>>> | Table Parameters:             | NULL                                                          | NULL                                          |
>>> |                               | COLUMN_STATS_ACCURATE                                         | {\"BASIC_STATS\":\"true\"}                    |
>>> |                               | numFiles                                                      | 12544                                         |
>>> |                               | numPartitions                                                 | 49                                            |
>>> |                               | numRows                                                       | 917359                                        |
>>> |                               | orc.bloom.filter.columns                                      | PROD_ID,CUST_ID,TIME_ID,CHANNEL_ID,PROMO_ID   |
>>> |                               | orc.bloom.filter.fpp                                          | 0.05                                          |
>>> |                               | orc.compress                                                  | SNAPPY                                        |
>>> |                               | orc.create.index                                              | true                                          |
>>> |                               | orc.row.index.stride                                          | 10000                                         |
>>> |                               | orc.stripe.size                                               | 268435456                                     |
>>> |                               | rawDataSize                                                   | 271538264                                     |
>>> |                               | totalSize                                                     | 77602053                                      |
>>> |                               | transient_lastDdlTime                                         | 1496244707                                    |
>>> |                               | NULL                                                          | NULL                                          |
>>> | # Storage Information         | NULL                                                          | NULL                                          |
>>> | SerDe Library:                | org.apache.hadoop.hive.ql.io.orc.OrcSerde                     | NULL                                          |
>>> | InputFormat:                  | org.apache.hadoop.hive.ql.io.orc.OrcInputFormat               | NULL                                          |
>>> | OutputFormat:                 | org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat              | NULL                                          |
>>> | Compressed:                   | No                                                            | NULL                                          |
>>> | Num Buckets:                  | 256                                                           | NULL                                          |
>>> | Bucket Columns:               | [prod_id, cust_id, time_id, channel_id, promo_id]             | NULL                                          |
>>> | Sort Columns:                 | []                                                            | NULL                                          |
>>> | Storage Desc Params:          | NULL                                                          | NULL                                          |
>>> |                               | serialization.format                                          | 1                                             |
>>> +-------------------------------+---------------------------------------------------------------+-----------------------------------------------+
>>> 48 rows selected (0.561 seconds)
>>>
>>> Dr Mich Talebzadeh
>>>
>>
>