This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new f379447 HIVE-21698: TezSessionState#ensureLocalResources() causes IndexOutOfBoundsException while localizing resources (Naresh P R, reviewed by Sankar Hariappan) f379447 is described below commit f379447a328fcd5ed0996d3b7b44368a76f7662f Author: Naresh P R <prnaresh.nar...@gmail.com> AuthorDate: Fri Aug 23 19:13:28 2019 +0530 HIVE-21698: TezSessionState#ensureLocalResources() causes IndexOutOfBoundsException while localizing resources (Naresh P R, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan <sank...@apache.org> --- .../apache/hadoop/hive/ql/exec/tez/DagUtils.java | 37 ++++++++++++---------- .../hadoop/hive/ql/exec/tez/TezSessionState.java | 10 +++--- .../hadoop/hive/ql/exec/tez/TestTezTask.java | 5 +-- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java index 6055967..077c94f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java @@ -1029,14 +1029,22 @@ public class DagUtils { // reference HDFS based resource directly, to use distribute cache efficiently. addHdfsResource(conf, tmpResources, LocalResourceType.FILE, getHdfsTempFilesFromConf(conf)); // local resources are session based. - addTempResources(conf, tmpResources, hdfsDirPathStr, LocalResourceType.FILE, getLocalTempFilesFromConf(conf), null); + tmpResources.addAll( + addTempResources(conf, hdfsDirPathStr, LocalResourceType.FILE, + getLocalTempFilesFromConf(conf), null).values() + ); } else { // all resources including HDFS are session based. - addTempResources(conf, tmpResources, hdfsDirPathStr, LocalResourceType.FILE, getTempFilesFromConf(conf), null); + tmpResources.addAll( + addTempResources(conf, hdfsDirPathStr, LocalResourceType.FILE, + getTempFilesFromConf(conf), null).values() + ); } - addTempResources(conf, tmpResources, hdfsDirPathStr, LocalResourceType.ARCHIVE, - getTempArchivesFromConf(conf), null); + tmpResources.addAll( + addTempResources(conf, hdfsDirPathStr, LocalResourceType.ARCHIVE, + getTempArchivesFromConf(conf), null).values() + ); return tmpResources; } @@ -1102,26 +1110,22 @@ public class DagUtils { * @param hdfsDirPathStr Destination directory in HDFS. * @param conf Configuration. * @param inputOutputJars The file names to localize. - * @return List<LocalResource> local resources to add to execution + * @return Map<String, LocalResource> (srcPath, local resources) to add to execution * @throws IOException when hdfs operation fails. * @throws LoginException when getDefaultDestDir fails with the same exception */ - public List<LocalResource> localizeTempFiles(String hdfsDirPathStr, Configuration conf, - String[] inputOutputJars, String[] skipJars) throws IOException, LoginException { + public Map<String, LocalResource> localizeTempFiles(String hdfsDirPathStr, Configuration conf, + String[] inputOutputJars, String[] skipJars) throws IOException { if (inputOutputJars == null) { return null; } - List<LocalResource> tmpResources = new ArrayList<LocalResource>(); - addTempResources(conf, tmpResources, hdfsDirPathStr, - LocalResourceType.FILE, inputOutputJars, skipJars); - return tmpResources; + return addTempResources(conf, hdfsDirPathStr, LocalResourceType.FILE, inputOutputJars, skipJars); } - private void addTempResources(Configuration conf, - List<LocalResource> tmpResources, String hdfsDirPathStr, - LocalResourceType type, - String[] files, String[] skipFiles) throws IOException { + private Map<String, LocalResource> addTempResources(Configuration conf, String hdfsDirPathStr, + LocalResourceType type, String[] files, String[] skipFiles) throws IOException { HashSet<Path> skipFileSet = null; + Map<String, LocalResource> tmpResourcesMap = new HashMap<>(); if (skipFiles != null) { skipFileSet = new HashSet<>(); for (String skipFile : skipFiles) { @@ -1142,8 +1146,9 @@ public class DagUtils { Path hdfsFilePath = new Path(hdfsDirPathStr, getResourceBaseName(new Path(file))); LocalResource localResource = localizeResource(new Path(file), hdfsFilePath, type, conf); - tmpResources.add(localResource); + tmpResourcesMap.put(file, localResource); } + return tmpResourcesMap; } public FileStatus getHiveJarDirectory(Configuration conf) throws IOException, LoginException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java index 767b359..e2db0c7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java @@ -608,7 +608,7 @@ public class TezSessionState { } // Localize the non-conf resources that are missing from the current list. - List<LocalResource> newResources = null; + Map<String, LocalResource> newResources = null; if (newFilesNotFromConf != null && newFilesNotFromConf.length > 0) { boolean hasResources = !resources.additionalFilesNotFromConf.isEmpty(); if (hasResources) { @@ -623,10 +623,8 @@ public class TezSessionState { String[] skipFilesFromConf = DagUtils.getTempFilesFromConf(conf); newResources = utils.localizeTempFiles(dir, conf, newFilesNotFromConf, skipFilesFromConf); if (newResources != null) { - resources.localizedResources.addAll(newResources); - } - for (int i=0;i<newFilesNotFromConf.length;i++) { - resources.additionalFilesNotFromConf.put(newFilesNotFromConf[i], newResources.get(i)); + resources.localizedResources.addAll(newResources.values()); + resources.additionalFilesNotFromConf.putAll(newResources); } } else { resources.localizedResources.addAll(resources.additionalFilesNotFromConf.values()); @@ -640,7 +638,7 @@ public class TezSessionState { // TODO: Do we really need all this nonsense? if (session != null) { if (newResources != null && !newResources.isEmpty()) { - session.addAppMasterLocalFiles(DagUtils.createTezLrMap(null, newResources)); + session.addAppMasterLocalFiles(DagUtils.createTezLrMap(null, newResources.values())); } if (!resources.localizedResources.isEmpty()) { session.addAppMasterLocalFiles( diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java index 4e4a979..c14dc62 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezTask.java @@ -233,9 +233,10 @@ public class TestTezTask { @Test public void testExistingSessionGetsStorageHandlerResources() throws Exception { - final String[] inputOutputJars = new String[] {"file:///tmp/foo.jar"}; + final String jarFilePath = "file:///tmp/foo.jar"; + final String[] inputOutputJars = new String[] {jarFilePath}; LocalResource res = createResource(inputOutputJars[0]); - final List<LocalResource> resources = Collections.singletonList(res); + final Map<String, LocalResource> resources = Collections.singletonMap(jarFilePath, res); when(utils.localizeTempFiles(anyString(), any(Configuration.class), eq(inputOutputJars), any(String[].class))).thenReturn(resources);