Hi, I adopted the distributed cache to implement semi-joins. I am using CDH 4.1.2.
The Map-side setup function is shown in [1]. It works well in my Eclipse Indigo; however, it goes wrong when I run it from the CLI. The exception in one of the containers refers to [2]. How could I solve this exception? Regards [1]------------------------------------------- @Override public void setup(Context context){ try { //add DistributedCache files to the Mapper. //this DistributedCache files are on the HDFS URI[] cacheFiles = DistributedCache.getCacheFiles(context.getConfiguration()); if (cacheFiles != null && cacheFiles.length > 0){ Path path = new Path(cacheFiles[0].toString()); fs = FileSystem.get(path.toUri(),context.getConfiguration()); in = fs.open(path); String line ; String[] tokens ; while ((line = in.readLine()) != null ) { tokens = StringUtils.split(line,'|'); if (tokens != null && tokens.length >= 3){ joinData.put(tokens[0], tokens[1]);//key:ifid ;value:cmtsid } } } } catch (IOException e) { e.printStackTrace(); }finally{ try { in.close(); } catch (IOException e) { e.printStackTrace(); } } } [2]---------------------------- 2013-05-13 16:11:08,444 INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Downloading public rsrc:{ hdfs://Hadoop01:8040/user/hadoop/sqoop/CMTSIFTABLE/part-m-00000, 1366969672048, FILE } 2013-05-13 16:11:08,446 INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Created localizer for container_1366976077492_0022_01_000001 2013-05-13 16:11:08,446 INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Failed to download rsrc { { hdfs://Hadoop01:8040/user/hadoop/sqoop/CMTSIFTABLE/part-m-00000, 1366969672048, FILE },pending,[(container_1366976077492_0022_01_000001)],9096622200855127,DOWNLOADING} java.io.FileNotFoundException: File /tmp/nm-local-dir/filecache does not exist at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:455) at 
org.apache.hadoop.fs.FileSystem.primitiveMkdir(FileSystem.java:919) at org.apache.hadoop.fs.DelegateToFileSystem.mkdir(DelegateToFileSystem.java:143) at org.apache.hadoop.fs.FilterFs.mkdir(FilterFs.java:189) at org.apache.hadoop.fs.FileContext$4.next(FileContext.java:706) at org.apache.hadoop.fs.FileContext$4.next(FileContext.java:703) at org.apache.hadoop.fs.FileContext$FSLinkResolver.resolve(FileContext.java:2333) at org.apache.hadoop.fs.FileContext.mkdir(FileContext.java:703) at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:147) at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:49) at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) at java.util.concurrent.FutureTask.run(FutureTask.java:138) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441) at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) at java.util.concurrent.FutureTask.run(FutureTask.java:138) at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908) at java.lang.Thread.run(Thread.java:662)