Author: daryn
Date: Fri Aug 9 19:52:20 2013
New Revision: 1512470

URL: http://svn.apache.org/r1512470
Log:
HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn)

Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1512470&r1=1512469&r2=1512470&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt Fri Aug 9 19:52:20 2013 @@ -17,6 +17,8 @@ Release 0.23.10 - UNRELEASED BUG FIXES + HADOOP-9757. Har metadata cache can grow without limit (Cristina Abad via daryn) + Release 0.23.9 - 2013-07-08 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java?rev=1512470&r1=1512469&r2=1512470&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java Fri Aug 9 19:52:20 2013 @@ -24,11 +24,12 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URLDecoder; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import 
java.util.LinkedHashMap; import java.util.Map; import java.util.TreeMap; import java.util.HashMap; -import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -56,10 +57,12 @@ public class HarFileSystem extends Filte private static final Log LOG = LogFactory.getLog(HarFileSystem.class); + public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries"; + public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10; + public static final int VERSION = 3; - private static final Map<URI, HarMetaData> harMetaCache = - new ConcurrentHashMap<URI, HarMetaData>(); + private static Map<URI, HarMetaData> harMetaCache; // uri representation of this Har filesystem private URI uri; @@ -87,7 +90,14 @@ public class HarFileSystem extends Filte public HarFileSystem(FileSystem fs) { super(fs); } - + + private synchronized void initializeMetadataCache(Configuration conf) { + if (harMetaCache == null) { + int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT); + harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize)); + } + } + /** * Initialize a Har filesystem per har archive. The * archive home directory is the top level directory @@ -102,6 +112,9 @@ public class HarFileSystem extends Filte * to be used in case not specified. 
*/ public void initialize(URI name, Configuration conf) throws IOException { + // initialize the metadata cache, if needed + initializeMetadataCache(conf); + // decode the name URI underLyingURI = decodeHarURI(name, conf); // we got the right har Path- now check if this is @@ -1066,4 +1079,18 @@ public class HarFileSystem extends Filte HarMetaData getMetadata() { return metadata; } + + private static class LruCache<K, V> extends LinkedHashMap<K, V> { + private final int MAX_ENTRIES; + + public LruCache(int maxEntries) { + super(maxEntries + 1, 1.0f, true); + MAX_ENTRIES = maxEntries; + } + + @Override + protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { + return size() > MAX_ENTRIES; + } + } } Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java?rev=1512470&r1=1512469&r2=1512470&view=diff ============================================================================== --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java Fri Aug 9 19:52:20 2013 @@ -82,7 +82,7 @@ public class TestHarFileSystemBasics { localFileSystem.createNewFile(masterIndexPath); assertTrue(localFileSystem.exists(masterIndexPath)); - writeVersionToMasterIndexImpl(HarFileSystem.VERSION); + writeVersionToMasterIndexImpl(HarFileSystem.VERSION, masterIndexPath); final HarFileSystem harFileSystem = new HarFileSystem(localFileSystem); final URI uri = new URI("har://" + harPath.toString()); @@ -90,8 +90,25 @@ public class TestHarFileSystemBasics { return harFileSystem; } - private void 
writeVersionToMasterIndexImpl(int version) throws IOException { - final Path masterIndexPath = new Path(harPath, "_masterindex"); + private HarFileSystem createHarFileSystem(final Configuration conf, Path aHarPath) + throws Exception { + localFileSystem.mkdirs(aHarPath); + final Path indexPath = new Path(aHarPath, "_index"); + final Path masterIndexPath = new Path(aHarPath, "_masterindex"); + localFileSystem.createNewFile(indexPath); + assertTrue(localFileSystem.exists(indexPath)); + localFileSystem.createNewFile(masterIndexPath); + assertTrue(localFileSystem.exists(masterIndexPath)); + + writeVersionToMasterIndexImpl(HarFileSystem.VERSION, masterIndexPath); + + final HarFileSystem harFileSystem = new HarFileSystem(localFileSystem); + final URI uri = new URI("har://" + aHarPath.toString()); + harFileSystem.initialize(uri, conf); + return harFileSystem; + } + + private void writeVersionToMasterIndexImpl(int version, Path masterIndexPath) throws IOException { // write Har version into the master index: final FSDataOutputStream fsdos = localFileSystem.create(masterIndexPath); try { @@ -173,6 +190,29 @@ public class TestHarFileSystemBasics { } @Test + public void testPositiveLruMetadataCacheFs() throws Exception { + // Init 2nd har file system on the same underlying FS, so the + // metadata gets reused: + HarFileSystem hfs = new HarFileSystem(localFileSystem); + URI uri = new URI("har://" + harPath.toString()); + hfs.initialize(uri, new Configuration()); + // the metadata should be reused from cache: + assertTrue(hfs.getMetadata() == harFileSystem.getMetadata()); + + // Create more hars, until the cache is full + 1; the last creation should evict the first entry from the cache + for (int i = 0; i <= hfs.METADATA_CACHE_ENTRIES_DEFAULT; i++) { + Path p = new Path(rootPath, "path1/path2/my" + i +".har"); + createHarFileSystem(conf, p); + } + + // The first entry should not be in the cache anymore: + hfs = new HarFileSystem(localFileSystem); + uri = new URI("har://" + 
harPath.toString()); + hfs.initialize(uri, new Configuration()); + assertTrue(hfs.getMetadata() != harFileSystem.getMetadata()); + } + + @Test public void testPositiveInitWithoutUnderlyingFS() throws Exception { // Init HarFS with no constructor arg, so that the underlying FS object // is created on demand or got from cache in #initialize() method. @@ -218,7 +258,7 @@ public class TestHarFileSystemBasics { // time with 1 second accuracy: Thread.sleep(1000); // write an unsupported version: - writeVersionToMasterIndexImpl(7777); + writeVersionToMasterIndexImpl(7777, new Path(harPath, "_masterindex")); // init the Har: final HarFileSystem hfs = new HarFileSystem(localFileSystem);