[partial-ns] Implement LevelDB-based fsimage.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/23dbb0f9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/23dbb0f9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/23dbb0f9

Branch: refs/heads/feature-HDFS-8286
Commit: 23dbb0f9f5046ac508ba09ca960f1466fb552c61
Parents: 2d7972d
Author: Haohui Mai <whe...@apache.org>
Authored: Thu May 28 16:49:13 2015 -0700
Committer: Haohui Mai <whe...@apache.org>
Committed: Fri Jun 12 13:57:01 2015 -0700

----------------------------------------------------------------------
 .../hdfs/server/namenode/FSDirectory.java       |  32 ++++-
 .../server/namenode/LevelDBChildrenView.java    |  75 +++++++++++
 .../server/namenode/LevelDBROTransaction.java   | 129 +++++++++++++++
 .../server/namenode/LevelDBRWTransaction.java   |  84 ++++++++++++
 .../namenode/LevelDBReplayTransaction.java      |  85 ++++++++++++
 5 files changed, 399 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/23dbb0f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
index 8744916..0e50d8c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.XAttrHelper;
+import org.apache.hadoop.hdfs.hdfsdb.Options;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
@@ -100,7 +101,6 @@ import static org.apache.hadoop.util.Time.now;
 @InterfaceAudience.Private
 public class FSDirectory implements Closeable {
   static final Logger LOG = LoggerFactory.getLogger(FSDirectory.class);
-
   private static INodeDirectory createRoot(FSNamesystem namesystem) {
     final INodeDirectory r = new INodeDirectory(
         ROOT_INODE_ID,
@@ -184,6 +184,12 @@ public class FSDirectory implements Closeable {
   private final FSEditLog editLog;
 
   private INodeAttributeProvider attributeProvider;
+  private final boolean enableLevelDb;
+  private final org.apache.hadoop.hdfs.hdfsdb.DB levelDb;
+
+  org.apache.hadoop.hdfs.hdfsdb.DB getLevelDb() {
+    return levelDb;
+  }
 
   public void setINodeAttributeProvider(INodeAttributeProvider provider) {
     attributeProvider = provider;
@@ -244,11 +250,12 @@
   }
 
   RWTransaction newRWTransaction() {
-    return new RWTransaction(this);
+    return enableLevelDb ? new LevelDBRWTransaction(this) : new RWTransaction(this);
   }
 
   public ROTransaction newROTransaction() {
-    return new ROTransaction(db());
+    return enableLevelDb ? new LevelDBROTransaction(this, levelDb)
+        : new ROTransaction(this);
   }
 
   public ReplayTransaction newReplayTransaction() {
@@ -338,9 +345,22 @@
     namesystem = ns;
     this.editLog = ns.getEditLog();
     ezManager = new EncryptionZoneManager(this, conf);
-    this.db = new DB(dirLock);
-    // TODO: Load fsimage
-    db.addRoot(createRootForFlatNS(ns));
+    this.enableLevelDb = conf.getBoolean("dfs.partialns", false);
+    if (enableLevelDb) {
+      String dbPath = conf.get("dfs.partialns.path");
+      Options options = new Options().createIfMissing(true);
+      this.levelDb = org.apache.hadoop.hdfs.hdfsdb.DB.open(options, dbPath);
+      try (RWTransaction tx = newRWTransaction().begin()) {
+        tx.putINode(ROOT_INODE_ID, createRootForFlatNS(ns));
+        tx.commit();
+      }
+      this.db = null;
+    } else {
+      this.db = new DB(dirLock);
+      // TODO: Load fsimage
+      db.addRoot(createRootForFlatNS(ns));
+      this.levelDb = null;
+    }
   }
 
   FSNamesystem getFSNamesystem() {
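
The constructor change above gates everything on two settings, "dfs.partialns"
and "dfs.partialns.path". A minimal sketch of turning the LevelDB-backed
fsimage on; the two keys come from the hunk above, while the surrounding
HdfsConfiguration setup is illustrative only and not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    Configuration conf = new HdfsConfiguration();
    // Read in FSDirectory's constructor; false keeps the in-memory DB path.
    conf.setBoolean("dfs.partialns", true);
    // Directory for the LevelDB store; illustrative path, created if missing.
    conf.set("dfs.partialns.path", "/var/lib/hdfs/partialns");

When the flag is set, FSDirectory opens the database with
Options().createIfMissing(true) and seeds it with the root inode through a
single LevelDBRWTransaction; otherwise it falls back to the existing
in-memory DB.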

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23dbb0f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBChildrenView.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBChildrenView.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBChildrenView.java
new file mode 100644
index 0000000..5ddeb66
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBChildrenView.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import java.nio.ByteOrder;
+import java.util.AbstractMap;
+import java.util.Iterator;
+import java.util.Map;
+
+class LevelDBChildrenView extends DBChildrenView {
+  private final long parentId;
+  private final org.apache.hadoop.hdfs.hdfsdb.Iterator it;
+
+  LevelDBChildrenView(long parentId, org.apache.hadoop.hdfs.hdfsdb.Iterator it) {
+    this.parentId = parentId;
+    this.it = it;
+  }
+
+  @Override
+  public int size() {
+    return 0;
+  }
+
+  @Override
+  public void seekTo(ByteBuffer start) {
+    byte[] key = LevelDBROTransaction.inodeChildKey(parentId, start);
+    it.seek(key);
+  }
+
+  @Override
+  public void close() throws IOException {
+    it.close();
+  }
+
+  @Override
+  public Iterator<Map.Entry<ByteBuffer, Long>> iterator() {
+    return new Iterator<Map.Entry<ByteBuffer, Long>>() {
+      @Override
+      public boolean hasNext() {
+        if (!it.hasNext()) {
+          return false;
+        }
+        byte[] key = it.peekNext().getKey();
+        return key.length >= 10 && key[9] == 1;
+      }
+
+      @Override
+      public Map.Entry<ByteBuffer, Long> next() {
+        Map.Entry<byte[], byte[]> n = it.next();
+        long v = ByteBuffer.wrap(n.getValue()).order(ByteOrder.LITTLE_ENDIAN)
+            .asLongBuffer().get();
+        return new AbstractMap.SimpleImmutableEntry<>(
+            ByteBuffer.wrap(n.getKey(), 10, n.getKey().length - 10), v);
+      }
+    };
+  }
+}
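
LevelDBChildrenView leans on LevelDB's sorted iteration: all children of a
directory share the 10-byte key prefix 'I' + parent id + 0x01 (built by
LevelDBROTransaction.inodeChildKey below), so hasNext() can stop as soon as
the type byte at offset 9 is no longer 1, and next() returns the localName
bytes past that prefix paired with the decoded little-endian child id. A
sketch of how a caller might walk a directory with it; tx stands for any of
the transaction classes in this commit, and the loop body is illustrative:

    // Iterate a directory's children in lexicographic localName order.
    try (DBChildrenView children = tx.childrenView(parentId)) {
      for (Map.Entry<ByteBuffer, Long> e : children) {
        ByteBuffer localName = e.getKey(); // name bytes, 10-byte prefix stripped
        long childId = e.getValue();       // little-endian inode id
        FlatINode child = tx.getINode(childId);
      }
    }

Note that size() is left as a stub returning 0, so callers cannot rely on it
for a child count yet.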
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hdfs.hdfsdb.*;
+import org.apache.hadoop.hdfs.hdfsdb.DB;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import static org.apache.hadoop.hdfs.server.namenode.INodeId.INVALID_INODE_ID;
+
+class LevelDBROTransaction extends ROTransaction {
+  private final org.apache.hadoop.hdfs.hdfsdb.DB hdfsdb;
+  private static final ReadOptions OPTIONS = new ReadOptions();
+  LevelDBROTransaction(FSDirectory fsd, org.apache.hadoop.hdfs.hdfsdb.DB db) {
+    super(fsd);
+    this.hdfsdb = db;
+  }
+
+  LevelDBROTransaction begin() {
+    fsd.readLock();
+    return this;
+  }
+
+  @Override
+  FlatINode getINode(long id) {
+    return getFlatINode(id, hdfsdb);
+  }
+
+  @Override
+  long getChild(long parentId, ByteBuffer localName) {
+    return getChild(parentId, localName, hdfsdb);
+  }
+
+  @Override
+  DBChildrenView childrenView(long parent) {
+    return getChildrenView(parent, hdfsdb);
+  }
+
+  static FlatINode getFlatINode(
+      long id, org.apache.hadoop.hdfs.hdfsdb.DB hdfsdb) {
+    byte[] key = inodeKey(id);
+    try {
+      byte[] bytes = hdfsdb.get(OPTIONS, key);
+      if (bytes == null) {
+        return null;
+      }
+      return FlatINode.wrap(bytes);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  static byte[] inodeKey(long id) {
+    return new byte[]{'I',
+        (byte) ((id >> 56) & 0xff),
+        (byte) ((id >> 48) & 0xff),
+        (byte) ((id >> 40) & 0xff),
+        (byte) ((id >> 32) & 0xff),
+        (byte) ((id >> 24) & 0xff),
+        (byte) ((id >> 16) & 0xff),
+        (byte) ((id >> 8) & 0xff),
+        (byte) (id & 0xff),
+        0
+    };
+  }
+
+  static long getChild(long parentId, ByteBuffer localName, DB hdfsdb) {
+    Preconditions.checkArgument(localName.hasRemaining());
+    byte[] key = inodeChildKey(parentId, localName);
+    try {
+      byte[] bytes = hdfsdb.get(OPTIONS, key);
+      if (bytes == null) {
+        return INVALID_INODE_ID;
+      }
+      return ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)
+          .asLongBuffer().get();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  static byte[] inodeChildKey(long parentId, ByteBuffer localName) {
+    byte[] key = new byte[10 + localName.remaining()];
+    key[0] = 'I';
+    for (int i = 0; i < 8; ++i) {
+      key[1 + i] = (byte) ((parentId >> ((7 - i) * 8)) & 0xff);
+    }
+    key[9] = 1;
+    ByteBuffer.wrap(key, 10, localName.remaining()).put(localName.duplicate());
+    return key;
+  }
+
+  static DBChildrenView getChildrenView(long parent, DB hdfsdb) {
+    byte[] key = new byte[]{'I',
+        (byte) ((parent >> 56) & 0xff),
+        (byte) ((parent >> 48) & 0xff),
+        (byte) ((parent >> 40) & 0xff),
+        (byte) ((parent >> 32) & 0xff),
+        (byte) ((parent >> 24) & 0xff),
+        (byte) ((parent >> 16) & 0xff),
+        (byte) ((parent >> 8) & 0xff),
+        (byte) (parent & 0xff),
+        1
+    };
+    Iterator it = hdfsdb.iterator(OPTIONS);
+    it.seek(key);
+    return new LevelDBChildrenView(parent, it);
+  }
+
+}
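
The key scheme above keeps a directory's own record and its children adjacent
in the database: an inode record is 'I' + 8-byte big-endian id + 0x00
(inodeKey), and a child entry is 'I' + 8-byte big-endian parent id + 0x01 +
localName bytes (inodeChildKey). Because the id is big-endian and 0x00 sorts
before 0x01, a seek to the child prefix lands on the first child and a scan
stays inside one directory. A worked example for parent id 16385 (the HDFS
root inode id) and child name "foo":

    inodeKey(16385)             = 49 00 00 00 00 00 00 40 01 00
    inodeChildKey(16385, "foo") = 49 00 00 00 00 00 00 40 01 01 66 6f 6f
                                  'I' <-- big-endian 16385 --> T  f  o  o

where T is the type byte: 0x00 for the inode record itself, 0x01 for a child
entry.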
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import com.google.protobuf.ByteString;
+import org.apache.hadoop.hdfs.hdfsdb.WriteBatch;
+import org.apache.hadoop.hdfs.hdfsdb.WriteOptions;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+class LevelDBRWTransaction extends RWTransaction {
+  private static final WriteOptions WRITE_OPTIONS =
+      new WriteOptions().skipWal(true);
+  private final WriteBatch batch = new WriteBatch();
+  private final org.apache.hadoop.hdfs.hdfsdb.DB hdfsdb;
+  LevelDBRWTransaction(FSDirectory fsd) {
+    super(fsd);
+    this.hdfsdb = fsd.getLevelDb();
+  }
+
+  @Override
+  FlatINode getINode(long id) {
+    return LevelDBROTransaction.getFlatINode(id, hdfsdb);
+  }
+
+  @Override
+  long getChild(long parentId, ByteBuffer localName) {
+    return LevelDBROTransaction.getChild(parentId, localName, hdfsdb);
+  }
+
+  @Override
+  DBChildrenView childrenView(long parent) {
+    return LevelDBROTransaction.getChildrenView(parent, hdfsdb);
+  }
+
+  @Override
+  public void close() throws IOException {
+    super.close();
+    batch.close();
+  }
+
+  void putINode(long id, ByteString inode) {
+    batch.put(LevelDBROTransaction.inodeKey(id), inode.toByteArray());
+  }
+
+  void putChild(long parentId, ByteBuffer localName, long id) {
+    byte[] v = new byte[8];
+    ByteBuffer.wrap(v).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer().put(id);
+    batch.put(LevelDBROTransaction.inodeChildKey(parentId, localName), v);
+  }
+
+  void deleteINode(long inodeId) {
+    batch.delete(LevelDBROTransaction.inodeKey(inodeId));
+  }
+
+  void deleteChild(long parentId, ByteBuffer localName) {
+    batch.delete(LevelDBROTransaction.inodeChildKey(parentId, localName));
+  }
+
+  void commit() {
+    try {
+      hdfsdb.write(WRITE_OPTIONS, batch);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
\ No newline at end of file
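
LevelDBRWTransaction stages every mutation in a LevelDB WriteBatch and
flushes it with a single write() in commit(), so a transaction that is closed
without committing discards its staged changes. Note also that WRITE_OPTIONS
is built with skipWal(true): the batch bypasses LevelDB's write-ahead log,
presumably because the NameNode edit log already provides durability. A
sketch of the intended call pattern; the helper names follow this file, and
the payload variables are illustrative:

    // Create an inode and link it under its parent, atomically.
    try (RWTransaction tx = fsd.newRWTransaction().begin()) {
      tx.putINode(inodeId, serializedINode);      // ByteString payload
      tx.putChild(parentId, localName, inodeId);  // 8-byte LE value
      tx.commit();                                // one WriteBatch write
    }                                             // close() also closes the batch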

http://git-wip-us.apache.org/repos/asf/hadoop/blob/23dbb0f9/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBReplayTransaction.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBReplayTransaction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBReplayTransaction.java
new file mode 100644
index 0000000..e1b8eff
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LevelDBReplayTransaction.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import com.google.protobuf.ByteString;
+import org.apache.hadoop.hdfs.hdfsdb.WriteBatch;
+import org.apache.hadoop.hdfs.hdfsdb.WriteOptions;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+public class LevelDBReplayTransaction extends ReplayTransaction {
+  private static final WriteOptions WRITE_OPTIONS =
+      new WriteOptions().skipWal(true);
+  private final WriteBatch batch = new WriteBatch();
+  private final org.apache.hadoop.hdfs.hdfsdb.DB hdfsdb;
+  LevelDBReplayTransaction(FSDirectory fsd,
+                           org.apache.hadoop.hdfs.hdfsdb.DB hdfsdb) {
+    super(fsd);
+    this.hdfsdb = hdfsdb;
+  }
+
+  @Override
+  FlatINode getINode(long id) {
+    return LevelDBROTransaction.getFlatINode(id, hdfsdb);
+  }
+
+  @Override
+  long getChild(long parentId, ByteBuffer localName) {
+    return LevelDBROTransaction.getChild(parentId, localName, hdfsdb);
+  }
+
+  @Override
+  DBChildrenView childrenView(long parent) {
+    return LevelDBROTransaction.getChildrenView(parent, hdfsdb);
+  }
+
+  @Override
+  public void close() throws IOException {
+    super.close();
+    batch.close();
+  }
+
+  void putINode(long id, ByteString inode) {
+    batch.put(LevelDBROTransaction.inodeKey(id), inode.toByteArray());
+  }
+
+  void putChild(long parentId, ByteBuffer localName, long id) {
+    byte[] v = new byte[8];
+    ByteBuffer.wrap(v).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer().put(id);
+    batch.put(LevelDBROTransaction.inodeChildKey(parentId, localName), v);
+  }
+
+  void deleteINode(long inodeId) {
+    batch.delete(LevelDBROTransaction.inodeKey(inodeId));
+  }
+
+  void deleteChild(long parentId, ByteBuffer localName) {
+    batch.delete(LevelDBROTransaction.inodeChildKey(parentId, localName));
+  }
+
+  void commit() {
+    try {
+      hdfsdb.write(WRITE_OPTIONS, batch);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
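
LevelDBReplayTransaction mirrors LevelDBRWTransaction's write path (the same
WriteBatch staging and skipWal(true) options) for edit-log replay. Both
encode a child entry's value as the 8-byte little-endian inode id, which is
exactly what LevelDBROTransaction.getChild() and LevelDBChildrenView.next()
decode. A self-contained round trip of that encoding, using only java.nio
(the id 16386 is an arbitrary example):

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    byte[] v = new byte[8];  // value stored under the child key
    ByteBuffer.wrap(v).order(ByteOrder.LITTLE_ENDIAN).asLongBuffer().put(16386L);
    long id = ByteBuffer.wrap(v).order(ByteOrder.LITTLE_ENDIAN)
        .asLongBuffer().get();
    assert id == 16386L;     // decodes back to the same inode id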