[ 
https://issues.apache.org/jira/browse/HDFS-16855?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17640039#comment-17640039
 ] 

ASF GitHub Bot commented on HDFS-16855:
---------------------------------------

dingshun3016 opened a new pull request, #5170:
URL: https://github.com/apache/hadoop/pull/5170

   While applying the datanode's fine-grained lock patch, we found that the
datanode couldn't start: a deadlock can occur during addBlockPool, so we can
remove the write lock there.
   
   
   org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl#addBlockPool
   takes the write lock.
   
   org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl#deepCopyReplica
   needs the read lock.
   
   Because deepCopyReplica is invoked from different threads than the one
holding the write lock, the write lock cannot be downgraded to a read lock,
so the read-lock requests block for as long as addBlockPool holds the write
lock.
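
   For illustration, here is a minimal, self-contained sketch of the same
pattern using a plain ReentrantReadWriteLock instead of Hadoop's
DataSetLockManager (class and variable names are hypothetical). The parent
thread takes the write lock and then joins a child thread that needs the
read lock, so neither can make progress:

{code:java}
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class AddBlockPoolDeadlockSketch {
  public static void main(String[] args) throws InterruptedException {
    ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

    lock.writeLock().lock(); // plays the role of addBlockPool's write lock
    try {
      // Plays the role of the per-volume thread that ends up in
      // ReplicaCachingGetSpaceUsed -> deepCopyReplica.
      Thread volumeThread = new Thread(() -> {
        lock.readLock().lock(); // blocks: another thread holds the write lock
        try {
          System.out.println("never reached");
        } finally {
          lock.readLock().unlock();
        }
      });
      volumeThread.start();
      volumeThread.join(); // waits forever for volumeThread: deadlock
    } finally {
      lock.writeLock().unlock(); // never reached
    }
  }
}
{code}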




> Remove the redundant write lock in addBlockPool
> -----------------------------------------------
>
>                 Key: HDFS-16855
>                 URL: https://issues.apache.org/jira/browse/HDFS-16855
>             Project: Hadoop HDFS
>          Issue Type: Bug
>            Reporter: dingshun
>            Priority: Major
>
> While applying the datanode's fine-grained lock patch, we found that the 
> datanode couldn't start: a deadlock can occur during addBlockPool, so we can 
> remove the write lock there.
> {code:xml}
> <!-- getspaceused classname -->
> <property>
>   <name>fs.getspaceused.classname</name>
>   <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.ReplicaCachingGetSpaceUsed</value>
> </property>
> {code}
> {code:java}
> // org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl#addBlockPool
> // takes the write lock
> @Override
> public void addBlockPool(String bpid, Configuration conf)
>     throws IOException {
>   LOG.info("Adding block pool " + bpid);
>   AddBlockPoolException volumeExceptions = new AddBlockPoolException();
>   try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.BLOCK_POOl,
>       bpid)) {
>     try {
>       volumes.addBlockPool(bpid, conf);
>     } catch (AddBlockPoolException e) {
>       volumeExceptions.mergeException(e);
>     }
>     volumeMap.initBlockPool(bpid);
>     Set<String> vols = storageMap.keySet();
>     for (String v : vols) {
>       lockManager.addLock(LockLevel.VOLUME, bpid, v);
>     }
>   }
> }
> {code}
> {code:java}
> // reached from org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl#deepCopyReplica,
> // which needs the read lock
> void replicas(String bpid, Consumer<Iterator<ReplicaInfo>> consumer) {
>   LightWeightResizableGSet<Block, ReplicaInfo> m = null;
>   try (AutoCloseDataSetLock l = lockManager.readLock(LockLevel.BLOCK_POOl,
>       bpid)) {
>     m = map.get(bpid);
>     if (m != null) {
>       m.getIterator(consumer);
>     }
>   }
> }
> {code}
>  
> Because the read lock is requested from different threads than the one 
> holding the write lock, the write lock cannot be downgraded to a read lock: 
> addBlockPool holds the BLOCK_POOl write lock while it joins the per-volume 
> scanning threads below, and each of those threads ends up in deepCopyReplica 
> (via ReplicaCachingGetSpaceUsed) and blocks on the read lock, so neither 
> side can make progress.
> {code:java}
> // org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeList#addBlockPool,
> // called by FsDatasetImpl#addBlockPool while the write lock is held
> void addBlockPool(final String bpid, final Configuration conf)
>     throws IOException {
>   long totalStartTime = Time.monotonicNow();
>   final Map<FsVolumeSpi, IOException> unhealthyDataDirs =
>       new ConcurrentHashMap<FsVolumeSpi, IOException>();
>   List<Thread> blockPoolAddingThreads = new ArrayList<Thread>();
>   for (final FsVolumeImpl v : volumes) {
>     Thread t = new Thread() {
>       public void run() {
>         try (FsVolumeReference ref = v.obtainReference()) {
>           FsDatasetImpl.LOG.info("Scanning block pool " + bpid +
>               " on volume " + v + "...");
>           long startTime = Time.monotonicNow();
>           v.addBlockPool(bpid, conf);
>           long timeTaken = Time.monotonicNow() - startTime;
>           FsDatasetImpl.LOG.info("Time taken to scan block pool " + bpid +
>               " on " + v + ": " + timeTaken + "ms");
>         } catch (IOException ioe) {
>           FsDatasetImpl.LOG.info("Caught exception while scanning " + v +
>               ". Will throw later.", ioe);
>           unhealthyDataDirs.put(v, ioe);
>         }
>       }
>     };
>     blockPoolAddingThreads.add(t);
>     t.start();
>   }
>   for (Thread t : blockPoolAddingThreads) {
>     try {
>       t.join();
>     } catch (InterruptedException ie) {
>       throw new IOException(ie);
>     }
>   }
> }
> {code}
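>  
> A minimal sketch of the change this report proposes (hedged: the merged 
> patch may differ) is to drop the BLOCK_POOl write lock from 
> FsDatasetImpl#addBlockPool, so the per-volume scanning threads can acquire 
> the read lock they need in deepCopyReplica. The body is otherwise unchanged 
> from the code shown earlier:
> {code:java}
> @Override
> public void addBlockPool(String bpid, Configuration conf)
>     throws IOException {
>   LOG.info("Adding block pool " + bpid);
>   AddBlockPoolException volumeExceptions = new AddBlockPoolException();
>   try {
>     // Spawns and joins the per-volume scanning threads; no write lock is
>     // held here, so those threads can take the read lock they need.
>     volumes.addBlockPool(bpid, conf);
>   } catch (AddBlockPoolException e) {
>     volumeExceptions.mergeException(e);
>   }
>   volumeMap.initBlockPool(bpid);
>   Set<String> vols = storageMap.keySet();
>   for (String v : vols) {
>     lockManager.addLock(LockLevel.VOLUME, bpid, v);
>   }
> }
> {code}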
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org
