[ https://issues.apache.org/jira/browse/HDDS-1461?focusedWorklogId=240312&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-240312 ]
ASF GitHub Bot logged work on HDDS-1461: ---------------------------------------- Author: ASF GitHub Bot Created on: 10/May/19 16:22 Start Date: 10/May/19 16:22 Worklog Time Spent: 10m Work Description: lokeshj1703 commented on pull request #782: HDDS-1461. Optimize listStatus api in OzoneFileSystem URL: https://github.com/apache/hadoop/pull/782#discussion_r282951671 ########## File path: hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java ########## @@ -1546,6 +1552,101 @@ public OmKeyInfo lookupFile(OmKeyArgs args) throws IOException { ResultCodes.NOT_A_FILE); } + /** + * List the status for a file or a directory and its contents. + * + * @param args Key args + * @param recursive For a directory if true all the descendants of a + * particular directory are listed + * @param startKey Key from which listing needs to start. If startKey exists + * its status is included in the final list. + * @param numEntries Number of entries to list from the start key + * @return list of file status + */ + public List<OzoneFileStatus> listStatus(OmKeyArgs args, boolean recursive, + String startKey, long numEntries) throws IOException { + Preconditions.checkNotNull(args, "Key args can not be null"); + String volumeName = args.getVolumeName(); + String bucketName = args.getBucketName(); + String keyName = args.getKeyName(); + + List<OzoneFileStatus> fileStatusList = new ArrayList<>(); + try { + metadataManager.getLock().acquireBucketLock(volumeName, bucketName); + if (Strings.isNullOrEmpty(startKey)) { + OzoneFileStatus fileStatus = getFileStatus(args); + if (fileStatus.isFile()) { + return Collections.singletonList(fileStatus); + } + startKey = OzoneFSUtils.addTrailingSlashIfNeeded(keyName); + } + + String seekKeyInDb = + metadataManager.getOzoneKey(volumeName, bucketName, startKey); + String keyInDb = OzoneFSUtils.addTrailingSlashIfNeeded( + metadataManager.getOzoneKey(volumeName, bucketName, keyName)); + TableIterator<String, ? 
extends Table.KeyValue<String, OmKeyInfo>> + iterator = metadataManager.getKeyTable().iterator(); + iterator.seek(seekKeyInDb); + + if (!iterator.hasNext()) { + return Collections.emptyList(); + } + + if (iterator.key().equals(keyInDb)) { + // skip the key which needs to be listed + iterator.next(); + } + + while (iterator.hasNext() && numEntries - fileStatusList.size() > 0) { + String entryInDb = iterator.key(); + OmKeyInfo value = iterator.value().getValue(); + if (entryInDb.startsWith(keyInDb)) { + String entryKeyName = value.getKeyName(); + if (recursive) { + // for recursive list all the entries + fileStatusList.add(new OzoneFileStatus(value, scmBlockSize, + !OzoneFSUtils.isFile(entryKeyName))); + iterator.next(); + } else { + // get the child of the directory to list from the entry. For + // example if directory to list is /a and entry is /a/b/c where + // c is a file. The immediate child is b which is a directory. c + // should not be listed as child of a. + String immediateChild = OzoneFSUtils + .getImmediateChild(entryKeyName, keyName); + boolean isFile = OzoneFSUtils.isFile(immediateChild); + if (isFile) { + fileStatusList + .add(new OzoneFileStatus(value, scmBlockSize, !isFile)); + iterator.next(); + } else { + // if entry is a directory + fileStatusList.add(new OzoneFileStatus(immediateChild)); + // skip the other descendants of this child directory. + iterator.seek( + getNextGreaterString(volumeName, bucketName, immediateChild)); + } + } + } else { + break; + } + } + } finally { + metadataManager.getLock().releaseBucketLock(volumeName, bucketName); + } + return fileStatusList; + } + + private String getNextGreaterString(String volumeName, String bucketName, + String keyPrefix) { + // TODO: Use string codec + // Increment the last character of the string and return the new ozone key. 
+ String nextPrefix = keyPrefix.substring(0, keyPrefix.length() - 1) + + String.valueOf((char) (keyPrefix.charAt(keyPrefix.length() - 1) + 1)); Review comment: The first character in the ASCII table would not work in our case. The second commit uses the RDBStore codec to handle the UTF-8 case. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 240312) Time Spent: 40m (was: 0.5h) > Optimize listStatus api in OzoneFileSystem > ------------------------------------------ > > Key: HDDS-1461 > URL: https://issues.apache.org/jira/browse/HDDS-1461 > Project: Hadoop Distributed Data Store > Issue Type: Sub-task > Components: Ozone Filesystem, Ozone Manager > Reporter: Lokesh Jain > Assignee: Lokesh Jain > Priority: Major > Labels: pull-request-available > Time Spent: 40m > Remaining Estimate: 0h > > Currently in listStatus we make multiple getFileStatus calls. This can be > optimized by converting to a single RPC call for listStatus. > Also, listStatus currently has to traverse a directory recursively in order to > list its immediate children. This happens because in OzoneManager all the > metadata is stored in RocksDB sorted by key name. This Jira also aims to fix > this by using the seek API provided by RocksDB. -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org