[ 
https://issues.apache.org/jira/browse/HADOOP-15891?focusedWorklogId=480589&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-480589
 ]

ASF GitHub Bot logged work on HADOOP-15891:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 09/Sep/20 05:01
            Start Date: 09/Sep/20 05:01
    Worklog Time Spent: 10m 
      Work Description: umamaheswararao commented on a change in pull request 
#2185:
URL: https://github.com/apache/hadoop/pull/2185#discussion_r485332518



##########
File path: 
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java
##########
@@ -217,6 +239,7 @@ public Path getMountedOnPath() {
   Path homeDir = null;
   private boolean enableInnerCache = false;
   private InnerCache cache;
+  private boolean evictCacheOnClose = false;

Review comment:
       Do you see any issue if we make it true? If there are no issues, we can simply clean it on close instead of having another config.
   
   This seems like an improvement to the existing code. If you want, I am OK with filing a small JIRA for this cleanup. (I am assuming it's not strictly needed for this change.)
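   For illustration, a minimal sketch of that suggestion against the fields above; the InnerCache method names used here (closeAll, clear) are assumptions, not necessarily the patch's actual API:
   
   ```java
   // Sketch only: always evict the inner cache on close, so no separate
   // evictCacheOnClose flag is needed.
   @Override
   public void close() throws IOException {
     super.close();
     if (enableInnerCache && cache != null) {
       cache.closeAll();  // assumed: close all cached child file systems
       cache.clear();     // assumed: drop the cached entries
     }
   }
   ```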

##########
File path: 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestRegexMountPoint.java
##########
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test Regex Mount Point.
+ */
+public class TestRegexMountPoint {
+  private static final Logger LOGGER =
+      LoggerFactory.getLogger(TestRegexMountPoint.class.getName());
+
+  private InodeTree inodeTree;
+  private Configuration conf;
+
+  class TestRegexMountPointFileSystem {
+    private URI uri;
+
+    TestRegexMountPointFileSystem(URI uri) {
+      String uriStr = uri == null ? "null" : uri.toString();
+      LOGGER.info("Create TestRegexMountPointFileSystem via URI: " + uriStr);
+      this.uri = uri;
+    }
+
+    public URI getUri() {
+      return uri;
+    }
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    ConfigUtil.addLink(conf, TestRegexMountPoint.class.getName(), "/mnt",
+        URI.create("file:///"));
+
+    inodeTree = new InodeTree<TestRegexMountPointFileSystem>(conf,
+        TestRegexMountPoint.class.getName(), null, false) {
+      @Override
+      protected TestRegexMountPointFileSystem getTargetFileSystem(
+          final URI uri) {
+        return new TestRegexMountPointFileSystem(uri);
+      }
+
+      @Override
+      protected TestRegexMountPointFileSystem getTargetFileSystem(
+          final INodeDir<TestRegexMountPointFileSystem> dir) {
+        return new TestRegexMountPointFileSystem(null);
+      }
+
+      @Override
+      protected TestRegexMountPointFileSystem getTargetFileSystem(
+          final String settings, final URI[] mergeFsURIList) {
+        return new TestRegexMountPointFileSystem(null);
+      }
+    };
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    inodeTree = null;
+  }
+
+  @Test
+  public void testGetVarListInString() throws IOException {
+    String srcRegex = "/(\\w+)";
+    String target = "/$0/${1}/$1/${2}/${2}";
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, srcRegex, target, null);
+    regexMountPoint.initialize();
+    Map<String, Set<String>> varMap = regexMountPoint.getVarInDestPathMap();
+    Assert.assertEquals(3, varMap.size());
+    Assert.assertEquals(1, varMap.get("0").size());
+    Assert.assertTrue(varMap.get("0").contains("$0"));
+    Assert.assertEquals(2, varMap.get("1").size());
+    Assert.assertTrue(varMap.get("1").contains("${1}"));
+    Assert.assertTrue(varMap.get("1").contains("$1"));
+    Assert.assertEquals(1, varMap.get("2").size());
+    Assert.assertTrue(varMap.get("2").contains("${2}"));
+  }
+
+  @Test
+  public void testResolve() throws IOException {
+    String regexStr = "^/user/(?<username>\\w+)";
+    String dstPathStr = "/namenode1/testResolve/$username";
+    String settingsStr = null;
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, regexStr, dstPathStr, settingsStr);
+    regexMountPoint.initialize();
+    InodeTree.ResolveResult resolveResult =
+        regexMountPoint.resolve("/user/hadoop/file1", true);
+    Assert.assertEquals(InodeTree.ResultKind.EXTERNAL_DIR, resolveResult.kind);
+    Assert.assertTrue(
+        resolveResult.targetFileSystem
+            instanceof TestRegexMountPointFileSystem);
+    Assert.assertTrue(resolveResult.resolvedPath.equals("/user/hadoop"));
+    Assert.assertTrue(
+        resolveResult.targetFileSystem
+            instanceof TestRegexMountPointFileSystem);
+    Assert.assertTrue(
+        ((TestRegexMountPointFileSystem) resolveResult.targetFileSystem)
+            .getUri().toString().equals("/namenode1/testResolve/hadoop"));
+    Assert.assertTrue(resolveResult.remainingPath.toString().equals("/file1"));
+  }
+
+  @Test
+  public void testResolveWithInterceptor() throws IOException {
+    String regexStr = "^/user/(?<username>\\w+)";
+    String dstPathStr = "/namenode1/testResolve/$username";
+    // Replace "_" with "-"
+    RegexMountPointResolvedDstPathReplaceInterceptor interceptor =
+        new RegexMountPointResolvedDstPathReplaceInterceptor("_", "-");
+    // replaceresolveddstpath:_:-
+    String settingsStr = interceptor.serializeToString();
+    RegexMountPoint regexMountPoint =
+        new RegexMountPoint(inodeTree, regexStr, dstPathStr, settingsStr);
+    regexMountPoint.initialize();
+    InodeTree.ResolveResult resolveResult =
+        regexMountPoint.resolve("/user/hadoop_user1/file_index", true);
+    Assert.assertEquals(InodeTree.ResultKind.EXTERNAL_DIR, resolveResult.kind);
+    Assert.assertTrue(
+        resolveResult.targetFileSystem
+            instanceof TestRegexMountPointFileSystem);
+    Assert.assertTrue(resolveResult.resolvedPath.equals("/user/hadoop_user1"));

Review comment:
       These asserts can still use the assertEquals method, right? Please use it in all such places.
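   For example, the assertion above could become:
   
   ```java
   // Before: a boolean assertion with an opaque failure message.
   Assert.assertTrue(resolveResult.resolvedPath.equals("/user/hadoop_user1"));
   // After: assertEquals reports expected vs. actual on failure.
   Assert.assertEquals("/user/hadoop_user1", resolveResult.resolvedPath);
   ```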

##########
File path: hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFs.md
##########
@@ -366,6 +366,69 @@ Don't want to change scheme or difficult to copy 
mount-table configurations to a
 
 Please refer to the [View File System Overload Scheme 
Guide](./ViewFsOverloadScheme.html)
 
+Regex Pattern Based Mount Points
+--------------------------------
+
+The view file system mount points are a key-value based mapping system. This is not friendly for use cases whose mapping configuration can be abstracted into rules. E.g. users might want to provide a GCS bucket per user, and there might be thousands of users in total. The old key-value based approach won't work well, for several reasons:
+
+1. The mount table is used by FileSystem clients. There is a cost to spreading the config to all clients, and we should avoid it if possible. The [View File System Overload Scheme Guide](./ViewFsOverloadScheme.html) can help the distribution with central mount table management, but the mount table still has to be updated on every change. That churn can largely be avoided with a rule-based mount table.
+
+2. The client has to understand all the KVs in the mount table. This is not ideal when the mount table grows to thousands of items. E.g. thousands of file systems might be initialized even though the user only needs one, and the config itself becomes bloated at scale.
+
+### Understand the Difference
+
+In the key-value based mount table, the view file system treats every mount point as a partition. There are several file system APIs that lead to an operation on every partition. E.g. there is an HDFS cluster with multiple mount points, and a user runs “hadoop fs -put file viewfs://hdfs.namenode.apache.org/tmp/” to copy data from local disk to the HDFS cluster. The command triggers ViewFileSystem to call the setVerifyChecksum() method, which initializes the file system for every mount point.
+For a regex-based rule mount table entry, we cannot know the corresponding target path until the source path is parsed. So regex-based mount table entries are ignored in such cases. The file system (ChRootedFileSystem) is created upon access, but the underlying file system is cached by the inner cache of ViewFileSystem.

Review comment:
       > For a regex-based rule mount table entry, we cannot know the corresponding target path until the source path is parsed.
   
   Whatever we do know should be added to mountPoints, so that getMountPoints() returns the known file systems? It may be a good idea to add Javadoc at the API level.
   I am OK with having a follow-up JIRA to cover all of these aspects.
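   For reference, the tests in this patch register such an entry programmatically; a minimal sketch (the cluster name and paths here are placeholders):
   
   ```java
   // Mirrors the ConfigUtil.addLinkRegex usage in this patch's tests: maps
   // /user/<username> to /namenode1/testResolve/<username>, no interceptors.
   Configuration conf = new Configuration();
   ConfigUtil.addLinkRegex(conf, "clusterName",
       "^/user/(?<username>\\w+)", "/namenode1/testResolve/$username", null);
   ```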

##########
File path: 
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemLinkRegex.java
##########
@@ -0,0 +1,473 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.viewfs;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
+import org.apache.hadoop.fs.FsConstants;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static 
org.apache.hadoop.fs.viewfs.RegexMountPoint.INTERCEPTOR_INTERNAL_SEP;
+import static org.junit.Assert.assertSame;
+
+/**
+ * Test linkRegex node type for view file system.
+ */
+public class TestViewFileSystemLinkRegex extends ViewFileSystemBaseTest {
+  public static final Logger LOGGER =
+      LoggerFactory.getLogger(TestViewFileSystemLinkRegex.class);
+
+  private static FileSystem fsDefault;
+  private static MiniDFSCluster cluster;
+  private static Configuration clusterConfig;
+  private static final int NAME_SPACES_COUNT = 3;
+  private static final int DATA_NODES_COUNT = 3;
+  private static final int FS_INDEX_DEFAULT = 0;
+  private static final FileSystem[] FS_HDFS = new 
FileSystem[NAME_SPACES_COUNT];
+  private static final String CLUSTER_NAME =
+      "TestViewFileSystemLinkRegexCluster";
+  private static final File TEST_DIR = GenericTestUtils
+      .getTestDir(TestViewFileSystemLinkRegex.class.getSimpleName());
+  private static final String TEST_BASE_PATH =
+      "/tmp/TestViewFileSystemLinkRegex";
+
+  @Override
+  protected FileSystemTestHelper createFileSystemHelper() {
+    return new FileSystemTestHelper(TEST_BASE_PATH);
+  }
+
+  @BeforeClass
+  public static void clusterSetupAtBeginning() throws IOException {
+    SupportsBlocks = true;
+    clusterConfig = ViewFileSystemTestSetup.createConfig();
+    clusterConfig.setBoolean(
+        DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
+        true);
+    cluster = new MiniDFSCluster.Builder(clusterConfig).nnTopology(
+        MiniDFSNNTopology.simpleFederatedTopology(NAME_SPACES_COUNT))
+        .numDataNodes(DATA_NODES_COUNT).build();
+    cluster.waitClusterUp();
+
+    for (int i = 0; i < NAME_SPACES_COUNT; i++) {
+      FS_HDFS[i] = cluster.getFileSystem(i);
+    }
+    fsDefault = FS_HDFS[FS_INDEX_DEFAULT];
+  }
+
+  @AfterClass
+  public static void clusterShutdownAtEnd() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    fsTarget = fsDefault;
+    super.setUp();
+  }
+
+  /**
+   * Override this so that we don't set the targetTestRoot to any path under 
the
+   * root of the FS, and so that we don't try to delete the test dir, but 
rather
+   * only its contents.
+   */
+  @Override
+  void initializeTargetTestRoot() throws IOException {
+    targetTestRoot = fsDefault.makeQualified(new Path("/"));
+    for (FileStatus status : fsDefault.listStatus(targetTestRoot)) {
+      fsDefault.delete(status.getPath(), true);
+    }
+  }
+
+  @Override
+  void setupMountPoints() {
+    super.setupMountPoints();
+  }
+
+  @Override
+  int getExpectedDelegationTokenCount() {
+    return 1; // all point to the same fs so 1 unique token
+  }
+
+  @Override
+  int getExpectedDelegationTokenCountWithCredentials() {
+    return 1;
+  }
+
+  public String buildReplaceInterceptorSettingString(String srcRegex,
+      String replaceString) {
+    return
+        
RegexMountPointInterceptorType.REPLACE_RESOLVED_DST_PATH.getConfigName()
+            + INTERCEPTOR_INTERNAL_SEP + srcRegex + INTERCEPTOR_INTERNAL_SEP
+            + replaceString;
+  }
+
+  public String linkInterceptorSettings(
+      List<String> interceptorSettingStrList) {
+    StringBuilder stringBuilder = new StringBuilder();
+    int listSize = interceptorSettingStrList.size();
+    for (int i = 0; i < listSize; ++i) {
+      stringBuilder.append(interceptorSettingStrList.get(i));
+      if (i < listSize - 1) {
+        stringBuilder.append(RegexMountPoint.INTERCEPTOR_SEP);
+      }
+    }
+    return stringBuilder.toString();
+  }
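+  // Example usage (illustrative, not part of the tests): composing two
+  // replace-interceptor settings into one serialized string joined by
+  // RegexMountPoint.INTERCEPTOR_SEP:
+  //   String settings = linkInterceptorSettings(Arrays.asList(
+  //       buildReplaceInterceptorSettingString("_", "-"),
+  //       buildReplaceInterceptorSettingString("\\s+", "_")));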
+
+  private void createDirWithChildren(
+      FileSystem fileSystem, Path dir, List<Path> childrenFiles)
+      throws IOException {
+    Assert.assertTrue(fileSystem.mkdirs(dir));
+    int index = 0;
+    for (Path childFile : childrenFiles) {
+      // Write an increasing data length per child so the files differ.
+      createFile(fileSystem, childFile, index++, true);
+    }
+  }
+
+  private void createFile(
+      FileSystem fileSystem, Path file, int dataLenToWrite, boolean overwrite)
+      throws IOException {
+    FSDataOutputStream outputStream = null;
+    try {
+      outputStream = fileSystem.create(file, overwrite);
+      for (int i = 0; i < dataLenToWrite; ++i) {
+        outputStream.writeByte(i);
+      }
+    } finally {
+      // Close exactly once; the finally block covers both paths.
+      if (outputStream != null) {
+        outputStream.close();
+      }
+    }
+  }
+
+  private void createDirWithChildren(
+      FileSystem fileSystem, Path dir, int childrenFilesCnt)
+      throws IOException {
+    List<Path> childrenFiles = new ArrayList<>(childrenFilesCnt);
+    for (int i = 0; i < childrenFilesCnt; ++i) {
+      childrenFiles.add(new Path(dir, "file" + i));
+    }
+    createDirWithChildren(fileSystem, dir, childrenFiles);
+  }
+
+  /**
+   * The helper used to test regex mount points.
+   * @param config - the configuration to which the mount table entry is added
+   * @param regexStr - the src path regex expression that applies to this config
+   * @param dstPathStr - the string of the target path
+   * @param interceptorSettings - the serialized interceptor string to be
+   *                           applied while resolving the mapping
+   * @param dirPathBeforeMountPoint - the src path the user passes in to be mapped
+   * @param expectedResolveResult - the expected path after resolving
+   *                             dirPathBeforeMountPoint via the regex mount point
+   * @param childrenFilesCnt - the number of child files to create under the
+   *                         expected resolve result path
+   * @throws IOException
+   * @throws URISyntaxException
+   */
+  private void testRegexMountpoint(
+      Configuration config,
+      String regexStr,
+      String dstPathStr,
+      String interceptorSettings,
+      Path dirPathBeforeMountPoint,
+      Path expectedResolveResult,
+      int childrenFilesCnt)
+      throws IOException, URISyntaxException {
+    FileSystem vfs = null;
+    try {
+      // Set up test env
+      createDirWithChildren(
+          fsTarget, expectedResolveResult, childrenFilesCnt);
+      ConfigUtil.addLinkRegex(
+          config, CLUSTER_NAME, regexStr, dstPathStr, interceptorSettings);
+
+      // Asserts
+      URI viewFsUri = new URI(
+          FsConstants.VIEWFS_SCHEME, CLUSTER_NAME, "/", null, null);
+      vfs = FileSystem.get(viewFsUri, config);

Review comment:
       Small suggestion here:
   Move the lines above out of the try block, then use try (FileSystem vfs = FileSystem.get(viewFsUri, config)) {
   
   }
   This closes the file system automatically after the block, so we can remove the finally block below?
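   A sketch of the suggestion, using only identifiers already present in this test:
   
   ```java
   // Setup stays outside the resource block; try-with-resources closes vfs
   // automatically (FileSystem is Closeable), so no finally block is needed.
   createDirWithChildren(fsTarget, expectedResolveResult, childrenFilesCnt);
   ConfigUtil.addLinkRegex(
       config, CLUSTER_NAME, regexStr, dstPathStr, interceptorSettings);
   URI viewFsUri = new URI(
       FsConstants.VIEWFS_SCHEME, CLUSTER_NAME, "/", null, null);
   try (FileSystem vfs = FileSystem.get(viewFsUri, config)) {
     // Assertions against vfs go here.
   }
   ```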

##########
File path: 
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java
##########
@@ -86,12 +86,21 @@
    */
   String CONFIG_VIEWFS_LINK_MERGE_SLASH = "linkMergeSlash";
 
+  /**
+   * Config variable for specifying a regex link. The source is a regular
+   * expression, and the target can refer to capture groups from the source.
+   * E.g. (^/(?<firstDir>\\w+), /prefix-${firstDir}) =>
+   *   (/path1/file1 => /prefix-path1/file1)
+   */
+  String CONFIG_VIEWFS_LINK_REGEX = "linkRegex";
+
   FsPermission PERMISSION_555 = new FsPermission((short) 0555);
 
   String CONFIG_VIEWFS_RENAME_STRATEGY = "fs.viewfs.rename.strategy";
 
   /**
    * Enable ViewFileSystem to cache all children filesystems in inner cache.

Review comment:
       Is the comment below still valid?






Issue Time Tracking
-------------------

    Worklog Id:     (was: 480589)
    Time Spent: 4h 50m  (was: 4h 40m)

> Provide Regex Based Mount Point In Inode Tree
> ---------------------------------------------
>
>                 Key: HADOOP-15891
>                 URL: https://issues.apache.org/jira/browse/HADOOP-15891
>             Project: Hadoop Common
>          Issue Type: New Feature
>          Components: viewfs
>            Reporter: zhenzhao wang
>            Assignee: zhenzhao wang
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HADOOP-15891.015.patch, HDFS-13948.001.patch, 
> HDFS-13948.002.patch, HDFS-13948.003.patch, HDFS-13948.004.patch, 
> HDFS-13948.005.patch, HDFS-13948.006.patch, HDFS-13948.007.patch, 
> HDFS-13948.008.patch, HDFS-13948.009.patch, HDFS-13948.011.patch, 
> HDFS-13948.012.patch, HDFS-13948.013.patch, HDFS-13948.014.patch, HDFS-13948_ 
> Regex Link Type In Mont Table-V0.pdf, HDFS-13948_ Regex Link Type In Mount 
> Table-v1.pdf
>
>          Time Spent: 4h 50m
>  Remaining Estimate: 0h
>
> This jira is created to support regex based mount points in the Inode Tree. We 
> noticed that mount points only support a fixed target path. However, we might 
> have use cases where the target needs to refer to some fields from the source. 
> E.g. we might want a mapping of /cluster1/user1 => /cluster1-dc1/user-nn-user1, 
> i.e. we want to refer to the `cluster` and `user` fields in the source to 
> construct the target. It's impossible to achieve this with the current link 
> types. Though we could set up one-to-one mappings, the mount table would become 
> bloated if we have thousands of users. Besides, a regex mapping gives us more 
> flexibility. So we are going to build a regex based mount point whose target 
> can refer to groups from the src regex mapping. 
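> For illustration, in the named-group style used by this patch's tests, such a 
> rule could be sketched as src regex `^/(?<cluster>\w+)/(?<user>\w+)` with 
> target `/${cluster}-dc1/user-nn-${user}` (a hedged sketch; the exact rule 
> syntax is defined by the patch).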



