[ https://issues.apache.org/jira/browse/HADOOP-19343?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17956490#comment-17956490 ]

ASF GitHub Bot commented on HADOOP-19343:
-----------------------------------------

arunkumarchacko commented on code in PR #7721:
URL: https://github.com/apache/hadoop/pull/7721#discussion_r2131402326


##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ApiErrorExtractor.java:
##########
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import com.google.api.client.googleapis.json.GoogleJsonError;
+import com.google.api.client.googleapis.json.GoogleJsonError.ErrorInfo;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpResponseException;
+import com.google.api.client.http.HttpStatusCodes;
+import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables;
+import java.io.IOException;
+import java.util.List;
+import javax.annotation.Nullable;
+
+/**
+ * Translates exceptions from API calls into higher-level meaning, while allowing injectability for
+ * testing how API errors are handled.
+ */
+class ApiErrorExtractor {
+
+  /** Singleton instance of the ApiErrorExtractor. */
+  public static final ApiErrorExtractor INSTANCE = new ApiErrorExtractor();
+
+  public static final int STATUS_CODE_RANGE_NOT_SATISFIABLE = 416;
+
+  public static final String GLOBAL_DOMAIN = "global";
+  public static final String USAGE_LIMITS_DOMAIN = "usageLimits";
+
+  public static final String RATE_LIMITED_REASON = "rateLimitExceeded";
+  public static final String USER_RATE_LIMITED_REASON = "userRateLimitExceeded";
+
+  public static final String QUOTA_EXCEEDED_REASON = "quotaExceeded";
+
+  // These come with "The account for ... has been disabled" message.
+  public static final String ACCOUNT_DISABLED_REASON = "accountDisabled";
+
+  // These come with "Project marked for deletion" message.
+  public static final String ACCESS_NOT_CONFIGURED_REASON = "accessNotConfigured";
+
+  // These are 400 error codes with "resource 'xyz' is not ready" message.
+  // These sometimes happens when create operation is still in-flight but resource
+  // representation is already available via get call.
+  // Only explanation I could find for this is described here:
+  //    java/com/google/cloud/cluster/data/cognac/cognac.proto

Review Comment:
   Removed.
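
   For reference, a minimal sketch (not part of this PR) of how reason constants like
   these are typically consulted when classifying a GoogleJsonResponseException as a
   rate-limit error. The helper name isRateLimited is hypothetical; it relies only on
   the google-api-client types already imported in this file:

      static boolean isRateLimited(IOException e) {
        if (!(e instanceof GoogleJsonResponseException)) {
          return false;
        }
        // getDetails() returns the parsed GoogleJsonError body, if one was present.
        GoogleJsonError details = ((GoogleJsonResponseException) e).getDetails();
        if (details == null || details.getErrors() == null || details.getErrors().isEmpty()) {
          return false;
        }
        ErrorInfo info = details.getErrors().get(0);
        String reason = info.getReason();
        // Treat per-user/global rate limiting and usageLimits quota errors alike.
        return RATE_LIMITED_REASON.equals(reason)
            || USER_RATE_LIMITED_REASON.equals(reason)
            || (USAGE_LIMITS_DOMAIN.equals(info.getDomain())
                && QUOTA_EXCEEDED_REASON.equals(reason));
      }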



##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java:
##########
@@ -86,4 +122,256 @@ void close() {
       gcs = null;
     }
   }
+
+  public FileInfo getFileInfo(URI path) throws IOException {
+    checkArgument(path != null, "path must not be null");
+    // Validate the given path. true == allow empty object name.
+    // One should be able to get info about top level directory (== bucket),
+    // therefore we allow object name to be empty.
+    StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true);
+    FileInfo fileInfo =
+        FileInfo.fromItemInfo(
+            getFileInfoInternal(resourceId, /* inferImplicitDirectories= */ true));
+    LOG.trace("getFileInfo(path: {}): {}", path, fileInfo);
+    return fileInfo;
+  }
+
+  private GoogleCloudStorageItemInfo getFileInfoInternal(
+      StorageResourceId resourceId,
+      boolean inferImplicitDirectories)
+      throws IOException {
+    if (resourceId.isRoot() || resourceId.isBucket()) {
+      return gcs.getItemInfo(resourceId);
+    }
+
+    StorageResourceId dirId = resourceId.toDirectoryId();
+    if (!resourceId.isDirectory()) {
+      GoogleCloudStorageItemInfo itemInfo = gcs.getItemInfo(resourceId);
+      if (itemInfo.exists()) {
+        return itemInfo;
+      }
+
+      if (inferImplicitDirectories) {
+        // TODO: Set max result
+        List<GoogleCloudStorageItemInfo> listDirResult = gcs.listObjectInfo(
+            resourceId.getBucketName(),
+            resourceId.getObjectName(),
+            GET_FILE_INFO_LIST_OPTIONS);
+        LOG.info("List for getMetadat returned {}. {}", listDirResult.size(), 
listDirResult);

Review Comment:
   Done



##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateBucketOptions.java:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import java.time.Duration;
+
+final class CreateBucketOptions {
+  static final CreateBucketOptions DEFAULT = new Builder().build(); // TODO: Make sure the defaults
+  private final String location;
+  private final String storageClass;
+  private final Duration ttl;
+  private final String projectId;
+
+  private CreateBucketOptions(Builder builder) {
+    this.location = builder.location;
+    this.storageClass = builder.storageClass;
+    this.ttl = builder.ttl;
+    this.projectId = builder.projectId;
+  }
+
+  public String getLocation() {
+    return location;
+  }
+
+  public String getStorageClass() {
+    return storageClass;
+  }
+
+  public Duration getTtl() { // Changed return type to Duration

Review Comment:
   Done
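
   For context, a hypothetical usage sketch of this options class. The Builder setter
   names below are assumptions (only new Builder().build() appears in this hunk); the
   createBucket call is the one shown later in GoogleCloudStorageFileSystem.mkdirs:

      // Illustrative only: setter names are assumed, not taken from the PR.
      CreateBucketOptions options = new CreateBucketOptions.Builder()
          .setLocation("US-CENTRAL1")          // assumed setter
          .setStorageClass("STANDARD")         // assumed setter
          .setTtl(java.time.Duration.ofDays(30))  // assumed setter
          .build();
      gcs.createBucket("example-bucket", options);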



##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java:
##########
@@ -402,18 +465,29 @@ public Path getWorkingDirectory() {
   }
 
   @Override
-  public boolean mkdirs(final Path path, final FsPermission fsPermission) throws IOException {
-    LOG.trace("mkdirs({})", path);
-    throw new UnsupportedOperationException(path.toString());
-  }
+  public boolean mkdirs(final Path hadoopPath, final FsPermission permission) throws IOException {
+    checkArgument(hadoopPath != null, "hadoopPath must not be null");
 
-//  /**
-//   * Gets the default replication factor.
-//   */
-//  @Override
-//  public short getDefaultReplication() {
-//    return REPLICATION_FACTOR_DEFAULT;
-//  }
+    checkOpen();
+
+    URI gcsPath = getGcsPath(hadoopPath);
+    try {
+      getGcsFs().mkdirs(gcsPath);
+    } catch (java.nio.file.FileAlreadyExistsException faee) {
+      // Need to convert to the Hadoop flavor of FileAlreadyExistsException.
+      throw (FileAlreadyExistsException)

Review Comment:
   Without the typecast, the code won't compile; it fails with the error "unreported exception java.lang.Throwable; must be caught or declared to be thrown".



##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateBucketOptions.java:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import java.time.Duration;
+
+final class CreateBucketOptions {
+  static final CreateBucketOptions DEFAULT = new Builder().build(); // TODO: Make sure the defaults

Review Comment:
   Added more details. The TODO comments will be addressed very soon.



##########
hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/contract/GoogleContract.java:
##########
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs.contract;
+
+import org.apache.hadoop.fs.gs.TestConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractBondedFSContract;
+
+/** Contract of GoogleHadoopFileSystem via scheme "gs". */
+public class GoogleContract extends AbstractBondedFSContract {
+  private static final String CONTRACT_XML = "contract/gs.xml";
+
+  public GoogleContract(Configuration conf) {
+    super(conf);
+    addConfResource(CONTRACT_XML);
+    conf.set("fs.contract.test.fs.gs", "gs://arunchacko-oss-test-bucket"); // 
TODO:

Review Comment:
   Sure, will take this up next.
   
   BTW, to run the contract tests now, change the bucket name and run from a GCP VM after doing "auth login". You also need to have auth-keys.xml in the test resource path.
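
   For reference, a typical Hadoop contract-test binding that exercises this contract
   (the concrete test class below is illustrative and not part of this hunk; in the
   usual setup the test bucket comes from auth-keys.xml via the fs.contract.test.fs.gs
   property rather than being hard-coded):

      package org.apache.hadoop.fs.gs.contract;

      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
      import org.apache.hadoop.fs.contract.AbstractFSContract;

      /** Illustrative only: binds the standard mkdir contract tests to the "gs" scheme. */
      public class ITestGoogleContractMkdir extends AbstractContractMkdirTest {
        @Override
        protected AbstractFSContract createContract(Configuration conf) {
          return new GoogleContract(conf);
        }
      }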



##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java:
##########
@@ -86,4 +122,256 @@ void close() {
       gcs = null;
     }
   }
+
+  public FileInfo getFileInfo(URI path) throws IOException {
+    checkArgument(path != null, "path must not be null");
+    // Validate the given path. true == allow empty object name.
+    // One should be able to get info about top level directory (== bucket),
+    // therefore we allow object name to be empty.
+    StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true);
+    FileInfo fileInfo =
+        FileInfo.fromItemInfo(
+            getFileInfoInternal(resourceId, /* inferImplicitDirectories= */ true));
+    LOG.trace("getFileInfo(path: {}): {}", path, fileInfo);
+    return fileInfo;
+  }
+
+  private GoogleCloudStorageItemInfo getFileInfoInternal(
+      StorageResourceId resourceId,
+      boolean inferImplicitDirectories)
+      throws IOException {
+    if (resourceId.isRoot() || resourceId.isBucket()) {
+      return gcs.getItemInfo(resourceId);
+    }
+
+    StorageResourceId dirId = resourceId.toDirectoryId();
+    if (!resourceId.isDirectory()) {
+      GoogleCloudStorageItemInfo itemInfo = gcs.getItemInfo(resourceId);
+      if (itemInfo.exists()) {
+        return itemInfo;
+      }
+
+      if (inferImplicitDirectories) {
+        // TODO: Set max result
+        List<GoogleCloudStorageItemInfo> listDirResult = gcs.listObjectInfo(
+            resourceId.getBucketName(),
+            resourceId.getObjectName(),
+            GET_FILE_INFO_LIST_OPTIONS);
+        LOG.info("List for getMetadat returned {}. {}", listDirResult.size(), 
listDirResult);
+        if (!listDirResult.isEmpty()) {
+          LOG.info("Get metadata for directory returned non empty{}", 
listDirResult);
+          return GoogleCloudStorageItemInfo.createInferredDirectory(resourceId.toDirectoryId());
+        }
+      }
+    }
+
+    List<GoogleCloudStorageItemInfo> listDirInfo = ImmutableList.of(gcs.getItemInfo(dirId));
+    if (listDirInfo.isEmpty()) {
+      return GoogleCloudStorageItemInfo.createNotFound(resourceId);
+    }
+    checkState(listDirInfo.size() <= 2, "listed more than 2 objects: '%s'", listDirInfo);
+    GoogleCloudStorageItemInfo dirInfo = Iterables.get(listDirInfo, /* position= */ 0);
+    checkState(
+        dirInfo.getResourceId().equals(dirId) || !inferImplicitDirectories,
+        "listed wrong object '%s', but should be '%s'",
+        dirInfo.getResourceId(),
+        resourceId);
+    return dirInfo.getResourceId().equals(dirId) && dirInfo.exists()
+        ? dirInfo
+        : GoogleCloudStorageItemInfo.createNotFound(resourceId);
+  }
+
+  public void mkdirs(URI path) throws IOException {
+    LOG.trace("mkdirs(path: {})", path);
+    checkNotNull(path, "path should not be null");
+
+    /* allowEmptyObjectName= */
+    StorageResourceId resourceId =
+        StorageResourceId.fromUriPath(path, /* allowEmptyObjectName= */ true);
+    if (resourceId.isRoot()) {
+      // GCS_ROOT directory always exists, no need to go through the rest of the method.
+      return;
+    }
+
+    // In case path is a bucket we just attempt to create it without additional checks
+    if (resourceId.isBucket()) {
+      try {
+        gcs.createBucket(resourceId.getBucketName(), CreateBucketOptions.DEFAULT);
+      } catch (FileAlreadyExistsException e) {
+        // This means that bucket already exist, and we do not need to do anything.
+        LOG.trace("mkdirs: {} already exists, ignoring creation failure", 
resourceId, e);
+      }
+      return;
+    }
+
+    resourceId = resourceId.toDirectoryId();
+
+    // Before creating a leaf directory we need to check if there are no conflicting files
+    // with the same name as any subdirectory
+//    if (options.isEnsureNoConflictingItems()) {

Review Comment:
   Yes, this code will be added. Some of the contract tests are failing due to this. Converted to a "TODO" comment.
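
   For illustration, a rough sketch of what such a conflict check could look like.
   This is purely hypothetical and not from the PR; the helper name, the
   StorageResourceId(bucket, object) constructor, and the exception type are
   assumptions. For a new directory gs://bucket/a/b/c/ it verifies that no regular
   object exists at gs://bucket/a/b/c, gs://bucket/a/b, or gs://bucket/a:

      private void checkNoFileConflictsWithDirectory(StorageResourceId dirId)
          throws IOException {
        String bucket = dirId.getBucketName();
        String path = dirId.getObjectName();            // e.g. "a/b/c/"
        if (path.endsWith("/")) {
          path = path.substring(0, path.length() - 1);  // strip trailing slash
        }
        while (!path.isEmpty()) {
          // Probe the non-directory variant of each ancestor path segment.
          GoogleCloudStorageItemInfo info =
              gcs.getItemInfo(new StorageResourceId(bucket, path));
          if (info.exists()) {
            throw new java.nio.file.FileAlreadyExistsException(
                "Cannot create directories, conflicting file exists: gs://"
                    + bucket + "/" + path);
          }
          int slash = path.lastIndexOf('/');
          path = slash < 0 ? "" : path.substring(0, slash);
        }
      }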





> Add native support for GCS connector
> ------------------------------------
>
>                 Key: HADOOP-19343
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19343
>             Project: Hadoop Common
>          Issue Type: Improvement
>          Components: fs
>    Affects Versions: 3.5.0
>            Reporter: Abhishek Modi
>            Assignee: Arunkumar Chacko
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: GCS connector for Hadoop.pdf, Google Cloud Storage 
> connector for Hadoop-1.pdf, Google Cloud Storage connector for Hadoop.pdf, 
> Google Cloud Storage connector for Hadoop.v1.pdf, Google Cloud Storage 
> connector for Hadoop_v1.pdf
>
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)
