[ https://issues.apache.org/jira/browse/HADOOP-19343?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17956490#comment-17956490 ]
ASF GitHub Bot commented on HADOOP-19343:
-----------------------------------------

arunkumarchacko commented on code in PR #7721:
URL: https://github.com/apache/hadoop/pull/7721#discussion_r2131402326


##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/ApiErrorExtractor.java:
##########
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import com.google.api.client.googleapis.json.GoogleJsonError;
+import com.google.api.client.googleapis.json.GoogleJsonError.ErrorInfo;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpResponseException;
+import com.google.api.client.http.HttpStatusCodes;
+import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Iterables;
+import java.io.IOException;
+import java.util.List;
+import javax.annotation.Nullable;
+
+/**
+ * Translates exceptions from API calls into higher-level meaning, while allowing injectability for
+ * testing how API errors are handled.
+ */
+class ApiErrorExtractor {
+
+  /** Singleton instance of the ApiErrorExtractor. */
+  public static final ApiErrorExtractor INSTANCE = new ApiErrorExtractor();
+
+  public static final int STATUS_CODE_RANGE_NOT_SATISFIABLE = 416;
+
+  public static final String GLOBAL_DOMAIN = "global";
+  public static final String USAGE_LIMITS_DOMAIN = "usageLimits";
+
+  public static final String RATE_LIMITED_REASON = "rateLimitExceeded";
+  public static final String USER_RATE_LIMITED_REASON = "userRateLimitExceeded";
+
+  public static final String QUOTA_EXCEEDED_REASON = "quotaExceeded";
+
+  // These come with "The account for ... has been disabled" message.
+  public static final String ACCOUNT_DISABLED_REASON = "accountDisabled";
+
+  // These come with "Project marked for deletion" message.
+  public static final String ACCESS_NOT_CONFIGURED_REASON = "accessNotConfigured";
+
+  // These are 400 error codes with a "resource 'xyz' is not ready" message.
+  // They sometimes happen when a create operation is still in flight but the resource
+  // representation is already available via a get call.
+  // The only explanation I could find for this is described here:
+  // java/com/google/cloud/cluster/data/cognac/cognac.proto

Review Comment:
   Removed.

##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java:
##########
@@ -86,4 +122,256 @@ void close() {
       gcs = null;
     }
   }
+
+  public FileInfo getFileInfo(URI path) throws IOException {
+    checkArgument(path != null, "path must not be null");
+    // Validate the given path. true == allow empty object name.
+    // One should be able to get info about the top-level directory (== bucket),
+    // therefore we allow the object name to be empty.
+    StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true);
+    FileInfo fileInfo =
+        FileInfo.fromItemInfo(
+            getFileInfoInternal(resourceId, /* inferImplicitDirectories= */ true));
+    LOG.trace("getFileInfo(path: {}): {}", path, fileInfo);
+    return fileInfo;
+  }
+
+  private GoogleCloudStorageItemInfo getFileInfoInternal(
+      StorageResourceId resourceId,
+      boolean inferImplicitDirectories)
+      throws IOException {
+    if (resourceId.isRoot() || resourceId.isBucket()) {
+      return gcs.getItemInfo(resourceId);
+    }
+
+    StorageResourceId dirId = resourceId.toDirectoryId();
+    if (!resourceId.isDirectory()) {
+      GoogleCloudStorageItemInfo itemInfo = gcs.getItemInfo(resourceId);
+      if (itemInfo.exists()) {
+        return itemInfo;
+      }
+
+      if (inferImplicitDirectories) {
+        // TODO: Set max result
+        List<GoogleCloudStorageItemInfo> listDirResult = gcs.listObjectInfo(
+            resourceId.getBucketName(),
+            resourceId.getObjectName(),
+            GET_FILE_INFO_LIST_OPTIONS);
+        LOG.info("List for getMetadata returned {}. {}", listDirResult.size(), listDirResult);

Review Comment:
   Done

##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateBucketOptions.java:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import java.time.Duration;
+
+final class CreateBucketOptions {
+  static final CreateBucketOptions DEFAULT = new Builder().build(); // TODO: Make sure the defaults
+  private final String location;
+  private final String storageClass;
+  private final Duration ttl;
+  private final String projectId;
+
+  private CreateBucketOptions(Builder builder) {
+    this.location = builder.location;
+    this.storageClass = builder.storageClass;
+    this.ttl = builder.ttl;
+    this.projectId = builder.projectId;
+  }
+
+  public String getLocation() {
+    return location;
+  }
+
+  public String getStorageClass() {
+    return storageClass;
+  }
+
+  public Duration getTtl() { // Changed return type to Duration

Review Comment:
   Done

##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleHadoopFileSystem.java:
##########
@@ -402,18 +465,29 @@ public Path getWorkingDirectory() {
   }
 
   @Override
-  public boolean mkdirs(final Path path, final FsPermission fsPermission) throws IOException {
-    LOG.trace("mkdirs({})", path);
-    throw new UnsupportedOperationException(path.toString());
-  }
+  public boolean mkdirs(final Path hadoopPath, final FsPermission permission) throws IOException {
+    checkArgument(hadoopPath != null, "hadoopPath must not be null");
-//  /**
-//   * Gets the default replication factor.
-//   */
-//  @Override
-//  public short getDefaultReplication() {
-//    return REPLICATION_FACTOR_DEFAULT;
-//  }
+    checkOpen();
+
+    URI gcsPath = getGcsPath(hadoopPath);
+    try {
+      getGcsFs().mkdirs(gcsPath);
+    } catch (java.nio.file.FileAlreadyExistsException faee) {
+      // Need to convert to the Hadoop flavor of FileAlreadyExistsException.
+      throw (FileAlreadyExistsException)

Review Comment:
   Without the typecast the code won't compile; it fails with the error
   "unreported exception java.lang.Throwable; must be caught or declared to be thrown".

##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/CreateBucketOptions.java:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs;
+
+import java.time.Duration;
+
+final class CreateBucketOptions {
+  static final CreateBucketOptions DEFAULT = new Builder().build(); // TODO: Make sure the defaults

Review Comment:
   Added more details. The TODO comments will be addressed very soon.

##########
hadoop-tools/hadoop-gcp/src/test/java/org/apache/hadoop/fs/gs/contract/GoogleContract.java:
##########
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.gs.contract;
+
+import org.apache.hadoop.fs.gs.TestConfiguration;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.contract.AbstractBondedFSContract;
+
+/** Contract of GoogleHadoopFileSystem via scheme "gs". */
+public class GoogleContract extends AbstractBondedFSContract {
+  private static final String CONTRACT_XML = "contract/gs.xml";
+
+  public GoogleContract(Configuration conf) {
+    super(conf);
+    addConfResource(CONTRACT_XML);
+    conf.set("fs.contract.test.fs.gs", "gs://arunchacko-oss-test-bucket"); // TODO:

Review Comment:
   Sure, will take this up next. BTW, to run the contract tests now, change the bucket name and run
   them from a GCP VM after doing "auth login". You also need an auth-keys.xml in the test resource
   path.
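   For reference, a minimal sketch of that setup (the property name fs.contract.test.fs.gs is taken
   from GoogleContract above; the bucket value and the Maven invocation are placeholders/assumptions,
   not the final wiring):

       <!-- src/test/resources/auth-keys.xml (kept out of source control) -->
       <configuration>
         <property>
           <name>fs.contract.test.fs.gs</name>
           <value>gs://your-test-bucket</value>
         </property>
       </configuration>

       # from a GCP VM whose credentials were set up via "auth login"
       cd hadoop-tools/hadoop-gcp
       mvn clean verify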
##########
hadoop-tools/hadoop-gcp/src/main/java/org/apache/hadoop/fs/gs/GoogleCloudStorageFileSystem.java:
##########
@@ -86,4 +122,256 @@ void close() {
      gcs = null;
    }
  }
+
+  public FileInfo getFileInfo(URI path) throws IOException {
+    checkArgument(path != null, "path must not be null");
+    // Validate the given path. true == allow empty object name.
+    // One should be able to get info about the top-level directory (== bucket),
+    // therefore we allow the object name to be empty.
+    StorageResourceId resourceId = StorageResourceId.fromUriPath(path, true);
+    FileInfo fileInfo =
+        FileInfo.fromItemInfo(
+            getFileInfoInternal(resourceId, /* inferImplicitDirectories= */ true));
+    LOG.trace("getFileInfo(path: {}): {}", path, fileInfo);
+    return fileInfo;
+  }
+
+  private GoogleCloudStorageItemInfo getFileInfoInternal(
+      StorageResourceId resourceId,
+      boolean inferImplicitDirectories)
+      throws IOException {
+    if (resourceId.isRoot() || resourceId.isBucket()) {
+      return gcs.getItemInfo(resourceId);
+    }
+
+    StorageResourceId dirId = resourceId.toDirectoryId();
+    if (!resourceId.isDirectory()) {
+      GoogleCloudStorageItemInfo itemInfo = gcs.getItemInfo(resourceId);
+      if (itemInfo.exists()) {
+        return itemInfo;
+      }
+
+      if (inferImplicitDirectories) {
+        // TODO: Set max result
+        List<GoogleCloudStorageItemInfo> listDirResult = gcs.listObjectInfo(
+            resourceId.getBucketName(),
+            resourceId.getObjectName(),
+            GET_FILE_INFO_LIST_OPTIONS);
+        LOG.info("List for getMetadata returned {}. {}", listDirResult.size(), listDirResult);
+        if (!listDirResult.isEmpty()) {
+          LOG.info("Get metadata for directory returned non-empty: {}", listDirResult);
+          return GoogleCloudStorageItemInfo.createInferredDirectory(resourceId.toDirectoryId());
+        }
+      }
+    }
+
+    List<GoogleCloudStorageItemInfo> listDirInfo = ImmutableList.of(gcs.getItemInfo(dirId));
+    if (listDirInfo.isEmpty()) {
+      return GoogleCloudStorageItemInfo.createNotFound(resourceId);
+    }
+    checkState(listDirInfo.size() <= 2, "listed more than 2 objects: '%s'", listDirInfo);
+    GoogleCloudStorageItemInfo dirInfo = Iterables.get(listDirInfo, /* position= */ 0);
+    checkState(
+        dirInfo.getResourceId().equals(dirId) || !inferImplicitDirectories,
+        "listed wrong object '%s', but should be '%s'",
+        dirInfo.getResourceId(),
+        resourceId);
+    return dirInfo.getResourceId().equals(dirId) && dirInfo.exists()
+        ? dirInfo
+        : GoogleCloudStorageItemInfo.createNotFound(resourceId);
+  }
+
+  public void mkdirs(URI path) throws IOException {
+    LOG.trace("mkdirs(path: {})", path);
+    checkNotNull(path, "path should not be null");
+
+    /* allowEmptyObjectName= */
+    StorageResourceId resourceId =
+        StorageResourceId.fromUriPath(path, /* allowEmptyObjectName= */ true);
+    if (resourceId.isRoot()) {
+      // GCS_ROOT directory always exists, no need to go through the rest of the method.
+      return;
+    }
+
+    // In case the path is a bucket we just attempt to create it without additional checks
+    if (resourceId.isBucket()) {
+      try {
+        gcs.createBucket(resourceId.getBucketName(), CreateBucketOptions.DEFAULT);
+      } catch (FileAlreadyExistsException e) {
+        // This means that the bucket already exists, and we do not need to do anything.
+        LOG.trace("mkdirs: {} already exists, ignoring creation failure", resourceId, e);
+      }
+      return;
+    }
+
+    resourceId = resourceId.toDirectoryId();
+
+    // Before creating a leaf directory we need to check that there are no conflicting files
+    // with the same name as any subdirectory
+//    if (options.isEnsureNoConflictingItems()) {

Review Comment:
   Yes, this code will be added. Some of the contract tests are failing because of this; it has been
   converted to a "TODO" comment for now.
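   For context, a rough sketch of the kind of conflict check that comment is pointing at (the
   StorageResourceId(bucket, object) constructor and the method name below are assumptions for
   illustration, not the connector's actual code):

       // Sketch only: before creating the directory object "foo/bar/", fail fast if a plain
       // file object "foo/bar" already exists; ancestors would be checked the same way.
       private void checkNoFileConflictsWithDirectory(StorageResourceId dirId) throws IOException {
         String dirObjectName = dirId.getObjectName();                  // e.g. "foo/bar/"
         String fileName = dirObjectName.endsWith("/")
             ? dirObjectName.substring(0, dirObjectName.length() - 1)   // e.g. "foo/bar"
             : dirObjectName;
         // Assumed constructor: StorageResourceId(bucketName, objectName).
         GoogleCloudStorageItemInfo fileInfo =
             gcs.getItemInfo(new StorageResourceId(dirId.getBucketName(), fileName));
         if (fileInfo.exists()) {
           throw new FileAlreadyExistsException(
               "Cannot create directory '" + dirObjectName + "': file '" + fileName + "' exists");
         }
       }

   A real implementation would presumably batch these lookups across all ancestor paths rather than
   issuing one getItemInfo call per level, which is what the TODO is expected to cover.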
> Add native support for GCS connector
> -------------------------------------
>
>                 Key: HADOOP-19343
>                 URL: https://issues.apache.org/jira/browse/HADOOP-19343
>             Project: Hadoop Common
>          Issue Type: Improvement
>          Components: fs
>    Affects Versions: 3.5.0
>            Reporter: Abhishek Modi
>            Assignee: Arunkumar Chacko
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: GCS connector for Hadoop.pdf, Google Cloud Storage connector for Hadoop-1.pdf,
>                      Google Cloud Storage connector for Hadoop.pdf, Google Cloud Storage connector
>                      for Hadoop.v1.pdf, Google Cloud Storage connector for Hadoop_v1.pdf

--
This message was sent by Atlassian Jira
(v8.20.10#820010)