(gravitino) branch main updated: [#4132] feat(bundled-catalog): remove all propertiesMeta from bundled catalog (#4178)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new b75d58da7 [#4132] feat(bundled-catalog): remove all propertiesMeta from bundled catalog (#4178) b75d58da7 is described below commit b75d58da76cfd62e4524645b7bff5fd5a736ad04 Author: FANNG AuthorDate: Tue Jul 23 14:54:34 2024 +0800 [#4132] feat(bundled-catalog): remove all propertiesMeta from bundled catalog (#4178) ### What changes were proposed in this pull request? - remove all propertiesMeta from bundled catalog - rename `bundled-catalog` to `catalog-common` ### Why are the changes needed? Fix: #4132 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests --- catalogs/bundled-catalog/build.gradle.kts | 100 --- .../gravitino/catalog/common/ClassProvider.java| 85 - .../build.gradle.kts} | 25 +--- .../gravitino/catalog/hive/HiveConstants.java | 48 .../catalog/hive/HiveStorageConstants.java | 63 ++ .../gravitino/catalog/hive/StorageFormat.java | 83 + .../apache/gravitino/catalog/hive/TableType.java} | 27 ++--- .../gravitino/catalog/jdbc/JdbcConstants.java} | 21 +--- .../lakehouse/iceberg/IcebergConstants.java| 0 .../gravitino/catalog/mysql/MysqlConstants.java} | 26 +--- .../catalog/property/PropertyConverter.java| 0 catalogs/catalog-hive/build.gradle.kts | 1 + .../catalog/hive/HiveCatalogOperations.java| 1 - .../catalog/hive/HiveCatalogPropertiesMeta.java| 18 +-- .../catalog/hive/HiveSchemaPropertiesMetadata.java | 2 +- .../apache/gravitino/catalog/hive/HiveTable.java | 9 +- .../catalog/hive/HiveTablePropertiesMetadata.java | 135 +++-- .../hive/integration/test/CatalogHiveIT.java | 22 ++-- catalogs/catalog-jdbc-common/build.gradle.kts | 1 + .../catalog/jdbc/JdbcTablePropertiesMetadata.java | 2 +- catalogs/catalog-jdbc-mysql/build.gradle.kts | 1 + 
.../mysql/MysqlTablePropertiesMetadata.java| 10 +- .../catalog-lakehouse-iceberg/build.gradle.kts | 2 +- flink-connector/build.gradle.kts | 2 +- .../connector/hive/HivePropertiesConverter.java| 14 +-- .../hive/TestHivePropertiesConverter.java | 5 +- .../connector/integration/test/FlinkCommonIT.java | 4 +- .../integration/test/hive/FlinkHiveCatalogIT.java | 6 +- settings.gradle.kts| 2 +- spark-connector/spark-common/build.gradle.kts | 2 +- .../connector/hive/HivePropertiesConstants.java| 47 --- .../connector/hive/HivePropertiesConverter.java| 5 +- spark-connector/v3.3/spark/build.gradle.kts| 1 + spark-connector/v3.4/spark/build.gradle.kts| 1 + spark-connector/v3.5/spark/build.gradle.kts| 1 + trino-connector/build.gradle.kts | 2 +- .../catalog/hive/HiveSchemaPropertyConverter.java | 4 +- .../catalog/hive/HiveTablePropertyConverter.java | 26 ++-- .../jdbc/mysql/MySQLTablePropertyConverter.java| 7 +- .../hive/TestHiveCatalogPropertyConverter.java | 16 --- 40 files changed, 329 insertions(+), 498 deletions(-) diff --git a/catalogs/bundled-catalog/build.gradle.kts b/catalogs/bundled-catalog/build.gradle.kts deleted file mode 100644 index a9cc1141b..0 --- a/catalogs/bundled-catalog/build.gradle.kts +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar - -plugins { - id("java") - alias(libs.plugins.shadow) -} - -dependencies { - implementation(project(":catalogs:catalog-hive")) - implementation(project(":catalogs:catalog-jdbc-common")) - implementation(project(":catalogs:catalog-jdbc-mysql")) - implementation(project(":catalogs:catalo
(gravitino) branch main updated: [#4143] improvement(core): Optimize the privileges of access control (#4214)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new cf3293523 [#4143] improvment(core): Optimize the privileges of access control (#4214) cf3293523 is described below commit cf329352398d19fd1c1645522d9bfd909b9ccffc Author: roryqi AuthorDate: Tue Jul 23 14:40:09 2024 +0800 [#4143] improvment(core): Optimize the privileges of access control (#4214) ### What changes were proposed in this pull request? Optimize the privileges of access control ### Why are the changes needed? Fix: #4143 ### Does this PR introduce _any_ user-facing change? No need. ### How was this patch tested? Exsiting tests. --- .../apache/gravitino/authorization/Privilege.java | 28 +-- .../apache/gravitino/authorization/Privileges.java | 249 - 2 files changed, 46 insertions(+), 231 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java index 3527ead1b..8ec9bb6a2 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java @@ -67,28 +67,14 @@ public interface Privilege { PRODUCE_TOPIC(0L, 1L << 12), /** The privilege to consume from a topic. 
*/ CONSUME_TOPIC(0L, 1L << 13), -/** The privilege to add a user */ -ADD_USER(0L, 1L << 14), -/** The privilege to remove a user */ -REMOVE_USER(0L, 1L << 15), -/** The privilege to get a user */ -GET_USER(0L, 1L << 16), -/** The privilege to add a group */ -ADD_GROUP(0L, 1L << 17), -/** The privilege to remove a group */ -REMOVE_GROUP(0L, 1L << 18), -/** The privilege to get a group */ -GET_GROUP(0L, 1L << 19), +/** The privilege to create a user */ +CREATE_USER(0L, 1L << 14), +/** The privilege to create a group */ +CREATE_GROUP(0L, 1L << 15), /** The privilege to create a role */ -CREATE_ROLE(0L, 1L << 20), -/** The privilege to delete a role */ -DELETE_ROLE(0L, 1L << 21), -/** The privilege to grant a role to the user or the group. */ -GRANT_ROLE(0L, 1L << 22), -/** The privilege to revoke a role from the user or the group. */ -REVOKE_ROLE(0L, 1L << 23), -/** The privilege to get a role */ -GET_ROLE(0L, 1L << 24); +CREATE_ROLE(0L, 1L << 16), +/** The privilege to grant or revoke a role for the user or the group. 
*/ +MANAGE_GRANTS(0L, 1L << 17); private final long highBits; private final long lowBits; diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java index 07a745760..6947ced25 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java @@ -79,32 +79,18 @@ public class Privileges { return ConsumeTopic.allow(); // User - case ADD_USER: -return AddUser.allow(); - case REMOVE_USER: -return RemoveUser.allow(); - case GET_USER: -return GetUser.allow(); + case CREATE_USER: +return CreateUser.allow(); // Group - case ADD_GROUP: -return AddGroup.allow(); - case REMOVE_GROUP: -return RemoveGroup.allow(); - case GET_GROUP: -return GetGroup.allow(); + case CREATE_GROUP: +return CreateGroup.allow(); // Role case CREATE_ROLE: return CreateRole.allow(); - case DELETE_ROLE: -return DeleteRole.allow(); - case GRANT_ROLE: -return GrantRole.allow(); - case REVOKE_ROLE: -return RevokeRole.allow(); - case GET_ROLE: -return GetRole.allow(); + case MANAGE_GRANTS: +return ManageGrants.allow(); default: throw new IllegalArgumentException("Doesn't support the privilege: " + name); @@ -167,32 +153,18 @@ public class Privileges { return ConsumeTopic.deny(); // User - case ADD_USER: -return AddUser.deny(); - case REMOVE_USER: -return RemoveUser.deny(); - case GET_USER: -return GetUser.deny(); + case CREATE_USER: +return CreateUser.deny(); // Group - case ADD_GROUP: -return AddGroup.deny(); - case REMOVE_GROUP: -return RemoveGroup.deny(); - case GET_GROUP: -return GetGroup.deny(); + case CREATE_GROUP: +return CreateGroup.deny(); // Role case CREATE_ROLE: return CreateRole.deny(
(gravitino) branch main updated: [#3914] feat(server): Add REST server interface for Tag System (#3943)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 71e6651e2 [#3914] feat(server): Add REST server interface for Tag System (#3943) 71e6651e2 is described below commit 71e6651e2bc7359a9c9b694fa7275871e4e11969 Author: Jerry Shao AuthorDate: Mon Jul 22 20:48:52 2024 +0800 [#3914] feat(server): Add REST server interface for Tag System (#3943) ### What changes were proposed in this pull request? This PR proposes to add REST server interface for Tag System ### Why are the changes needed? This is a part of work for Tag system. Fix: #3914 ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? UTs added. - Co-authored-by: bknbkn <67318028+bkn...@users.noreply.github.com> Co-authored-by: Dev Parikh <51128342+dev79...@users.noreply.github.com> Co-authored-by: roryqi Co-authored-by: JinsYin Co-authored-by: rqyin --- .../java/org/apache/gravitino/MetadataObjects.java | 41 + .../gravitino/client/ObjectMapperProvider.java |4 +- .../gravitino/dto/requests/TagCreateRequest.java | 76 ++ .../gravitino/dto/requests/TagUpdateRequest.java | 201 .../gravitino/dto/requests/TagUpdatesRequest.java | 57 + .../dto/requests/TagsAssociateRequest.java | 82 ++ .../dto/responses/MetadataObjectListResponse.java | 73 ++ .../gravitino/dto/responses/NameListResponse.java | 65 ++ .../gravitino/dto/responses/TagListResponse.java | 65 ++ .../gravitino/dto/responses/TagResponse.java | 62 ++ .../gravitino/dto/tag/MetadataObjectDTO.java | 124 +++ .../java/org/apache/gravitino/dto/tag/TagDTO.java | 147 +++ .../apache/gravitino/dto/util/DTOConverters.java | 38 + .../java/org/apache/gravitino/json/JsonUtils.java |7 +- .../dto/requests/TestTagCreateRequest.java | 49 + .../dto/requests/TestTagUpdatesRequest.java| 91 ++ .../gravitino/dto/responses/TestResponses.java | 57 + 
.../gravitino/dto/tag/TestMetadataObjectDTO.java | 150 +++ .../org/apache/gravitino/dto/tag/TestTagDTO.java | 103 ++ .../gravitino/tag/SupportsTagOperations.java |1 - .../java/org/apache/gravitino/tag/TagManager.java |7 +- .../apache/gravitino/server/GravitinoServer.java |3 +- .../gravitino/server/web/ObjectMapperProvider.java |4 +- .../server/web/rest/ExceptionHandlers.java | 42 + .../gravitino/server/web/rest/OperationType.java |3 +- .../gravitino/server/web/rest/TagOperations.java | 451 .../server/web/rest/TestTagOperations.java | 1099 27 files changed, 3093 insertions(+), 9 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/MetadataObjects.java b/api/src/main/java/org/apache/gravitino/MetadataObjects.java index 5136164c9..6bd72137e 100644 --- a/api/src/main/java/org/apache/gravitino/MetadataObjects.java +++ b/api/src/main/java/org/apache/gravitino/MetadataObjects.java @@ -22,6 +22,7 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import java.util.List; +import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; /** The helper class for {@link MetadataObject}. */ @@ -94,6 +95,46 @@ public class MetadataObjects { return new MetadataObjectImpl(getParentFullName(names), getLastName(names), type); } + /** + * Get the parent metadata object of the given metadata object. 
+ * + * @param object The metadata object + * @return The parent metadata object if it exists, otherwise null + */ + @Nullable + public static MetadataObject parent(MetadataObject object) { +if (object == null) { + return null; +} + +// Return null if the object is the root object +if (object.type() == MetadataObject.Type.METALAKE +|| object.type() == MetadataObject.Type.CATALOG) { + return null; +} + +MetadataObject.Type parentType; +switch (object.type()) { + case COLUMN: +parentType = MetadataObject.Type.TABLE; +break; + case TABLE: + case FILESET: + case TOPIC: +parentType = MetadataObject.Type.SCHEMA; +break; + case SCHEMA: +parentType = MetadataObject.Type.CATALOG; +break; + + default: +throw new IllegalArgumentException( +"Unexpected to reach here for metadata object type: " + object.type()); +} + +return parse(object.parent(), parentType); + } + /** * Parse the metadata object with the given full name and type. * diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/ObjectMapperProvider.java b/c
(gravitino) branch main updated: [#4197] improvement(common): CatalogListResponse should implement the method validate (#4223)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 1e8511ae6 [#4197] improvement(common): CatalogListResponse should implement the method validate (#4223) 1e8511ae6 is described below commit 1e8511ae63df75545c4804d94eda2120ed8cfffc Author: jingjia88 <32607481+jingji...@users.noreply.github.com> AuthorDate: Mon Jul 22 18:20:48 2024 +0800 [#4197] improvement(common): CatalogListResponse should implement the method validate (#4223) ### What changes were proposed in this pull request? Add method `validate` in CatalogListResponse ### Why are the changes needed? Fix: #4197 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add the new ut. --- .../dto/responses/CatalogListResponse.java | 26 ++ .../gravitino/dto/responses/TestResponses.java | 22 ++ 2 files changed, 48 insertions(+) diff --git a/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java b/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java index dfb31417a..6d069ae46 100644 --- a/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java +++ b/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java @@ -19,9 +19,12 @@ package org.apache.gravitino.dto.responses; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import java.util.Arrays; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.dto.CatalogDTO; /** Represents a response for a list of catalogs with their information. */ @@ -51,4 +54,27 @@ public class CatalogListResponse extends BaseResponse { super(); this.catalogs = null; } + + /** + * Validates the response data. 
+ * + * @throws IllegalArgumentException if name, type or audit information is not set. + */ + @Override + public void validate() throws IllegalArgumentException { +super.validate(); + +Preconditions.checkArgument(catalogs != null, "catalogs must be non-null"); +Arrays.stream(catalogs) +.forEach( +catalog -> { + Preconditions.checkArgument( + StringUtils.isNotBlank(catalog.name()), + "catalog 'name' must not be null and empty"); + Preconditions.checkArgument( + catalog.type() != null, "catalog 'type' must not be null"); + Preconditions.checkArgument( + catalog.auditInfo() != null, "catalog 'audit' must not be null"); +}); + } } diff --git a/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java b/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java index 8d7c52808..7f9ebfeb7 100644 --- a/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java +++ b/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java @@ -146,6 +146,28 @@ public class TestResponses { assertThrows(IllegalArgumentException.class, () -> catalog.validate()); } + @Test + void testCatalogListResponse() throws IllegalArgumentException { +AuditDTO audit = + AuditDTO.builder().withCreator("creator").withCreateTime(Instant.now()).build(); +CatalogDTO catalog = +CatalogDTO.builder() +.withName("CatalogA") +.withComment("comment") +.withType(Catalog.Type.RELATIONAL) +.withProvider("test") +.withAudit(audit) +.build(); +CatalogListResponse response = new CatalogListResponse(new CatalogDTO[] {catalog}); +response.validate(); // No exception thrown + } + + @Test + void testCatalogListException() throws IllegalArgumentException { +CatalogListResponse response = new CatalogListResponse(); +assertThrows(IllegalArgumentException.class, () -> response.validate()); + } + @Test void testSchemaResponse() throws IllegalArgumentException { AuditDTO audit =
(gravitino) branch main updated: [#4140] improvement(core): Optimize the privileges of securable objects (#4141)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new f3abe9d6f [#4140] improvement(core): Optimize the privileges of securable objects (#4141) f3abe9d6f is described below commit f3abe9d6ff1b418515f6d68758003fa0af38b5e0 Author: roryqi AuthorDate: Fri Jul 19 18:20:38 2024 +0800 [#4140] improvement(core): Optimize the privileges of securable objects (#4141) ### What changes were proposed in this pull request? Optimize the privileges of securable objects ### Why are the changes needed? Fix: #4140 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. --- .../apache/gravitino/authorization/Privilege.java | 68 ++--- .../apache/gravitino/authorization/Privileges.java | 278 - .../authorization/TestSecurableObjects.java| 14 +- .../gravitino/proto/TestEntityProtoSerDe.java | 2 +- .../relational/service/TestRoleMetaService.java| 2 +- .../relational/service/TestSecurableObjects.java | 6 +- .../server/web/rest/TestRoleOperations.java| 14 +- 7 files changed, 100 insertions(+), 284 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java index 5cb7b3214..3527ead1b 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java @@ -43,66 +43,52 @@ public interface Privilege { enum Name { /** The privilege to create a catalog. */ CREATE_CATALOG(0L, 1L), -/** The privilege to drop a catalog. */ -DROP_CATALOG(0L, 1L << 1), -/** The privilege to alter a catalog. */ -ALTER_CATALOG(0L, 1L << 2), /** The privilege to use a catalog. */ -USE_CATALOG(0L, 1L << 3), +USE_CATALOG(0L, 1L << 2), /** The privilege to create a schema. 
*/ -CREATE_SCHEMA(0L, 1L << 4), -/** The privilege to drop a schema. */ -DROP_SCHEMA(0L, 1L << 5), -/** The privilege to alter a schema. */ -ALTER_SCHEMA(0L, 1L << 6), +CREATE_SCHEMA(0L, 1L << 3), /** the privilege to use a schema. */ -USE_SCHEMA(0L, 1L << 7), +USE_SCHEMA(0L, 1L << 4), /** The privilege to create a table. */ -CREATE_TABLE(0L, 1L << 8), -/** The privilege to drop a table. */ -DROP_TABLE(0L, 1L << 9), -/** The privilege to write a table. */ -WRITE_TABLE(0L, 1L << 10), -/** The privilege to read a table. */ -READ_TABLE(0L, 1L << 11), +CREATE_TABLE(0L, 1L << 5), +/** The privilege to execute SQL `ALTER`, `INSERT`, `UPDATE`, or `DELETE` for a table. */ +MODIFY_TABLE(0L, 1L << 6), +/** The privilege to select data from a table. */ +SELECT_TABLE(0L, 1L << 7), /** The privilege to create a fileset. */ -CREATE_FILESET(0L, 1L << 12), -/** The privilege to drop a fileset. */ -DROP_FILESET(0L, 1L << 13), +CREATE_FILESET(0L, 1L << 8), /** The privilege to write a fileset. */ -WRITE_FILESET(0L, 1L << 14), +WRITE_FILESET(0L, 1L << 9), /** The privilege to read a fileset. */ -READ_FILESET(0L, 1L << 15), +READ_FILESET(0L, 1L << 10), /** The privilege to create a topic. */ -CREATE_TOPIC(0L, 1L << 16), -/** The privilege to drop a topic. */ -DROP_TOPIC(0L, 1L << 17), -/** The privilege to write a topic. */ -WRITE_TOPIC(0L, 1L << 18), -/** The privilege to read a topic. */ -READ_TOPIC(0L, 1L << 19), +CREATE_TOPIC(0L, 1L << 11), +/** The privilege to produce to a topic. */ +PRODUCE_TOPIC(0L, 1L << 12), +/** The privilege to consume from a topic. 
*/ +CONSUME_TOPIC(0L, 1L << 13), /** The privilege to add a user */ -ADD_USER(0L, 1L << 20), +ADD_USER(0L, 1L << 14), /** The privilege to remove a user */ -REMOVE_USER(0L, 1L << 21), +REMOVE_USER(0L, 1L << 15), /** The privilege to get a user */ -GET_USER(0L, 1L << 22), +GET_USER(0L, 1L << 16), /** The privilege to add a group */ -ADD_GROUP(0L, 1L << 23), +ADD_GROUP(0L, 1L << 17), /** The privilege to remove a group */ -REMOVE_GROUP(0L, 1L << 24), +REMOVE_GROUP(0L, 1L << 18), /** The privilege to get a group */ -GET_GROUP(0L, 1L << 25), +GET_GROUP(0L, 1L << 19), /** The privilege to create a role */ -CREATE_ROLE(0L, 1L << 26), +CREATE_ROLE(0L, 1L << 20), /** The privilege to delete a role */ -DELETE_ROLE(0
(gravitino) branch main updated: [#4195] improvement(core): Decouple `OperationDispatcher` from `NormalizeDispatcher` (#4196)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new d6721459d [#4195] improvement(core): Decouple `OperationDispatcher` from `NormalizeDispatcher` (#4196) d6721459d is described below commit d6721459d499d1237cfcb5b0527ed5252e2462ce Author: mchades AuthorDate: Fri Jul 19 18:06:44 2024 +0800 [#4195] improvement(core): Decouple `OperationDispatcher` from `NormalizeDispatcher` (#4196) ### What changes were proposed in this pull request? - Decouple `OperationDispatcher` from `NormalizeDispatcher` - move `getCatalogCapability` method from `OperationDispatcher` to `CapabilityHelpers` ### Why are the changes needed? Fix: #4195 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests --- .../java/org/apache/gravitino/GravitinoEnv.java| 11 ++--- .../gravitino/catalog/CapabilityHelpers.java | 23 ++ .../catalog/FilesetNormalizeDispatcher.java| 48 ++--- .../catalog/FilesetOperationDispatcher.java| 1 + .../gravitino/catalog/OperationDispatcher.java | 40 +- .../catalog/PartitionNormalizeDispatcher.java | 47 ++--- .../catalog/SchemaNormalizeDispatcher.java | 38 - .../catalog/SchemaOperationDispatcher.java | 1 + .../catalog/TableNormalizeDispatcher.java | 49 +++--- .../catalog/TableOperationDispatcher.java | 1 + .../catalog/TopicNormalizeDispatcher.java | 47 +++-- .../catalog/TopicOperationDispatcher.java | 1 + .../apache/gravitino/utils/NameIdentifierUtil.java | 32 ++ .../catalog/TestFilesetNormalizeDispatcher.java| 6 ++- .../gravitino/catalog/TestOperationDispatcher.java | 13 +++--- .../catalog/TestPartitionNormalizeDispatcher.java | 2 +- .../catalog/TestSchemaNormalizeDispatcher.java | 2 +- .../catalog/TestTableNormalizeDispatcher.java | 6 ++- .../catalog/TestTopicNormalizeDispatcher.java | 6 ++- 19 files changed, 232 insertions(+), 142 deletions(-) 
diff --git a/core/src/main/java/org/apache/gravitino/GravitinoEnv.java b/core/src/main/java/org/apache/gravitino/GravitinoEnv.java index b307cddbd..cf95dd7e7 100644 --- a/core/src/main/java/org/apache/gravitino/GravitinoEnv.java +++ b/core/src/main/java/org/apache/gravitino/GravitinoEnv.java @@ -154,30 +154,31 @@ public class GravitinoEnv { SchemaOperationDispatcher schemaOperationDispatcher = new SchemaOperationDispatcher(catalogManager, entityStore, idGenerator); SchemaNormalizeDispatcher schemaNormalizeDispatcher = -new SchemaNormalizeDispatcher(schemaOperationDispatcher); +new SchemaNormalizeDispatcher(schemaOperationDispatcher, catalogManager); this.schemaDispatcher = new SchemaEventDispatcher(eventBus, schemaNormalizeDispatcher); TableOperationDispatcher tableOperationDispatcher = new TableOperationDispatcher(catalogManager, entityStore, idGenerator); TableNormalizeDispatcher tableNormalizeDispatcher = -new TableNormalizeDispatcher(tableOperationDispatcher); +new TableNormalizeDispatcher(tableOperationDispatcher, catalogManager); this.tableDispatcher = new TableEventDispatcher(eventBus, tableNormalizeDispatcher); PartitionOperationDispatcher partitionOperationDispatcher = new PartitionOperationDispatcher(catalogManager, entityStore, idGenerator); // todo: support PartitionEventDispatcher -this.partitionDispatcher = new PartitionNormalizeDispatcher(partitionOperationDispatcher); +this.partitionDispatcher = +new PartitionNormalizeDispatcher(partitionOperationDispatcher, catalogManager); FilesetOperationDispatcher filesetOperationDispatcher = new FilesetOperationDispatcher(catalogManager, entityStore, idGenerator); FilesetNormalizeDispatcher filesetNormalizeDispatcher = -new FilesetNormalizeDispatcher(filesetOperationDispatcher); +new FilesetNormalizeDispatcher(filesetOperationDispatcher, catalogManager); this.filesetDispatcher = new FilesetEventDispatcher(eventBus, filesetNormalizeDispatcher); TopicOperationDispatcher topicOperationDispatcher = new 
TopicOperationDispatcher(catalogManager, entityStore, idGenerator); TopicNormalizeDispatcher topicNormalizeDispatcher = -new TopicNormalizeDispatcher(topicOperationDispatcher); +new TopicNormalizeDispatcher(topicOperationDispatcher, catalogManager); this.topicDispatcher = new TopicEventDispatcher(eventBus, topicNormalizeDispatcher); // Create and initialize access control related modules diff --git a/core/src/main/java/org/apache
(gravitino) branch main updated: [#3755] improvement(client-python): Support OAuth2TokenProvider for Python client (#4011)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 213bcc9f2 [#3755] improvement(client-python): Support OAuth2TokenProvider for Python client (#4011) 213bcc9f2 is described below commit 213bcc9f28102a3b472a8b2d9629525e9d00d269 Author: noidname01 <55401762+noidnam...@users.noreply.github.com> AuthorDate: Fri Jul 19 10:53:57 2024 +0800 [#3755] improvement(client-python): Support OAuth2TokenProvider for Python client (#4011) ### What changes were proposed in this pull request? * Add `OAuth2TokenProvider` and `DefaultOAuth2TokenProvider` in `client-python` * There are some components and tests missing because it would be a big code change if they were also done in this PR, they will be added in the following PRs - [ ] Error Handling: #4173 - [ ] Integration Test: #4208 * Modify test file structure, and found issue #4136, solve it by reset environment variable. ### Why are the changes needed? Fix: #3755, #4136 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Add UT and tested by `./gradlew clients:client-python:unittest` - Co-authored-by: TimWang --- .../client-python/gravitino/auth/auth_constants.py | 2 + .../auth/default_oauth2_token_provider.py | 133 +++ .../gravitino/auth/oauth2_token_provider.py| 75 +++ .../gravitino/auth/simple_auth_provider.py | 4 +- .../requests/oauth2_client_credential_request.py} | 15 ++- .../dto/responses/oauth2_token_response.py | 55 .../client-python/gravitino/utils/http_client.py | 36 -- clients/client-python/requirements-dev.txt | 3 +- .../tests/integration/test_simple_auth_client.py | 2 + .../unittests/auth/__init__.py}| 6 - .../tests/unittests/auth/mock_base.py | 144 + .../unittests/auth/test_oauth2_token_provider.py | 93 + .../{ => auth}/test_simple_auth_provider.py| 4 + 13 files changed, 551 insertions(+), 21 deletions(-) diff --git a/clients/client-python/gravitino/auth/auth_constants.py b/clients/client-python/gravitino/auth/auth_constants.py index 2494030fc..247abcaaa 100644 --- a/clients/client-python/gravitino/auth/auth_constants.py +++ b/clients/client-python/gravitino/auth/auth_constants.py @@ -21,4 +21,6 @@ under the License. class AuthConstants: HTTP_HEADER_AUTHORIZATION: str = "Authorization" +AUTHORIZATION_BEARER_HEADER: str = "Bearer " + AUTHORIZATION_BASIC_HEADER: str = "Basic " diff --git a/clients/client-python/gravitino/auth/default_oauth2_token_provider.py b/clients/client-python/gravitino/auth/default_oauth2_token_provider.py new file mode 100644 index 0..3fb730395 --- /dev/null +++ b/clients/client-python/gravitino/auth/default_oauth2_token_provider.py @@ -0,0 +1,133 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +""" + +import time +import json +import base64 +from typing import Optional +from gravitino.auth.oauth2_token_provider import OAuth2TokenProvider +from gravitino.dto.responses.oauth2_token_response import OAuth2TokenResponse +from gravitino.dto.requests.oauth2_client_credential_request import ( +OAuth2ClientCredentialRequest, +) +from gravitino.exceptions.base import GravitinoRuntimeException + +CLIENT_CREDENTIALS = "client_credentials" +CREDENTIAL_SPLITTER = ":" +TOKEN_SPLITTER = "." +JWT_EXPIRE = "exp" + + +class DefaultOAuth2TokenProvider(OAuth2TokenProvider): +"""This class is the default implement of OAuth2TokenProvider.""" + +_credential: Optional[str] +_scope: Optional[str] +_path: Optional[str] +_token: Optional[str] + +def __init__( +self, +uri: str = None, +credential: str = None, +scope: str = None, +
(gravitino) branch main updated: [#3985] fix(hadoop-catalog): Create fileset catalog with empty location property success, but can't list schema of the catalog (#4177)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 9be7cc999 [#3985] fix(hadooop-catalog): Create fileset catalog with empty location property success, but can't list schema of the catalog (#4177) 9be7cc999 is described below commit 9be7cc999f97fe0962a53945f95bfbd46b7e90c0 Author: liuxian <39123327+dataxplorel...@users.noreply.github.com> AuthorDate: Thu Jul 18 15:03:50 2024 +0800 [#3985] fix(hadooop-catalog): Create fileset catalog with empty location property success, but can't list schema of the catalog (#4177) ### What changes were proposed in this pull request? Check if the catalogLocation is empty when initializing ### Why are the changes needed? Fix: #3985 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing pipeline. Before https://github.com/user-attachments/assets/42003bf4-f6a2-4729-98fc-bf139a811daf";> After https://github.com/user-attachments/assets/531dbc3b-7ad3-4949-8910-ea99bb31baa1";> --- .../catalog/hadoop/HadoopCatalogOperations.java | 5 - .../catalog/hadoop/TestHadoopCatalogOperations.java | 16 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java index b82eaa359..6b49c1310 100644 --- a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java +++ b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java @@ -135,7 +135,10 @@ public class HadoopCatalogOperations implements CatalogOperations, SupportsSchem conf.forEach(hadoopConf::set); initAuthentication(conf, hadoopConf); -this.catalogStorageLocation = 
Optional.ofNullable(catalogLocation).map(Path::new); +this.catalogStorageLocation = +StringUtils.isNotBlank(catalogLocation) +? Optional.of(catalogLocation).map(Path::new) +: Optional.empty(); } private void initAuthentication(Map conf, Configuration hadoopConf) { diff --git a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java index 284070f0b..3c8a4d463 100644 --- a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java +++ b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java @@ -230,6 +230,22 @@ public class TestHadoopCatalogOperations { Assertions.assertEquals("Schema m1.c1.schema11 already exists", exception.getMessage()); } + @Test + public void testCreateSchemaWithEmptyCatalogLocation() throws IOException { +String name = "schema28"; +String comment = "comment28"; +String catalogPath = ""; +Schema schema = createSchema(name, comment, catalogPath, null); +Assertions.assertEquals(name, schema.name()); +Assertions.assertEquals(comment, schema.comment()); + +Throwable exception = +Assertions.assertThrows( +SchemaAlreadyExistsException.class, +() -> createSchema(name, comment, catalogPath, null)); +Assertions.assertEquals("Schema m1.c1.schema28 already exists", exception.getMessage()); + } + @Test public void testCreateSchemaWithCatalogLocation() throws IOException { String name = "schema12";
(gravitino) branch main updated: [#4128] improvement(core): Remove privileges of metalakes (#4139)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new f8a472916 [#4128] improvement(core): Remove privileges of metalakes (#4139) f8a472916 is described below commit f8a472916f3d58640cbbd9bf8b9cbc284d960e0b Author: roryqi AuthorDate: Wed Jul 17 20:48:40 2024 +0800 [#4128] improvement(core): Remove privileges of metalakes (#4139) ### What changes were proposed in this pull request? Remove privileges of metalakes. We use the ownership instead of metalake privileges. ### Why are the changes needed? Fix: #4128 ### Does this PR introduce _any_ user-facing change? Modify APIs. But this feature isn't released yet. ### How was this patch tested? Existing UTs --- .../java/org/apache/gravitino/MetadataObjects.java | 13 +- .../apache/gravitino/authorization/Privilege.java | 28 +-- .../apache/gravitino/authorization/Privileges.java | 229 ++--- .../gravitino/authorization/SecurableObjects.java | 24 --- .../authorization/TestSecurableObjects.java| 24 --- .../src/main/java/org/apache/gravitino/Entity.java | 12 -- .../relational/service/MetadataObjectService.java | 11 - .../relational/service/RoleMetaService.java| 8 - .../relational/service/TestSecurableObjects.java | 9 +- .../gravitino/server/web/rest/RoleOperations.java | 5 - .../server/web/rest/TestRoleOperations.java| 21 -- 11 files changed, 71 insertions(+), 313 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/MetadataObjects.java b/api/src/main/java/org/apache/gravitino/MetadataObjects.java index 70f795fa0..5136164c9 100644 --- a/api/src/main/java/org/apache/gravitino/MetadataObjects.java +++ b/api/src/main/java/org/apache/gravitino/MetadataObjects.java @@ -27,11 +27,7 @@ import org.apache.commons.lang3.StringUtils; /** The helper class for {@link MetadataObject}. 
*/ public class MetadataObjects { - /** - * The reserved name for the metadata object. - * - * It is used to represent the root metadata object of all metalakes. - */ + /** The reserved name for the metadata object. */ public static final String METADATA_OBJECT_RESERVED_NAME = "*"; private static final Splitter DOT_SPLITTER = Splitter.on('.'); @@ -106,13 +102,6 @@ public class MetadataObjects { * @return The parsed metadata object */ public static MetadataObject parse(String fullName, MetadataObject.Type type) { -if (METADATA_OBJECT_RESERVED_NAME.equals(fullName)) { - if (type != MetadataObject.Type.METALAKE) { -throw new IllegalArgumentException("If metadata object isn't metalake, it can't be `*`"); - } - return new MetadataObjectImpl(null, METADATA_OBJECT_RESERVED_NAME, type); -} - Preconditions.checkArgument( StringUtils.isNotBlank(fullName), "Metadata object full name cannot be blank"); diff --git a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java index 36229c8fc..5cb7b3214 100644 --- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java +++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java @@ -81,34 +81,28 @@ public interface Privilege { WRITE_TOPIC(0L, 1L << 18), /** The privilege to read a topic. */ READ_TOPIC(0L, 1L << 19), -/** The privilege to create a metalake. */ -CREATE_METALAKE(0L, 1L << 20), -/** The privilege to manage a metalake, including drop and alter a metalake. */ -MANAGE_METALAKE(0L, 1L << 21), -/** The privilege to use a metalake, the user can load the information of the metalake. 
*/ -USE_METALAKE(0L, 1L << 22), /** The privilege to add a user */ -ADD_USER(0L, 1L << 23), +ADD_USER(0L, 1L << 20), /** The privilege to remove a user */ -REMOVE_USER(0L, 1L << 24), +REMOVE_USER(0L, 1L << 21), /** The privilege to get a user */ -GET_USER(0L, 1L << 25), +GET_USER(0L, 1L << 22), /** The privilege to add a group */ -ADD_GROUP(0L, 1L << 26), +ADD_GROUP(0L, 1L << 23), /** The privilege to remove a group */ -REMOVE_GROUP(0L, 1L << 27), +REMOVE_GROUP(0L, 1L << 24), /** The privilege to get a group */ -GET_GROUP(0L, 1L << 28), +GET_GROUP(0L, 1L << 25), /** The privilege to create a role */ -CREATE_ROLE(0L, 1L << 29), +CREATE_ROLE(0L, 1L << 26), /** The privilege to delete a role */ -DELETE_ROLE(0L, 1L << 30), +DELETE_ROLE(0L, 1L << 27), /** The privilege to grant a role to the user or the group. */ -GRANT_RO
(gravitino) branch main updated: [#4020] feat(core): Add Tag Manage core logic to support tag operations (part-2) (#4109)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new d85a9b482 [#4020] feat(core): Add Tag Manage core logic to support tag operations (part-2) (#4109) d85a9b482 is described below commit d85a9b4825f75e075daf74fe4109831dbe1bf373 Author: Jerry Shao AuthorDate: Tue Jul 16 20:53:04 2024 +0800 [#4020] feat(core): Add Tag Manage core logic to support tag operations (part-2) (#4109) ### What changes were proposed in this pull request? This PR add the second part tag core logic to support associate tags with metadata object, and query the associations between tags and metadata objects. ### Why are the changes needed? This is a part of work to support tag system. Fix: #4020 ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? Add UTs to cover the logic. --- .../exceptions/TagAlreadyAssociatedException.java | 49 +++ .../java/org/apache/gravitino/EntityStore.java | 11 + .../gravitino/storage/relational/JDBCBackend.java | 32 ++ .../storage/relational/RelationalBackend.java | 3 +- .../storage/relational/RelationalEntityStore.java | 41 ++- .../storage/relational/mapper/TagMetaMapper.java | 33 +- .../mapper/TagMetadataObjectRelMapper.java | 112 ++- .../relational/po/TagMetadataObjectRelPO.java | 130 .../gravitino/storage/relational/po/TagPO.java | 21 +- .../gravitino/storage/relational/po/TopicPO.java | 26 +- .../MetadataObjectService.java}| 14 +- .../relational/service/RoleMetaService.java| 6 +- .../storage/relational/service/TagMetaService.java | 198 +++ .../storage/relational/utils/POConverters.java | 26 ++ .../gravitino/tag/SupportsTagOperations.java | 96 ++ .../java/org/apache/gravitino/tag/TagManager.java | 210 +++- .../apache/gravitino/utils/MetadataObjectUtil.java | 97 ++ .../apache/gravitino/utils/NameIdentifierUtil.java | 55 
.../relational/service/TestTagMetaService.java | 361 + .../storage/relational/utils/TestPOConverters.java | 17 + .../org/apache/gravitino/tag/TestTagManager.java | 355 +++- .../gravitino/utils/TestMetadataObjectUtil.java| 124 +++ .../gravitino/utils/TestNameIdentifierUtil.java| 66 scripts/h2/schema-h2.sql | 2 +- scripts/mysql/schema-0.6.0-mysql.sql | 2 +- scripts/mysql/upgrade-0.5.0-to-0.6.0-mysql.sql | 2 +- 26 files changed, 2024 insertions(+), 65 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java b/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java new file mode 100644 index 0..61cab11fb --- /dev/null +++ b/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.exceptions; + +import com.google.errorprone.annotations.FormatMethod; + +/** Exception thrown when a tag with specified name already associated to a metadata object. */ +public class TagAlreadyAssociatedException extends AlreadyExistsException { + + /** + * Constructs a new exception with the specified detail message. 
+ * + * @param message the detail message. + * @param args the arguments to the message. + */ + @FormatMethod + public TagAlreadyAssociatedException(String message, Object... args) { +super(message, args); + } + + /** + * Constructs a new exception with the specified detail message and cause. + * + * @param cause the cause. + * @param message the detail message. + * @param args the arguments to the message. + */ + @FormatMethod + public TagAlreadyAssociatedException(Throwable cause, String message, Object... args) { +super(cause, message, args); + } +} diff --git a/core/src/
(gravitino) branch main updated: [#4107] feat(all): Add testConnection API for catalog (#4108)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 116e5ae15 [#4107] feat(all): Add testConnection API for catalog (#4108) 116e5ae15 is described below commit 116e5ae1591feca15b8eef20b3d77fcb3a99e8a8 Author: mchades AuthorDate: Tue Jul 16 18:54:29 2024 +0800 [#4107] feat(all): Add testConnection API for catalog (#4108) ### What changes were proposed in this pull request? Add testConnection API for catalog ### Why are the changes needed? Fix: #4107 ### Does this PR introduce _any_ user-facing change? yes, add a new API ### How was this patch tested? tests added --- .../org/apache/gravitino/SupportsCatalogs.java | 18 .../exceptions/ConnectionFailedException.java | 49 + build.gradle.kts | 4 +- .../catalog/hadoop/HadoopCatalogOperations.java| 21 .../hadoop/TestHadoopCatalogOperations.java| 14 +++ .../hadoop/integration/test/HadoopCatalogIT.java | 3 +- .../test/HadoopUserImpersonationIT.java| 3 +- .../catalog/hive/HiveCatalogOperations.java| 27 + .../catalog/hive/TestHiveCatalogOperations.java| 28 + .../catalog/jdbc/JdbcCatalogOperations.java| 20 .../jdbc/converter/JdbcExceptionConverter.java | 4 +- .../catalog/jdbc/TestJdbcCatalogOperations.java| 54 ++ .../jdbc/operation/SqliteDatabaseOperations.java | 16 ++- .../catalog/kafka/KafkaCatalogOperations.java | 23 +++- .../catalog/kafka/TestKafkaCatalogOperations.java | 15 +++ .../kafka/integration/test/CatalogKafkaIT.java | 28 - .../iceberg/IcebergCatalogOperations.java | 26 + .../iceberg/TestIcebergCatalogOperations.java | 45 .../lakehouse/paimon/PaimonCatalogOperations.java | 25 + .../lakehouse/paimon/TestPaimonCatalog.java| 13 +++ .../org/apache/gravitino/client/ErrorHandlers.java | 4 + .../apache/gravitino/client/GravitinoClient.java | 22 .../apache/gravitino/client/GravitinoMetalake.java | 43 
.../gravitino/client/TestGravitinoClient.java | 65 .../gravitino/dto/responses/ErrorConstants.java| 3 + .../gravitino/dto/responses/ErrorResponse.java | 26 + .../org/apache/gravitino/StringIdentifier.java | 3 + .../apache/gravitino/catalog/CatalogManager.java | 116 + .../catalog/CatalogNormalizeDispatcher.java| 12 +++ .../apache/gravitino/catalog/SupportsCatalogs.java | 18 .../gravitino/connector/CatalogOperations.java | 20 .../gravitino/listener/CatalogEventDispatcher.java | 12 +++ .../gravitino/catalog/DummyCatalogOperations.java | 11 ++ .../gravitino/catalog/TestCatalogManager.java | 22 +++- .../gravitino/connector/TestCatalogOperations.java | 16 +++ docs/open-api/catalogs.yaml| 70 + docs/open-api/openapi.yaml | 3 + .../integration/test/client/CatalogIT.java | 20 +++- .../integration/test/client/MetalakeIT.java| 3 +- .../org/apache/gravitino/server/web/Utils.java | 14 +++ .../server/web/rest/CatalogOperations.java | 39 +++ .../server/web/rest/ExceptionHandlers.java | 31 ++ .../org/apache/gravitino/server/web/TestUtils.java | 10 ++ .../gravitino/server/web/rest/TestCatalog.java | 10 ++ .../server/web/rest/TestCatalogOperations.java | 55 -- 45 files changed, 1038 insertions(+), 46 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java b/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java index c3805edec..8644430bc 100644 --- a/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java +++ b/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java @@ -114,4 +114,22 @@ public interface SupportsCatalogs { * @return True if the catalog was dropped, false otherwise. */ boolean dropCatalog(String catalogName); + + /** + * Test whether the catalog with specified parameters can be connected to before creating it. + * + * @param catalogName the name of the catalog. + * @param type the type of the catalog. + * @param provider the provider of the catalog. + * @param comment the comment of the catalog. 
+ * @param properties the properties of the catalog. + * @throws Exception if the test failed. + */ + void testConnection( + String catalogName, + Catalog.Type type, + String provider, + String comment, + Map properties) + throws Exception; } diff --git a/api/src/main/java/org/apache/gravitino/exceptions/ConnectionFailedException.java b/api/src/main/java/org/apache/gravitino/exceptions
(gravitino) branch main updated: [#4126] improvement(core): Remove MetalakeAdmin API (#4127)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 627e88ffb [#4126] improvement(core): Remove MetalakeAdmin API (#4127) 627e88ffb is described below commit 627e88ffb9cbb8f5efe1b6e8af3a69f8e051d3e0 Author: roryqi AuthorDate: Tue Jul 16 18:21:18 2024 +0800 [#4126] improvement(core): Remove MetalakeAdmin API (#4127) ### What changes were proposed in this pull request? Remove MetalakeAdmin API. ### Why are the changes needed? Fix: #4126 ### Does this PR introduce _any_ user-facing change? Remove API. But this API isn't released. ### How was this patch tested? Existing tests. --- .../gravitino/client/GravitinoAdminClient.java | 45 - .../apache/gravitino/client/TestMetalakeAdmin.java | 110 --- .../authorization/AccessControlManager.java| 91 ++--- .../gravitino/authorization/AdminManager.java | 122 .../authorization/TestAccessControlManager.java| 21 -- .../gravitino/server/web/rest/GroupOperations.java | 33 +++- .../server/web/rest/MetalakeAdminOperations.java | 98 -- .../server/web/rest/PermissionOperations.java | 76 ++-- .../gravitino/server/web/rest/RoleOperations.java | 37 ++-- .../gravitino/server/web/rest/UserOperations.java | 32 +++- .../web/rest/TestMetalakeAdminOperations.java | 213 - 11 files changed, 149 insertions(+), 729 deletions(-) diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java index 67d32289f..b70839733 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java @@ -67,7 +67,6 @@ public class GravitinoAdminClient extends GravitinoClientBase implements Support private static final String 
API_METALAKES_USERS_PATH = "api/metalakes/%s/users/%s"; private static final String API_METALAKES_GROUPS_PATH = "api/metalakes/%s/groups/%s"; private static final String API_METALAKES_ROLES_PATH = "api/metalakes/%s/roles/%s"; - private static final String API_ADMIN_PATH = "api/admins/%s"; private static final String API_PERMISSION_PATH = "api/metalakes/%s/permissions/%s"; private static final String BLANK_PLACE_HOLDER = ""; @@ -334,50 +333,6 @@ public class GravitinoAdminClient extends GravitinoClientBase implements Support return resp.getGroup(); } - /** - * Adds a new metalake admin. - * - * @param user The name of the User. - * @return The added User instance. - * @throws UserAlreadyExistsException If a metalake admin with the same name already exists. - * @throws RuntimeException If adding the User encounters storage issues. - */ - public User addMetalakeAdmin(String user) throws UserAlreadyExistsException { -UserAddRequest req = new UserAddRequest(user); -req.validate(); - -UserResponse resp = -restClient.post( -String.format(API_ADMIN_PATH, BLANK_PLACE_HOLDER), -req, -UserResponse.class, -Collections.emptyMap(), -ErrorHandlers.userErrorHandler()); -resp.validate(); - -return resp.getUser(); - } - - /** - * Removes a metalake admin. - * - * @param user The name of the User. - * @return True if the User was successfully removed, false only when there's no such metalake - * admin, otherwise it will throw an exception. - * @throws RuntimeException If removing the User encounters storage issues. - */ - public boolean removeMetalakeAdmin(String user) { -RemoveResponse resp = -restClient.delete( -String.format(API_ADMIN_PATH, user), -RemoveResponse.class, -Collections.emptyMap(), -ErrorHandlers.userErrorHandler()); -resp.validate(); - -return resp.removed(); - } - /** * Gets a Role. 
* diff --git a/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java b/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java deleted file mode 100644 index 6dd10965d..0 --- a/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "Lic
(gravitino) branch main updated: [#4165] improvement(Filesystem): Improve the potential storage replication issues in Hadoop GVFS (#4166)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new b2a930225 [#4165] improvement(Filesystem): Improve the potential storage replication issues in Hadoop GVFS (#4166) b2a930225 is described below commit b2a93022553a258a8f599f2e5a6e0bbdc3e5ace7 Author: xloya <982052...@qq.com> AuthorDate: Tue Jul 16 17:03:26 2024 +0800 [#4165] improvement(Filesystem): Improve the potential storage replication issues in Hadoop GVFS (#4166) ### What changes were proposed in this pull request? Currently, Hadoop GVFS does not implement the `getDefaultBlockSize(Path f)` and `getBlockSize(Path f)` methods, which will result in the use of the FileSystem default values, causing the storage replications and block sizes to not meet expectations. ### Why are the changes needed? Fix: #4165 ### How was this patch tested? Add UTs and ITs. 
- Co-authored-by: xiaojiebao --- .../hadoop/GravitinoVirtualFileSystem.java | 12 +++ .../gravitino/filesystem/hadoop/TestGvfsBase.java | 16 + .../hadoop/GravitinoVirtualFileSystemIT.java | 42 ++ 3 files changed, 70 insertions(+) diff --git a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java index ef51c1753..bbcf0c71e 100644 --- a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java +++ b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java @@ -515,6 +515,18 @@ public class GravitinoVirtualFileSystem extends FileSystem { return context.getFileSystem().mkdirs(context.getActualPath(), permission); } + @Override + public short getDefaultReplication(Path f) { +FilesetContext context = getFilesetContext(f); +return context.getFileSystem().getDefaultReplication(context.getActualPath()); + } + + @Override + public long getDefaultBlockSize(Path f) { +FilesetContext context = getFilesetContext(f); +return context.getFileSystem().getDefaultBlockSize(context.getActualPath()); + } + @Override public synchronized void close() throws IOException { // close all actual FileSystems diff --git a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java index ce87d8d02..13b365a25 100644 --- a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java +++ b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java @@ -591,4 +591,20 @@ public class TestGvfsBase extends GravitinoMockServerBase { () -> fs.extractIdentifier(new URI("/catalog1/schema1/fileset1/dir//"))); } } + + @Test + 
public void testGetDefaultReplications() throws IOException { +try (GravitinoVirtualFileSystem fs = +(GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) { + assertEquals(1, fs.getDefaultReplication(managedFilesetPath)); +} + } + + @Test + public void testGetDefaultBlockSize() throws IOException { +try (GravitinoVirtualFileSystem fs = +(GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) { + assertEquals(32 * 1024 * 1024, fs.getDefaultBlockSize(managedFilesetPath)); +} + } } diff --git a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java index 9321080d9..feb8446be 100644 --- a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java +++ b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java @@ -422,6 +422,48 @@ public class GravitinoVirtualFileSystemIT extends AbstractIT { } } + @Test + public void testGetDefaultReplications() throws IOException { +String filesetName = "test_get_default_replications"; +NameIdentifier filesetIdent = NameIdentifier.of(schemaName, filesetName); +Catalog catalog = metalake.loadCatalog(catalogName); +String storageLocation = genStorageLocation(filesetName); +catalog +.asFilesetCatalog() +.createFileset( +filesetIdent, +"fileset comment&q
(gravitino) branch main updated: [#4077] improvement(docs): Fixed an incorrect description and some incomplete examples (#4146)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 0961af355 [#4077] improvement(docs): Fixed an incorrect description and some incomplete examples (#4146) 0961af355 is described below commit 0961af35585f4419ded7c53969d5422e3f1b65b0 Author: JinsYin AuthorDate: Mon Jul 15 11:13:17 2024 +0800 [#4077] improvement(docs): Fixed an incorrect description and some incomplete examples (#4146) ### What changes were proposed in this pull request? Fixed an incorrect description. ### Why are the changes needed? Fix: #4145 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing required - Co-authored-by: rqyin --- docs/trino-connector/supported-catalog.md | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/trino-connector/supported-catalog.md b/docs/trino-connector/supported-catalog.md index 54d0bedd3..306b2dcc2 100644 --- a/docs/trino-connector/supported-catalog.md +++ b/docs/trino-connector/supported-catalog.md @@ -14,7 +14,6 @@ The catalogs currently supported by the Apache Gravitino connector are as follow ## Create catalog -Trino itself does not support creating catalogs. Users can create catalogs through the Gravitino connector and then load them into Trino. The Gravitino connector provides the following stored procedures to create, delete, and alter catalogs. User can also use the system table `catalog` to describe all the catalogs. 
@@ -87,9 +86,9 @@ call gravitino.system.create_catalog( 'jdbc-mysql', Map( Array['jdbc-url', 'jdbc-user', 'jdbc-password', 'jdbc-driver'], -Array['jdbc:mysql:192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 'com.mysql.cj.jdbc.Driver'] +Array['jdbc:mysql://192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 'com.mysql.cj.jdbc.Driver'] ) -) +); call gravitino.system.drop_datalog('mysql'); -- Call stored procedures with name. @@ -98,10 +97,10 @@ call gravitino.system.create_catalog( provider => 'jdbc-mysql', properties => Map( Array['jdbc-url', 'jdbc-user', 'jdbc-password', 'jdbc-driver'], -Array['jdbc:mysql:192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 'com.mysql.cj.jdbc.Driver'] +Array['jdbc:mysql://192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 'com.mysql.cj.jdbc.Driver'] ), ignore_exist => true -) +); call gravitino.system.drop_datalog( catalog => 'mysql' @@ -112,10 +111,10 @@ call gravitino.system.alter_catalog( catalog => 'mysql', set_properties=> Map( Array['jdbc-url'], -Array['jdbc:mysql:127.0.0.1:3306?useSSL=false'] +Array['jdbc:mysql://127.0.0.1:3306?useSSL=false'] ), remove_properties => Array['jdbc-driver'] -) +); ``` if you need more information about catalog, please refer to:
(gravitino) branch main updated: [#4157] fix(doc): Fix the doc format in `how-to-build` (#4158)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 245c4579f [#4157] fix(doc): Fix the doc format in `how-to-build` (#4158) 245c4579f is described below commit 245c4579f935062dc39207f0724ca3ca0764e6d5 Author: Rui Fan <1996fan...@gmail.com> AuthorDate: Sat Jul 13 10:25:33 2024 +0800 [#4157] fix(doc): Fix the doc format in `how-to-build` (#4158) ### What changes were proposed in this pull request? [#4157] fix(doc): Fix the doc format in `how-to-build` ### Why are the changes needed? https://github.com/apache/gravitino/blob/main/docs/how-to-build.md#quick-start The code block should only show the `git clone g...@github.com:apache/gravitino.git` https://github.com/user-attachments/assets/ac00cf90-85a5-4ad7-8c3e-ae24ccd4abe8";> Fix: #4157 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The doc format is fine in my dev branch. https://github.com/1996fanrui/gravitino/blob/4157/fix-doc-format/docs/how-to-build.md#quick-start https://github.com/user-attachments/assets/d54634f7-1bd8-4df8-9ea2-9537a71c0bcc";> --- docs/how-to-build.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to-build.md b/docs/how-to-build.md index f95206403..e609be671 100644 --- a/docs/how-to-build.md +++ b/docs/how-to-build.md @@ -40,7 +40,7 @@ license: "This software is licensed under the Apache License version 2." 1. Clone the Gravitino project. -If you want to contribute to this open-source project, please fork the project on GitHub first. After forking, clone the forked project to your local environment, make your changes, and submit a pull request (PR). +If you want to contribute to this open-source project, please fork the project on GitHub first. 
After forking, clone the forked project to your local environment, make your changes, and submit a pull request (PR). ```shell git clone g...@github.com:apache/gravitino.git
(gravitino) branch main updated: [#4155]fix(doc): Fixed an unreachable link (#4156)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new fad4beeda [#4155]fix(doc): Fixed an unreachable link (#4156) fad4beeda is described below commit fad4beedaf240b74563120edc0fddd22464e5279 Author: JinsYin AuthorDate: Fri Jul 12 18:05:34 2024 +0800 [#4155]fix(doc): Fixed an unreachable link (#4156) ### What changes were proposed in this pull request? Fixed an unreachable link. ### Why are the changes needed? Fix: #4155 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing required Co-authored-by: rqyin --- docs/lakehouse-iceberg-catalog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 15e463c96..1434405e0 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -234,7 +234,7 @@ Meanwhile, the data types other than listed above are mapped to Gravitino **[Ext ### Table properties -You can pass [Iceberg table properties](https://iceberg.apache.org/docs/1.3.1/configuration/) to Gravitino when creating an Iceberg table. +You can pass [Iceberg table properties](https://web.archive.org/web/20231210013537/https://iceberg.apache.org/docs/1.3.1/configuration/) to Gravitino when creating an Iceberg table. The Gravitino server doesn't allow passing the following reserved fields.
(gravitino) branch main updated: [#4105] improvement(core): Remove the logic of getValidRoles (#4121)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new d6135447a [#4105] improvement(core): Remove the logic of getValidRoles (#4121) d6135447a is described below commit d6135447af900e15954b21d6ccf7637d66237237 Author: roryqi AuthorDate: Fri Jul 12 16:27:05 2024 +0800 [#4105] improvement(core): Remove the logic of getValidRoles (#4121) ### What changes were proposed in this pull request? Remove the logic of getValidRoles. ### Why are the changes needed? Fix: #4105 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Modify some test cases. --- .../authorization/AccessControlManager.java| 2 +- .../gravitino/authorization/PermissionManager.java | 39 ++--- .../gravitino/authorization/RoleManager.java | 25 -- .../gravitino/authorization/UserGroupManager.java | 40 +++--- .../TestAccessControlManagerForPermissions.java| 36 +-- .../relational/service/TestGroupMetaService.java | 6 .../relational/service/TestUserMetaService.java| 6 7 files changed, 46 insertions(+), 108 deletions(-) diff --git a/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java b/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java index eb7dbfb04..26ec14a7e 100644 --- a/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java +++ b/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java @@ -53,7 +53,7 @@ public class AccessControlManager { public AccessControlManager(EntityStore store, IdGenerator idGenerator, Config config) { this.adminManager = new AdminManager(store, idGenerator, config); this.roleManager = new RoleManager(store, idGenerator, config); -this.userGroupManager = new UserGroupManager(store, idGenerator, roleManager); +this.userGroupManager = new 
UserGroupManager(store, idGenerator); this.permissionManager = new PermissionManager(store, roleManager); } diff --git a/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java b/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java index 3b24e8cde..95a59c18c 100644 --- a/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java +++ b/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java @@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory; /** * PermissionManager is used for managing the logic the granting and revoking roles. Role is used - * for manging permissions. PermissionManager will filter the invalid roles, too. + * for manging permissions. */ class PermissionManager { private static final Logger LOG = LoggerFactory.getLogger(PermissionManager.class); @@ -67,14 +67,17 @@ class PermissionManager { UserEntity.class, Entity.EntityType.USER, userEntity -> { -List roleEntities = -roleManager.getValidRoles(metalake, userEntity.roleNames(), userEntity.roleIds()); - +List roleEntities = Lists.newArrayList(); +if (userEntity.roleNames() != null) { + for (String role : userEntity.roleNames()) { +roleEntities.add(roleManager.getRole(metalake, role)); + } +} List roleNames = Lists.newArrayList(toRoleNames(roleEntities)); List roleIds = Lists.newArrayList(toRoleIds(roleEntities)); for (RoleEntity roleEntityToGrant : roleEntitiesToGrant) { - if (roleNames.contains(roleEntityToGrant.name())) { + if (roleIds.contains(roleEntityToGrant.id())) { LOG.warn( "Failed to grant, role {} already exists in the user {} of metalake {}", roleEntityToGrant.name(), @@ -129,13 +132,17 @@ class PermissionManager { GroupEntity.class, Entity.EntityType.GROUP, groupEntity -> { -List roleEntities = -roleManager.getValidRoles(metalake, groupEntity.roleNames(), groupEntity.roleIds()); +List roleEntities = Lists.newArrayList(); +if (groupEntity.roleNames() != null) { + for (String role : 
groupEntity.roleNames()) { +roleEntities.add(roleManager.getRole(metalake, role)); + } +} List roleNames = Lists.newArrayList(toRoleNames(roleEntities)); List roleIds = Lists.newArrayList(toRoleIds(roleEntities)); for (RoleEntity roleEntityToGrant : roleEntitiesToGrant) { - if (ro
(gravitino) branch main updated: [#4135] fix(trino-connector): Fix typo about Gravitino in trino-connector (#4144)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new b80be6ec7 [#4135] fix(trino-connector): Fix typo about Gravitino in trino-connector (#4144) b80be6ec7 is described below commit b80be6ec790e79f6ea4c7dd7ba9ab36deb6e3a41 Author: Dev Parikh <51128342+dev79...@users.noreply.github.com> AuthorDate: Fri Jul 12 13:36:24 2024 +0530 [#4135] fix(trino-connector): Fix typo about Gravitino in trino-connector (#4144) ### Why are the changes needed? Fix: #4135 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs --- .../datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java index f2311ea6a..89d330e1a 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java @@ -91,7 +91,7 @@ public class TrinoQueryTestTool { + "TrinoTestTool --testset=tpch --tester_id=5 --catalog=hive --auto=all\n\n" + "Run all the tpch testset's testers in the 'testsets/tpch' directory under 'mysql' " + "catalog with manual start the test environment:\n" -+ "TrinoTestTool --testset=tpch -- catalog=mysql --auto=none --gravition_uri=http://10.3.21.12:8090 " ++ "TrinoTestTool --testset=tpch -- catalog=mysql --auto=none --gravitino_uri=http://10.3.21.12:8090 " + "--trino_uri=http://10.3.21.12:8080 --mysql_url=jdbc:mysql:/10.3.21.12 \n"; System.out.println(example); return;
(gravitino) branch main updated: [#4129] improvement(core): Support hold multiple tree lock within a thread at the same time (#4130)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 1b1ef58b0 [#4129] improvement(core): Support hold multiple tree lock within a thread at the same time (#4130) 1b1ef58b0 is described below commit 1b1ef58b0fe9f9f84575319fbd953d8ea3351f61 Author: Qi Yu AuthorDate: Fri Jul 12 10:09:06 2024 +0800 [#4129] improvement(core): Support hold multiple tree lock within a thread at the same time (#4130) ### What changes were proposed in this pull request? Add the value of the name identifier in the holdingThreadTimestamp to support holding multiple tree lock at the same time. ### Why are the changes needed? To support more user sceanrio Fix: #4129 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Add new test class `TestTreeLockUtils` --- .../com/datastrato/gravitino/lock/LockManager.java | 6 +- .../com/datastrato/gravitino/lock/TreeLock.java| 21 +- .../datastrato/gravitino/lock/TreeLockNode.java| 82 +++--- .../gravitino/lock/TestTreeLockUtils.java | 51 ++ 4 files changed, 131 insertions(+), 29 deletions(-) diff --git a/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java b/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java index b1dbb27fe..9fb0ef6e1 100644 --- a/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java +++ b/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java @@ -132,12 +132,12 @@ public class LockManager { // Check self node.getHoldingThreadTimestamp() .forEach( -(thread, ts) -> { +(threadIdentifier, ts) -> { // If the thread is holding the lock for more than 30 seconds, we will log it. 
if (System.currentTimeMillis() - ts > 3) { LOG.warn( -"Dead lock detected for thread {} on node {}, threads that holding the node: {} ", -thread, +"Dead lock detected for thread with identifier {} on node {}, threads that holding the node: {} ", +threadIdentifier, node, node.getHoldingThreadTimestamp()); } diff --git a/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java b/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java index 76d9ab028..02cb0c757 100644 --- a/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java +++ b/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java @@ -104,8 +104,17 @@ public class TreeLock { try { treeLockNode.lock(type); heldLocks.push(Pair.of(treeLockNode, type)); + +treeLockNode.addHoldingThreadTimestamp( +Thread.currentThread(), identifier, System.currentTimeMillis()); if (LOG.isTraceEnabled()) { - LOG.trace("Locked node: {}, lock type: {}", treeLockNode, type); + LOG.trace( + "Node {} has been lock with '{}' lock, hold by {} with ident '{}' at {}", + this, + lockType, + Thread.currentThread(), + identifier, + System.currentTimeMillis()); } } catch (Exception e) { LOG.error( @@ -140,8 +149,16 @@ public class TreeLock { TreeLockNode current = pair.getLeft(); LockType type = pair.getRight(); current.unlock(type); + + long holdStartTime = current.removeHoldingThreadTimestamp(Thread.currentThread(), identifier); if (LOG.isTraceEnabled()) { -LOG.trace("Unlocked node: {}, lock type: {}", current, type); +LOG.trace( +"Node {} has been unlock with '{}' lock, hold by {} with ident '{}' for {} ms", +this, +lockType, +Thread.currentThread(), +identifier, +System.currentTimeMillis() - holdStartTime); } } diff --git a/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java b/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java index a4953c541..92db979aa 100644 --- a/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java +++ 
b/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java @@ -19,6 +19,7 @@ package com.datastrato.gravitino.lock; +import com.datastrato.gravitino.NameIdentifier; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Objects; import com.google.common.collect.Lists; @@ -44,13 +45,60 @@ public class TreeLockNode { private final String name; private final ReentrantReadW
(gravitino-playground) branch main updated: [MINOR] fix(git): correct gitignore file name (#53)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git The following commit(s) were added to refs/heads/main by this push: new 02db830 [MINOR] fix(git): correct gitignore file name (#53) 02db830 is described below commit 02db83012bedc777d3bd97297e8b346aeb1f4aae Author: mchades <793098...@qq.com> AuthorDate: Thu Jul 11 18:01:33 2024 +0800 [MINOR] fix(git): correct gitignore file name (#53) correct gitignore file name --- .gitigore => .gitignore | 0 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.gitigore b/.gitignore similarity index 100% rename from .gitigore rename to .gitignore
(gravitino) branch main updated: [#3733] feat(core): Unified authorization framework (#3946)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 87b58fee3 [#3733] feat(core): Unified authorization framework (#3946) 87b58fee3 is described below commit 87b58fee3ce630033857e9665ff0d63dd6d778c6 Author: Xun Liu AuthorDate: Wed Jul 10 17:20:14 2024 +0800 [#3733] feat(core): Unified authorization framework (#3946) ### What changes were proposed in this pull request? Provide an authorization hook plugin framework, In the next step we can develop an authorization plugin, just like Catalogs. + [Unified authorization design document](https://docs.google.com/document/d/1RtKfU0uO-N7OjcrB3DOtY1ZsbhVp3GsLSJ26c_YQosQ/edit) https://github.com/apache/gravitino/assets/3677382/b9a06b79-057a-494c-a1be-15691f478de1";> ### Why are the changes needed? Fix: #3733 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI Passed. 
--- .../java/com/datastrato/gravitino/Catalog.java | 6 + .../gravitino/authorization/RoleChange.java| 155 + .../gravitino/authorization/SecurableObjects.java | 14 +- build.gradle.kts | 3 +- .../catalog/hive/TestHiveCatalogOperations.java| 3 +- .../gravitino/authorization/PermissionManager.java | 2 +- .../gravitino/connector/BaseCatalog.java | 63 + .../connector/BaseCatalogPropertiesMetadata.java | 7 + .../authorization/AuthorizationPlugin.java | 29 .../authorization/AuthorizationProvider.java | 33 + .../connector/authorization/BaseAuthorization.java | 64 + .../authorization/RoleAuthorizationPlugin.java | 70 ++ .../UserGroupAuthorizationPlugin.java | 143 +++ .../com/datastrato/gravitino/meta/RoleEntity.java | 2 +- .../java/com/datastrato/gravitino/TestCatalog.java | 9 ++ .../connector/authorization/TestAuthorization.java | 95 + .../mysql/TestMySQLAuthorization.java | 37 + .../mysql/TestMySQLAuthorizationPlugin.java| 105 ++ .../ranger/TestRangerAuthorization.java| 37 + .../ranger/TestRangerAuthorizationPlugin.java | 105 ++ ...o.connector.authorization.AuthorizationProvider | 20 +++ integration-test/build.gradle.kts | 2 +- .../test/authorization/ranger/RangerIT.java| 56 +++- 23 files changed, 1042 insertions(+), 18 deletions(-) diff --git a/api/src/main/java/com/datastrato/gravitino/Catalog.java b/api/src/main/java/com/datastrato/gravitino/Catalog.java index 2f75cab38..d7627cf14 100644 --- a/api/src/main/java/com/datastrato/gravitino/Catalog.java +++ b/api/src/main/java/com/datastrato/gravitino/Catalog.java @@ -88,6 +88,12 @@ public interface Catalog extends Auditable { */ String CLOUD_REGION_CODE = "cloud.region-code"; + /** + * This variable is used as a key in properties of catalogs to use authorization provider in + * Gravitino. + */ + String AUTHORIZATION_PROVIDER = "authorization-provider"; + /** @return The name of the catalog. 
*/ String name(); diff --git a/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java b/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java new file mode 100644 index 0..4271bc7f0 --- /dev/null +++ b/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package com.datastrato.gravitino.authorization; + +import com.datastrato.gravitino.annotation.Evolving; + +/** The RoleChange interface defines the public API for managing roles in an authorization. */ +@Evolving +public interface RoleChange { + /** + * Create a RoleChange to add a securable object into a role. + * + * @param securableObject The securable object. + * @return return a RoleChang
(gravitino) branch main updated: [#4000] improvement(client-python): Support simple auth for PyGVFS (#4001)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 0f18b638a [#4000] improvement(client-python): Support simple auth for PyGVFS (#4001) 0f18b638a is described below commit 0f18b638a2ce2f946bb8fc50ddfed7e88c1b73e4 Author: xloya <982052...@qq.com> AuthorDate: Wed Jul 10 13:39:31 2024 +0800 [#4000] improvement(client-python): Support simple auth for PyGVFS (#4001) ### What changes were proposed in this pull request? Support simple auth for gravitino client in PyGVFS. The integration test depends on this PR: #3876 #3931 . When #3876 #3931 is merged, I will add integration tests and docs for this PR. ### Why are the changes needed? Fix: #4000 ### How was this patch tested? Add UTs and ITs. - Co-authored-by: xiaojiebao --- clients/client-python/gravitino/filesystem/gvfs.py | 43 +++--- .../gravitino/filesystem/gvfs_config.py| 29 +++ .../tests/integration/test_gvfs_with_hdfs.py | 24 .../tests/unittests/test_gvfs_with_local.py| 36 -- docs/how-to-use-gvfs.md| 43 ++ 5 files changed, 157 insertions(+), 18 deletions(-) diff --git a/clients/client-python/gravitino/filesystem/gvfs.py b/clients/client-python/gravitino/filesystem/gvfs.py index a50c97f4c..a2b2461b3 100644 --- a/clients/client-python/gravitino/filesystem/gvfs.py +++ b/clients/client-python/gravitino/filesystem/gvfs.py @@ -32,8 +32,10 @@ from pyarrow.fs import HadoopFileSystem from readerwriterlock import rwlock from gravitino.api.catalog import Catalog from gravitino.api.fileset import Fileset +from gravitino.auth.simple_auth_provider import SimpleAuthProvider from gravitino.client.gravitino_client import GravitinoClient from gravitino.exceptions.base import GravitinoRuntimeException +from gravitino.filesystem.gvfs_config import GVFSConfig from gravitino.name_identifier import NameIdentifier PROTOCOL_NAME = "gvfs" @@ -94,15 +96,44 
@@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem): def __init__( self, -server_uri=None, -metalake_name=None, -cache_size=20, -cache_expired_time=3600, +server_uri: str = None, +metalake_name: str = None, +options: Dict = None, **kwargs, ): +"""Initialize the GravitinoVirtualFileSystem. +:param server_uri: Gravitino server URI +:param metalake_name: Gravitino metalake name +:param options: Options for the GravitinoVirtualFileSystem +:param kwargs: Extra args for super filesystem +""" self._metalake = metalake_name -self._client = GravitinoClient( -uri=server_uri, metalake_name=metalake_name, check_version=False +auth_type = ( +GVFSConfig.DEFAULT_AUTH_TYPE +if options is None +else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.DEFAULT_AUTH_TYPE) +) +if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE: +self._client = GravitinoClient( +uri=server_uri, +metalake_name=metalake_name, +auth_data_provider=SimpleAuthProvider(), +) +else: +raise GravitinoRuntimeException( +f"Authentication type {auth_type} is not supported." +) +cache_size = ( +GVFSConfig.DEFAULT_CACHE_SIZE +if options is None +else options.get(GVFSConfig.CACHE_SIZE, GVFSConfig.DEFAULT_CACHE_SIZE) +) +cache_expired_time = ( +GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME +if options is None +else options.get( +GVFSConfig.CACHE_EXPIRED_TIME, GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME +) ) self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time) self._cache_lock = rwlock.RWLockFair() diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py b/clients/client-python/gravitino/filesystem/gvfs_config.py new file mode 100644 index 0..539b9045a --- /dev/null +++ b/clients/client-python/gravitino/filesystem/gvfs_config.py @@ -0,0 +1,29 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://w
(gravitino) branch main updated: [#3764] improvement(docs): Add user docs for using GVFS in Python (#3931)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 14222171d [#3764] improvement(docs): Add user docs for using GVFS in Python (#3931) 14222171d is described below commit 14222171d4be88be9b4b88471140316123751248 Author: xloya <982052...@qq.com> AuthorDate: Wed Jul 10 11:06:07 2024 +0800 [#3764] improvement(docs): Add user docs for using GVFS in Python (#3931) ### What changes were proposed in this pull request? Provides documentation for users to use Gravitino Virtual FileSystem in Python. ### Why are the changes needed? Fix: #3764 ### How was this patch tested? No code changes, no testing required. - Co-authored-by: xiaojiebao --- docs/how-to-use-gvfs.md | 251 +--- 1 file changed, 237 insertions(+), 14 deletions(-) diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 654c90387..46e0c1b60 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -11,8 +11,10 @@ directories, with `fileset` you can manage non-tabular data through Gravitino. F details, you can read [How to manage fileset metadata using Gravitino](./manage-fileset-metadata-using-gravitino.md). To use `Fileset` managed by Gravitino, Gravitino provides a virtual file system layer called -the Gravitino Virtual File System (GVFS) that's built on top of the Hadoop Compatible File System -(HCFS) interface. +the Gravitino Virtual File System (GVFS): +* In Java, it's built on top of the Hadoop Compatible File System(HCFS) interface. +* In Python, it's built on top of the [fsspec](https://filesystem-spec.readthedocs.io/en/stable/index.html) +interface. GVFS is a virtual layer that manages the files and directories in the fileset through a virtual path, without needing to understand the specific storage details of the fileset. 
You can access @@ -22,6 +24,12 @@ the files or folders as shown below: gvfs://fileset/${catalog_name}/${schema_name}/${fileset_name}/sub_dir/ ``` +In python GVFS, you can also access the files or folders as shown below: + +```text +fileset/${catalog_name}/${schema_name}/${fileset_name}/sub_dir/ +``` + Here `gvfs` is the scheme of the GVFS, `fileset` is the root directory of the GVFS which can't modified, and `${catalog_name}/${schema_name}/${fileset_name}` is the virtual path of the fileset. You can access the files and folders under this virtual path by concatenating a file or folder @@ -30,14 +38,16 @@ name to the virtual path. The usage pattern for GVFS is the same as HDFS or S3. GVFS internally manages the path mapping and convert automatically. -## Prerequisites +## 1. Managing files of Fileset with Java GVFS + +### Prerequisites + A Hadoop environment with HDFS running. GVFS has been tested against Hadoop 3.1.0. It is recommended to use Hadoop 3.1.0 or later, but it should work with Hadoop 2. x. Please create an [issue](https://www.github.com/apache/gravitino/issues) if you find any compatibility issues. -## Configuration +### Configuration | Configuration item| Description | Default value | Required| Since version | |---|-|---|-|---| @@ -94,7 +104,7 @@ You can configure these properties in two ways: ``` -## How to use the Apache Gravitino Virtual File System +### Usage examples First make sure to obtain the Gravitino Virtual File System runtime jar, which you can get in two ways: @@ -111,7 +121,7 @@ two ways: ./gradlew :clients:filesystem-hadoop3-runtime:build -x test ``` -### Use GVFS via Hadoop shell command + Via Hadoop shell command You can use the Hadoop shell command to perform operations on the fileset storage. 
For example: @@ -131,7 +141,7 @@ kinit -kt your_kerberos.keytab your_kerbe...@xxx.com ./${HADOOP_HOME}/bin/hadoop dfs -ls gvfs://fileset/test_catalog/test_schema/test_fileset_1 ``` -### Using the GVFS via Java code + Via Java code You can also perform operations on the files or directories managed by fileset through Java code. Make sure that your code is using the correct Hadoop environment, and that your environment @@ -150,7 +160,7 @@ FileSystem fs = file
(gravitino) branch main updated: [#4012] improvement(client-python): Refactor Error Handling in client-python (#4093)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 9e80cbcf9 [#4012] improvement(client-python): Refactor Error Handling in client-python (#4093) 9e80cbcf9 is described below commit 9e80cbcf9ea8de21eabec9fdb3651d5b376c7b26 Author: noidname01 <55401762+noidnam...@users.noreply.github.com> AuthorDate: Tue Jul 9 18:01:33 2024 +0800 [#4012] improvement(client-python): Refactor Error Handling in client-python (#4093) ### What changes were proposed in this pull request? * Refactor the error handling structure, each API can implement their own error handler to raise custom exceptions * Add unit test for error handler, but unit tests and integration tests for each API(ex. metalake, catalog, schema) have not been added, I will create issues for them. - [ ] Add Metalake Error Handler and related exceptions, test cases - [ ] Add Catalog Error Handler and related exceptions, test cases - [ ] Add Schema Error Handler and related exceptions, test cases - [ ] Add OAuth Error Handler and related exceptions, test cases * Create `gravitino/exceptions/base.py` to define all the exceptions. * Remove some unused files and exceptions ### Why are the changes needed? Fix: #4012 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
UT added and one IT added, test with `./gradlew clients:client-python:test` - Co-authored-by: TimWang --- .../gravitino/catalog/fileset_catalog.py | 18 +++- .../gravitino/client/gravitino_client.py | 12 --- .../gravitino/client/gravitino_client_base.py | 2 +- .../gravitino/client/gravitino_metalake.py | 12 --- .../gravitino/client/gravitino_version.py | 2 +- clients/client-python/gravitino/constants/error.py | 71 .../gravitino/dto/responses/base_response.py | 5 +- .../gravitino/dto/responses/error_response.py | 79 ++ .../gravitino/dto/responses/version_response.py| 4 +- clients/client-python/gravitino/exceptions/base.py | 83 ++ .../exceptions/gravitino_runtime_exception.py | 25 - .../__init__.py} | 6 -- .../error_handler.py} | 26 +++-- .../exceptions/handlers/fileset_error_handler.py | 43 .../rest_error_handler.py} | 26 +++-- .../gravitino/exceptions/not_found_exception.py| 24 - clients/client-python/gravitino/filesystem/gvfs.py | 2 +- clients/client-python/gravitino/name_identifier.py | 4 +- clients/client-python/gravitino/namespace.py | 5 +- .../client-python/gravitino/rest/rest_message.py | 6 -- .../client-python/gravitino/utils/exceptions.py| 114 .../client-python/gravitino/utils/http_client.py | 75 ++--- clients/client-python/scripts/generate_version.py | 2 +- .../tests/integration/base_hadoop_env.py | 2 +- .../tests/integration/hdfs_container.py| 2 +- .../tests/integration/integration_test_env.py | 2 +- .../tests/integration/test_fileset_catalog.py | 6 ++ .../tests/integration/test_gvfs_with_hdfs.py | 2 +- .../tests/unittests/test_error_handler.py | 120 + .../tests/unittests/test_gravitino_version.py | 2 +- .../tests/unittests/test_gvfs_with_local.py| 2 +- 31 files changed, 531 insertions(+), 253 deletions(-) diff --git a/clients/client-python/gravitino/catalog/fileset_catalog.py b/clients/client-python/gravitino/catalog/fileset_catalog.py index 82c345411..5ab2e00e6 100644 --- a/clients/client-python/gravitino/catalog/fileset_catalog.py +++ 
b/clients/client-python/gravitino/catalog/fileset_catalog.py @@ -35,6 +35,7 @@ from gravitino.name_identifier import NameIdentifier from gravitino.namespace import Namespace from gravitino.utils import HTTPClient from gravitino.rest.rest_utils import encode_string +from gravitino.exceptions.handlers.fileset_error_handler import FILESET_ERROR_HANDLER logger = logging.getLogger(__name__) @@ -88,7 +89,10 @@ class FilesetCatalog(BaseSchemaCatalog): full_namespace = self._get_fileset_full_namespace(namespace) -resp = self.rest_client.get(self.format_fileset_request_path(full_namespace)) +resp = self.rest_client.get( +self.format_fileset_request_path(full_namespace), +error_handler=FILESET_ERROR_HANDLER, +) entity_list_resp = EntityListResponse.from_json(resp.body, infer_missing=True) entity_list_resp.validate() @@ -114,7 +118,8 @@ class FilesetC
(gravitino) branch main updated: [#4018] feat(core): Add tag management logic for Tag System (Part 1) (#4019)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 178eb37f8 [#4018] feat(core): Add tag management logic for Tag System (Part 1) (#4019) 178eb37f8 is described below commit 178eb37f8b5013abdd2464dda764ddd5b0787f38 Author: Jerry Shao AuthorDate: Tue Jul 9 09:35:33 2024 +0800 [#4018] feat(core): Add tag management logic for Tag System (Part 1) (#4019) ### What changes were proposed in this pull request? This PR tracks the work of adding the core logics for tag management. ### Why are the changes needed? This is a part of work for adding tag support in Gravitino. Fix: #4018 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UTs added. --- .../com/datastrato/gravitino/meta/TagEntity.java | 16 -- .../gravitino/storage/relational/JDBCBackend.java | 15 ++ .../storage/relational/mapper/TagMetaMapper.java | 175 .../mapper/TagMetadataObjectRelMapper.java | 59 .../gravitino/storage/relational/po/TagPO.java | 142 ++ .../relational/service/MetalakeMetaService.java| 22 +- .../storage/relational/service/TagMetaService.java | 180 + .../session/SqlSessionFactoryHelper.java | 4 + .../storage/relational/utils/POConverters.java | 57 .../com/datastrato/gravitino/tag/TagManager.java | 185 - .../com/datastrato/gravitino/meta/TestEntity.java | 20 -- .../storage/relational/TestJDBCBackend.java| 37 +++ .../relational/service/TestTagMetaService.java | 300 + .../storage/relational/utils/TestPOConverters.java | 91 +++ .../datastrato/gravitino/tag/TestTagManager.java | 248 + .../integration/test/util/AbstractIT.java | 9 +- .../relational/service/FilesetMetaServiceIT.java | 7 +- scripts/h2/schema-h2.sql | 31 ++- scripts/mysql/schema-0.5.0-mysql.sql | 2 +- scripts/mysql/schema-0.6.0-mysql.sql | 31 ++- scripts/mysql/upgrade-0.5.0-to-0.6.0-mysql.sql | 29 ++ 21 files changed, 
1609 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java b/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java index b4acaf71f..c6e01ec7e 100644 --- a/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java +++ b/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java @@ -24,7 +24,6 @@ import com.datastrato.gravitino.Auditable; import com.datastrato.gravitino.Entity; import com.datastrato.gravitino.Field; import com.datastrato.gravitino.HasIdentifier; -import com.datastrato.gravitino.MetadataObject; import com.datastrato.gravitino.Namespace; import com.datastrato.gravitino.tag.Tag; import com.google.common.collect.Maps; @@ -47,10 +46,6 @@ public class TagEntity implements Tag, Entity, Auditable, HasIdentifier { public static final Field PROPERTIES = Field.optional("properties", Map.class, "The properties of the tag entity."); - public static final Field ASSOCIATED_OBJECTS = - Field.optional( - "objects", MetadataObject[].class, "The associated objects of the tag entity."); - public static final Field AUDIT_INFO = Field.required("audit_info", Audit.class, "The audit details of the tag entity."); @@ -59,7 +54,6 @@ public class TagEntity implements Tag, Entity, Auditable, HasIdentifier { private Namespace namespace; private String comment; private Map properties; - private MetadataObject[] objects = null; private Audit auditInfo; private TagEntity() {} @@ -72,7 +66,6 @@ public class TagEntity implements Tag, Entity, Auditable, HasIdentifier { fields.put(COMMENT, comment); fields.put(PROPERTIES, properties); fields.put(AUDIT_INFO, auditInfo); -fields.put(ASSOCIATED_OBJECTS, objects); return Collections.unmodifiableMap(fields); } @@ -112,10 +105,6 @@ public class TagEntity implements Tag, Entity, Auditable, HasIdentifier { return Optional.empty(); } - public MetadataObject[] objects() { -return objects; - } - @Override public Audit auditInfo() { return auditInfo; @@ -181,11 +170,6 @@ 
public class TagEntity implements Tag, Entity, Auditable, HasIdentifier { return this; } -public Builder withMetadataObjects(MetadataObject[] objects) { - tagEntity.objects = objects; - return this; -} - public Builder withAuditInfo(Audit auditInfo) { tagEntity.auditInfo = auditInfo; return this; diff --git a/core/src/main/java/com/datastrato/gravitino/storage/relational/JDBCBackend.java b/co
(gravitino) branch main updated: [#3968] improvement(core): Disable KV entity store and optimize CI (#3975)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 27ca87535 [#3968] improvement(core): Disable KV entity store and optimize CI (#3975) 27ca87535 is described below commit 27ca87535e30535df2dc8f4108570671cd9e80bd Author: Qi Yu AuthorDate: Mon Jul 8 19:49:18 2024 +0800 [#3968] improvement(core): Disable KV entity store and optimize CI (#3975) ### What changes were proposed in this pull request? - Disabling support for the KV entity store and adjusting the tests accordingly. - Change CI about backend option `jdbcBackend` ### Why are the changes needed? We are going to deprecate kv entity store Fix: #3968 ### Why are the changes needed? N/A ### How was this patch tested? Existing test. --- .github/workflows/backend-integration-test.yml | 2 +- build.gradle.kts | 5 +- catalogs/catalog-hadoop/build.gradle.kts | 10 + .../hadoop/TestHadoopCatalogOperations.java| 81 ++-- catalogs/catalog-kafka/build.gradle.kts| 10 + .../catalog/kafka/TestKafkaCatalogOperations.java | 69 ++- .../lakehouse/paimon/TestGravitinoPaimonTable.java | 11 ++ .../lakehouse/paimon/TestPaimonCatalog.java| 13 +- .../catalog/lakehouse/paimon/TestPaimonSchema.java | 13 ++ .../gravitino/filesystem/hadoop/TestGvfsBase.java | 8 + .../java/com/datastrato/gravitino/Configs.java | 2 +- .../datastrato/gravitino/EntityStoreFactory.java | 9 +- .../gravitino/storage/TestEntityStorage.java | 35 +--- .../storage/kv/TestEntityKeyEncoding.java | 2 + .../gravitino/storage/kv/TestKvEntityStorage.java | 1 + .../storage/kv/TestKvGarbageCollector.java | 2 + .../storage/kv/TestKvNameMappingService.java | 2 + .../gravitino/storage/kv/TestRocksDBKvBackend.java | 2 + .../gravitino/storage/kv/TestStorageVersion.java | 2 + .../storage/kv/TestTransactionIdGenerator.java | 1 + .../storage/kv/TestTransactionalKvBackend.java | 1 + 
.../storage/relational/TestJDBCBackend.java| 9 +- .../storage/relational/session/TestSqlSession.java | 8 +- core/src/test/resources/h2/schema-h2.sql | 218 - docs/gravitino-server-config.md| 2 +- gradle/libs.versions.toml | 1 + .../integration/test/util/AbstractIT.java | 2 +- .../relational/service/FilesetMetaServiceIT.java | 4 +- 28 files changed, 242 insertions(+), 283 deletions(-) diff --git a/.github/workflows/backend-integration-test.yml b/.github/workflows/backend-integration-test.yml index da534f467..437acbd02 100644 --- a/.github/workflows/backend-integration-test.yml +++ b/.github/workflows/backend-integration-test.yml @@ -61,7 +61,7 @@ jobs: architecture: [linux/amd64] java-version: [ 8, 11, 17 ] test-mode: [ embedded, deploy ] -backend: [ jdbcBackend, kvBackend] +backend: [ mysql, h2] env: PLATFORM: ${{ matrix.architecture }} steps: diff --git a/build.gradle.kts b/build.gradle.kts index ec5898921..a29b0405d 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -178,9 +178,8 @@ allprojects { // Change poll image pause time from 30s to 60s param.environment("TESTCONTAINERS_PULL_PAUSE_TIMEOUT", "60") - if (project.hasProperty("jdbcBackend")) { -param.environment("jdbcBackend", "true") - } + val jdbcDatabase = project.properties["jdbcBackend"] as? String ?: "h2" + param.environment("jdbcBackend", jdbcDatabase) val testMode = project.properties["testMode"] as? 
String ?: "embedded" param.systemProperty("gravitino.log.path", project.buildDir.path + "/${project.name}-integration-test.log") diff --git a/catalogs/catalog-hadoop/build.gradle.kts b/catalogs/catalog-hadoop/build.gradle.kts index ccdf7c996..0dfa23154 100644 --- a/catalogs/catalog-hadoop/build.gradle.kts +++ b/catalogs/catalog-hadoop/build.gradle.kts @@ -53,6 +53,7 @@ dependencies { testImplementation(libs.bundles.log4j) testImplementation(libs.mockito.core) + testImplementation(libs.mockito.inline) testImplementation(libs.mysql.driver) testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) @@ -101,6 +102,15 @@ tasks { } tasks.test { + doFirst { +val testMode = project.properties["testMode"] as? String ?: "embedded" +if (testMode == "deploy") { + environment("GRAVITINO_HOME", project.rootDir.path + "/distribution/package") +} el
(gravitino) branch main updated: [#4086] Remove Datastrato name and fix support email (#4087)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 46f825f8c [#4086] Remove Datastrato name and fix support email (#4087) 46f825f8c is described below commit 46f825f8c028af75e620ef41268a40c98080c3bc Author: Justin Mclean AuthorDate: Mon Jul 8 13:08:24 2024 +1000 [#4086] Remove Datastrato name and fix support email (#4087) ### What changes were proposed in this pull request? Remove Datastrato name and fix the support email. Note DockerHub still needs fixing but another issue covers this. ### Why are the changes needed? As we are now an ASF project. Fix: #4086 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Built and tested locally. --- .asf.yaml | 2 +- .github/CONTRIBUTING | 4 ++-- build.gradle.kts | 6 +++--- clients/client-python/setup.py| 4 ++-- dev/docker/doris/Dockerfile | 2 +- dev/docker/gravitino/Dockerfile | 2 +- dev/docker/hive/Dockerfile| 2 +- dev/docker/kerberos-hive/Dockerfile | 2 +- dev/docker/ranger/Dockerfile | 2 +- dev/docker/trino/Dockerfile | 2 +- docs/glossary.md | 2 +- docs/how-to-sign-releases.md | 6 +++--- docs/how-to-use-the-playground.md | 8 .../gravitino/integration/test/web/ui/MetalakePageTest.java | 2 +- .../gravitino/integration/test/web/ui/pages/MetalakePage.java | 2 +- web/src/app/rootLayout/Footer.js | 6 +++--- 16 files changed, 27 insertions(+), 27 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index 84a019b42..de078eeed 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -17,7 +17,7 @@ github: description: World's most powerful open data catalog for building a high-performance, geo-distributed and federated metadata lake. 
- homepage: https://datastrato.ai/docs/ + homepage: https://gravitino.apache.org labels: - metadata - data-catalog diff --git a/.github/CONTRIBUTING b/.github/CONTRIBUTING index b65ca92fe..2c8ad3161 100644 --- a/.github/CONTRIBUTING +++ b/.github/CONTRIBUTING @@ -72,11 +72,11 @@ We value and appreciate the diverse contributions and ideas from the community. For significant contributions to Gravitino, we require contributors to sign an Individual Contributor License Agreement (ICLA). This ensures that the project and its community can properly manage and maintain intellectual property rights. -If you plan to make a large contribution, please contact us at [jus...@datastrato.com](mailto:jus...@datastrato.com) to discuss the ICLA process. +If you plan to make a large contribution, please contact us at [d...@gravitino.apache.org](mailto:d...@gravitino.apache.org) to discuss the ICLA process. ## Contact -If you have any questions or need further assistance, you can reach out to us at [jus...@datastrato.com](mailto:jus...@datastrato.com). +If you have any questions or need further assistance, you can reach out to us at [d...@gravitino.apache.org](mailto:d...@gravitino.apache.org). 
## License diff --git a/build.gradle.kts b/build.gradle.kts index 51d1f2175..ec5898921 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -373,7 +373,7 @@ subprojects { pom { name.set("Gravitino") description.set("Gravitino is a high-performance, geo-distributed and federated metadata lake.") - url.set("https://datastrato.ai";) + url.set("https://gravitino.apache.org";) licenses { license { name.set("The Apache Software License, Version 2.0") @@ -382,9 +382,9 @@ subprojects { } developers { developer { - id.set("The maintainers of Gravitino") + id.set("The Gravitino community") name.set("support") - email.set("d...@datastrato.com") + email.set("d...@gravitino.apache.org") } } scm { diff --git a/clients/client-python/setup.py b/clients/client-python/setup.py index 48e8af031..02790f1e3 100644 --- a/clients/client-python/setup.py +++ b/clients/client-python/setup.py @@ -34,8 +34,8 @@ setup( long_description=
(gravitino) branch main updated: [#4064] Fix GitHub and resources (#4070)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 34a81df3f [#4064] Fix GitHub and resources (#4070) 34a81df3f is described below commit 34a81df3f4c47ea50fd64ed5eb413a6187178240 Author: Justin Mclean AuthorDate: Fri Jul 5 16:36:19 2024 +1000 [#4064] Fix GitHub and resources (#4070) ### What changes were proposed in this pull request? Change GitHub and resources to ASF ones. Still to fix are Docker and the documentation URL. But this can be merged now. ### Why are the changes needed? As we are now an ASF project. Fix: #4064 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Built locally with no issues. --- CONTRIBUTING.md | 6 +++--- build.gradle.kts | 6 +++--- clients/client-python/setup.py| 2 +- docs/docker-image-details.md | 2 +- docs/getting-started.md | 13 ++--- docs/hadoop-catalog.md| 6 +++--- docs/how-to-build.md | 4 ++-- docs/how-to-install.md| 4 ++-- docs/how-to-use-gvfs.md | 4 ++-- docs/how-to-use-python-client.md | 15 ++- docs/how-to-use-the-playground.md | 2 +- docs/index.md | 4 ++-- docs/manage-table-partition-using-gravitino.md| 4 ++-- docs/publish-docker-images.md | 2 +- docs/trino-connector/installation.md | 4 ++-- .../com/datastrato/gravitino/server/web/JettyServer.java | 2 +- web/src/app/rootLayout/Footer.js | 4 ++-- 17 files changed, 40 insertions(+), 44 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c4c83b6f6..89a63d79d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,13 +55,13 @@ Before you get started, please read and follow these guidelines to ensure a smoo Either click the "Fork" button at the top right of the repository's page on GitHub OR create a fork on your local machine using `git clone`. 
```bash -git clone https://github.com/datastrato/gravitino.git +git clone https://github.com/apache/gravitino.git cd gravitino ``` ### Development Setup -Once you have cloned the [GitHub repository](https://github.com/datastrato/gravitino), see [how to build](/docs/how-to-build.md) for instructions on how to build, or you can use the provided docker images at [Datastrato's DockerHub repository](https://hub.docker.com/u/datastrato). +Once you have cloned the [GitHub repository](https://github.com/apache/gravitino), see [how to build](/docs/how-to-build.md) for instructions on how to build, or you can use the provided docker images at [Datastrato's DockerHub repository](https://hub.docker.com/u/datastrato). To stop and start a local Gravitino server via `bin/gravitino.sh start` and `bin/gravitino.sh stop` in a Gravitino distribution, see [how to build](/docs/how-to-build.md) for more instructions. @@ -225,7 +225,7 @@ If you have ideas for enhancements or new features, feel free to create an issue ### Good First Issues -If you are new to open source or can't find something to work on, check out the [Good First Issues list](https://github.com/datastrato/gravitino/contribute). +If you are new to open source or can't find something to work on, check out the [Good First Issues list](https://github.com/apache/gravitino/contribute). 
### Working on Issues diff --git a/build.gradle.kts b/build.gradle.kts index abba4ee2a..51d1f2175 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -384,12 +384,12 @@ subprojects { developer { id.set("The maintainers of Gravitino") name.set("support") - email.set("supp...@datastrato.com") + email.set("d...@datastrato.com") } } scm { -url.set("https://github.com/datastrato/gravitino";) -connection.set("scm:git:git://github.com/datastrato/gravitino.git") +url.set("https://github.com/apache/gravitino";) +connection.set("scm:git:git://github.com/apache/gravitino.git") } } } diff --git a/clients/client-python/setup.py b/clients/client-python/setup.py index d812e593c..48e8af031 100644 --- a/clients/client-python/setup.py +++ b/client
(gravitino) branch main updated: [#3760] improvement(client-python): Add Docker env and PyGVFS Integration tests (#3876)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 4312b632b [#3760] improvement(client-python): Add Docker env and PyGVFS Integration tests (#3876) 4312b632b is described below commit 4312b632b225b5a5a2ea6e0f0bdcbdd3092a1db8 Author: xloya <982052...@qq.com> AuthorDate: Fri Jul 5 11:26:27 2024 +0800 [#3760] improvement(client-python): Add Docker env and PyGVFS Integration tests (#3876) ### What changes were proposed in this pull request? Add Hive Docker env for client-python, and add integration tests for PyGVFS + HDFS. Depends on #3528. ### Why are the changes needed? Fix: #3760 ### How was this patch tested? Add some ITs. - Co-authored-by: xiaojiebao --- .github/workflows/python-integration-test.yml | 2 +- clients/client-python/build.gradle.kts | 55 +- clients/client-python/requirements-dev.txt | 3 +- .../tests/integration/base_hadoop_env.py | 101 +++ .../tests/integration/hdfs_container.py| 158 + .../tests/integration/integration_test_env.py | 86 +++ .../tests/integration/test_gvfs_with_hdfs.py | 704 + .../tests/integration/test_simple_auth_client.py | 4 +- 8 files changed, 1098 insertions(+), 15 deletions(-) diff --git a/.github/workflows/python-integration-test.yml b/.github/workflows/python-integration-test.yml index f2e5fd4ed..a7ffacfd7 100644 --- a/.github/workflows/python-integration-test.yml +++ b/.github/workflows/python-integration-test.yml @@ -66,7 +66,7 @@ jobs: for pythonVersion in "3.8" "3.9" "3.10" "3.11" do echo "Use Python version ${pythonVersion} to test the Python client." 
-./gradlew -PjdkVersion=${{ matrix.java-version }} -PpythonVersion=${pythonVersion} :clients:client-python:test +./gradlew -PjdkVersion=${{ matrix.java-version }} -PpythonVersion=${pythonVersion} -PskipDockerTests=false :clients:client-python:test # Clean Gravitino database to clean test data rm -rf ./distribution/package/data done diff --git a/clients/client-python/build.gradle.kts b/clients/client-python/build.gradle.kts index 68cc897e5..2cf83c376 100644 --- a/clients/client-python/build.gradle.kts +++ b/clients/client-python/build.gradle.kts @@ -16,12 +16,15 @@ * specific language governing permissions and limitations * under the License. */ +import de.undercouch.gradle.tasks.download.Download +import de.undercouch.gradle.tasks.download.Verify import io.github.piyushroshan.python.VenvTask import java.net.HttpURLConnection import java.net.URL plugins { id("io.github.piyushroshan.python-gradle-miniforge-plugin") version "1.0.0" + id("de.undercouch.download") version "5.6.0" } pythonPlugin { @@ -148,6 +151,10 @@ fun generatePypiProjectHomePage() { } } +val hadoopVersion = "2.7.3" +val hadoopPackName = "hadoop-${hadoopVersion}.tar.gz" +val hadoopDirName = "hadoop-${hadoopVersion}" +val hadoopDownloadUrl = "https://archive.apache.org/dist/hadoop/core/hadoop-${hadoopVersion}/${hadoopPackName}"; tasks { val pipInstall by registering(VenvTask::class) { venvExec = "pip" @@ -173,6 +180,26 @@ tasks { workingDir = projectDir.resolve("./tests/integration") } + val build by registering(VenvTask::class) { +dependsOn(pylint) +venvExec = "python" +args = listOf("scripts/generate_version.py") + } + + val downloadHadoopPack by registering(Download::class) { +dependsOn(build) +onlyIfModified(true) +src(hadoopDownloadUrl) +dest(layout.buildDirectory.dir("tmp")) + } + + val verifyHadoopPack by registering(Verify::class) { +dependsOn(downloadHadoopPack) +src(layout.buildDirectory.file("tmp/${hadoopPackName}")) +algorithm("MD5") +checksum("3455bb57e4b4906bbea67b58cca78fa8") + } + val 
integrationTest by registering(VenvTask::class) { doFirst { gravitinoServer("start") @@ -181,11 +208,23 @@ tasks { venvExec = "coverage" args = listOf("run", "--branch", "-m", "unittest") workingDir = projectDir.resolve("./tests/integration") -environment = mapOf( - "PROJECT_VERSION" to project.version, - "GRAVITINO_HOME" to project.rootDir.path + "/distribution/package", - "START_EXTERNAL_GRAVITINO" to "true" -) +val dockerTest = project.rootProject.extra["dockerTest"] as? Boolean
(gravitino) branch main updated: [#4073] Update policies to be in line with ASF policy. (#4080)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 2133d95ac [#4073] Update policies to be in line with ASF policy. (#4080) 2133d95ac is described below commit 2133d95ac86918c9345e9a2440fc646c0fbbe0d3 Author: Justin Mclean AuthorDate: Fri Jul 5 13:05:58 2024 +1000 [#4073] Update policies to be in line with ASF policy. (#4080) ### What changes were proposed in this pull request? Update project's current policies to be in line with ASF policy. ### Why are the changes needed? To comply with ASF policy. Fix: #4073 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built locally. --- CODE_OF_CONDUCT.md | 2 +- CONTRIBUTING.md| 2 +- GOVERNANCE.md | 30 +-- MAINTAINERS.md | 106 - SECURITY.md| 27 +- 5 files changed, 61 insertions(+), 106 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index c83f1d12e..7c9052a60 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -58,7 +58,7 @@ Examples of representing our community include using an official e-mail address, ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at . +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at . All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 028b782be..c4c83b6f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -281,7 +281,7 @@ All text files should use macOS/unix style line endings (LF) not windows style l ## Community and communication -Join the [community discourse group](https://gravitino.discourse.group) to discuss ideas and seek help. You are also encouraged to use GitHub discussions and follow Datastrato on social media to stay updated on project news. +Join the [community mailing list](https://lists.apache.org/list.html?d...@gravitino.apache.org) to discuss ideas and seek help and are also encouraged to use GitHub discussions. ## License diff --git a/GOVERNANCE.md b/GOVERNANCE.md index 5d28e44b4..37418d608 100644 --- a/GOVERNANCE.md +++ b/GOVERNANCE.md @@ -19,32 +19,4 @@ # Governance Policy -This document provides the governance policy for the project. Maintainers agree to this policy and to follow all project polices by adding their name to the [maintainers.md file](./MAINTAINERS.md). - -## 1. Roles - -This project includes the following roles. - -**1.1. Maintainers**. Maintainers oversee the development, maintenance, and updates of the project, and play a role in consensus decision-making. The addition or removal of Maintainers requires approval from the existing Maintainers. - -**1.2. Contributors**. Contributors are individuals who have made contributions to the project. - -## 2. Decisions - -**2.1. Consensus-Based Decision Making**. Decisions in projects are reached through consensus. Although unanimous agreement is preferred, it's not required. - -## 3. How We Work - -**3.1. Openness**. Anyone can participate in the project, and there should be minimal barriers to entry. - -**3.2. Balance**. The development process should balance the interests of all stakeholders. - -**3.3. Harmonization**. Good-faith efforts shall be made to resolve any conflicts. - -## 4. 
Trademarks - -Any names, trademarks or logos of the project may only be used if they indicate the project's source. - -## 5. Amendments - -Amendments to this governance policy may be made by approval of the Maintainers. +The Apache Gravitino project follows the standard [ASF governance model](https://www.apache.org/foundation/governance/) and [ASF policies](https://www.apache.org/foundation/policies/) and [ASF Incubator policies](https://incubator.apache.org/policy/incubation.html). \ No newline at end of file diff --git a/MAINTAINERS.md b/MAINTAINERS.md index ceb250584..f9b693345 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -17,58 +17,11 @@ under the License. --> -This document lists the maintainers and contributors of the Project. +Note: The maintainer and contributor tables at the end of this document list the maintainers and contributors of the project before it became an ASF project and are no longer updated. -# Maintainers +# Committers -Maintainers may be added once approved by the existing maintainers (see [Governance document](GOVERNANCE.md)). By adding your nam
(gravitino) branch main updated: [#4074] Add work in progress disclaimer. (#4076)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new e17a910c9 [#4074] Add work in progress disclaimer. (#4076) e17a910c9 is described below commit e17a910c94d5462e705a0870569b1252be14bd50 Author: Justin Mclean AuthorDate: Thu Jul 4 16:20:21 2024 +1000 [#4074] Add work in progress disclaimer. (#4076) ### What changes were proposed in this pull request? Add work in progress disclaimer. ### Why are the changes needed? Required by ASF incubator policy. Fix: #4074 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? N/A --- DISCLAIMER_WIP.txt | 6 ++ build.gradle.kts | 2 ++ 2 files changed, 8 insertions(+) diff --git a/DISCLAIMER_WIP.txt b/DISCLAIMER_WIP.txt new file mode 100644 index 0..c285b1214 --- /dev/null +++ b/DISCLAIMER_WIP.txt @@ -0,0 +1,6 @@ +Apache Gravitino is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the name of Apache TLP sponsor. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision-making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that [...] + +Some of the incubating project’s releases may not be fully compliant with ASF policy and while we have documented the licensing of all code in detail, we know that currently our release would: +- Contains code that may not be compatible with the Apache License + +If you are planning to incorporate this work into your product/project, please be aware that you will need to conduct a thorough licensing review to determine the overall implications of including this work. 
For the current status of this project through the Apache Incubator, visit: https://incubator.apache.org/projects/Apache Podling-Name.html \ No newline at end of file diff --git a/build.gradle.kts b/build.gradle.kts index 32e13d72c..abba4ee2a 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -484,6 +484,8 @@ tasks.rat { "web/src/lib/icons/svg/**/*.svg", "**/LICENSE.*", "**/NOTICE.*", +"DISCLAIMER_WIP.txt", +"DISCLAIMER.txt", "ROADMAP.md", "clients/client-python/.pytest_cache/*", "clients/client-python/gravitino.egg-info/*",
(gravitino-playground) branch main updated: Update README (#49)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git The following commit(s) were added to refs/heads/main by this push: new 60ba5be Update README (#49) 60ba5be is described below commit 60ba5bea17d36c0679e395dfe53e097a88538f8d Author: Justin Mclean AuthorDate: Thu Jul 4 16:11:52 2024 +1000 Update README (#49) Add ASF disclaimer, trademark attribution and Apache Gravitino where needed. --- README.md | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fec2bbb..863d035 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ ## Playground introduction -The playground is a complete Gravitino Docker runtime environment with `Hive`, `HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupyter`, and a `Gravitino` server. +The playground is a complete Apache Gravitino Docker runtime environment with `Hive`, `HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupyter`, and a `Gravitino` server. Depending on your network and computer, startup time may take 3-5 minutes. Once the playground environment has started, you can open [http://localhost:8090](http://localhost:8090) in a browser to access the Gravitino Web UI. @@ -74,7 +74,7 @@ cd gravitino-playground ./launch-playground.sh hive|gravitino|trino|postgresql|mysql|spark|jupyter ``` -## Experiencing Gravitino with Trino SQL +## Experiencing Apache Gravitino with Trino SQL ### Using Trino CLI in Docker Container @@ -223,3 +223,9 @@ select * from catalog_hive.sales.customers union select * from catalog_iceberg.sales.customers; ``` + +## ASF Incubator disclaimer + +Apache Gravitino is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. 
Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the proje [...] + +Apache®, Apache Gravitino™, Apache Hive™, Apache Iceberg™, and Apache Spark™ are either registered trademarks or trademarks of the Apache Software Foundation in the United States and/or other countries.
(gravitino-playground) branch main updated: Add ASF headers (#47)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git The following commit(s) were added to refs/heads/main by this push: new 48f1eb0 Add ASF headers (#47) 48f1eb0 is described below commit 48f1eb0d2ec72674833baf317ffe7bbeaa8eab7f Author: Justin Mclean AuthorDate: Thu Jul 4 16:08:19 2024 +1000 Add ASF headers (#47) Change file headers to ASF headers where needed --- README.md| 18 -- docker-compose.yaml | 18 -- healthcheck/gravitino-healthcheck.sh | 18 -- healthcheck/trino-healthcheck.sh | 18 -- init/gravitino/gravitino.conf| 18 -- init/gravitino/init.sh | 18 -- init/hive/init.sh| 18 -- init/jupyter/init.sh | 18 -- init/mysql/init.sql | 18 -- init/postgres/init.sql | 18 -- init/spark/init.sh | 18 -- init/spark/spark-defaults.conf | 18 -- init/trino/init.sh | 18 -- init/trino/init.sql | 18 -- launch-playground.sh | 18 -- 15 files changed, 240 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index e638802..fec2bbb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,20 @@ ## Playground introduction diff --git a/docker-compose.yaml b/docker-compose.yaml index 039d032..7be5912 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,6 +1,20 @@ # -# Copyright 2023 Datastrato Pvt Ltd. -# This software is licensed under the Apache License version 2. +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. # version: '3.0' services: diff --git a/healthcheck/gravitino-healthcheck.sh b/healthcheck/gravitino-healthcheck.sh index f1f6952..4f9e35f 100755 --- a/healthcheck/gravitino-healthcheck.sh +++ b/healthcheck/gravitino-healthcheck.sh @@ -1,7 +1,21 @@ #!/bin/bash # -# Copyright 2023 Datastrato Pvt Ltd. -# This software is licensed under the Apache License version 2. +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. # set -ex diff --git a/healthcheck/trino-healthcheck.sh b/healthcheck/trino-healthcheck.sh index e4826e2..752c7bb 100755 --- a/healthcheck/trino-healthcheck.sh +++ b/healthcheck/trino-healthcheck.sh @@ -1,7 +1,21 @@ #!/bin/bash # -# Copyright 2023 Datastrato Pvt Ltd. -# This software is licensed under the Apache License version 2. 
+# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
(gravitino) branch main updated (5b6d71af8 -> 828658162)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git from 5b6d71af8 [#4007] improvement: Use template to reduce Privileges duplicate codes (#4010) add 828658162 [#4066] improvement(build): Add dependabots and protected_tags (#4067) No new revisions were added by this update. Summary of changes: .asf.yaml | 4 1 file changed, 4 insertions(+)
(gravitino) branch main updated: [MINOR] fix(client-python): fix license header in new python file (#4051)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 0463ea458 [MINOR] fix(client-python): fix license header in new python file (#4051) 0463ea458 is described below commit 0463ea458b78a5a9538962ad1a075f2cc5fcbb85 Author: Shaofeng Shi AuthorDate: Wed Jul 3 16:30:08 2024 +0800 [MINOR] fix(client-python): fix license header in new python file (#4051) ### What changes were proposed in this pull request? fix license header in new python file ### Why are the changes needed? fix license header in new python file --- clients/client-python/gravitino/api/__init__.py| 1 - clients/client-python/gravitino/api/audit.py | 1 - clients/client-python/gravitino/api/auditable.py | 1 - clients/client-python/gravitino/api/catalog.py | 1 - clients/client-python/gravitino/api/catalog_change.py | 1 - clients/client-python/gravitino/api/fileset.py | 1 - clients/client-python/gravitino/api/fileset_change.py | 1 - clients/client-python/gravitino/api/metalake.py| 1 - clients/client-python/gravitino/api/metalake_change.py | 1 - clients/client-python/gravitino/api/schema.py | 1 - clients/client-python/gravitino/api/schema_change.py | 1 - .../client-python/gravitino/api/supports_schemas.py| 1 - clients/client-python/gravitino/auth/__init__.py | 1 - clients/client-python/gravitino/auth/auth_constants.py | 1 - .../client-python/gravitino/auth/auth_data_provider.py | 1 - .../gravitino/auth/simple_auth_provider.py | 1 - clients/client-python/gravitino/catalog/__init__.py| 1 - .../gravitino/catalog/base_schema_catalog.py | 1 - .../client-python/gravitino/catalog/fileset_catalog.py | 1 - clients/client-python/gravitino/client/__init__.py | 1 - .../gravitino/client/gravitino_admin_client.py | 1 - .../client-python/gravitino/client/gravitino_client.py | 1 - .../gravitino/client/gravitino_client_base.py | 1 - 
.../gravitino/client/gravitino_metalake.py | 1 - .../gravitino/client/gravitino_version.py | 1 - clients/client-python/gravitino/constants/__init__.py | 1 - clients/client-python/gravitino/constants/doc.py | 1 - clients/client-python/gravitino/constants/root.py | 1 - clients/client-python/gravitino/constants/timeout.py | 1 - clients/client-python/gravitino/constants/version.py | 1 - clients/client-python/gravitino/dto/__init__.py| 1 - clients/client-python/gravitino/dto/audit_dto.py | 1 - clients/client-python/gravitino/dto/catalog_dto.py | 1 - clients/client-python/gravitino/dto/dto_converters.py | 1 - clients/client-python/gravitino/dto/fileset_dto.py | 1 - clients/client-python/gravitino/dto/metalake_dto.py| 1 - .../client-python/gravitino/dto/requests/__init__.py | 1 - .../gravitino/dto/requests/catalog_create_request.py | 1 - .../gravitino/dto/requests/catalog_update_request.py | 1 - .../gravitino/dto/requests/catalog_updates_request.py | 1 - .../gravitino/dto/requests/fileset_create_request.py | 1 - .../gravitino/dto/requests/fileset_update_request.py | 1 - .../gravitino/dto/requests/fileset_updates_request.py | 1 - .../gravitino/dto/requests/metalake_create_request.py | 1 - .../gravitino/dto/requests/metalake_update_request.py | 1 - .../gravitino/dto/requests/metalake_updates_request.py | 1 - .../gravitino/dto/requests/schema_create_request.py| 1 - .../gravitino/dto/requests/schema_update_request.py| 1 - .../gravitino/dto/requests/schema_updates_request.py | 1 - .../client-python/gravitino/dto/responses/__init__.py | 1 - .../gravitino/dto/responses/base_response.py | 1 - .../gravitino/dto/responses/catalog_list_response.py | 1 - .../gravitino/dto/responses/catalog_response.py| 1 - .../gravitino/dto/responses/drop_response.py | 1 - .../gravitino/dto/responses/entity_list_response.py| 1 - .../gravitino/dto/responses/fileset_response.py| 1 - .../gravitino/dto/responses/metalake_list_response.py | 1 - .../gravitino/dto/responses/metalake_response.py | 1 - 
.../gravitino/dto/responses/schema_response.py | 1 - .../gravitino/dto/responses/version_response.py| 1 - clients/client-python/gravitino/dto/schema_dto.py | 1 - clients/client-python/gravitino/dto/version_dto.py | 1 - clients/client-python/gravitino/exceptions/__init__.py | 1 - .../exceptions/gravitino_runtime_exception.py | 1 - .../exceptions/illegal_name_identifier_exception.py| 1 - .../exceptions/illegal_namespace_exception.py | 1 - .../gravitino/exceptions/no_such_metalake_exception.py | 1 - .../gravitino
(gravitino) branch main updated: [#4048] Update README with incubator disclaimer (#4049)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new f387a441c [#4048] Update README with incubator disclaimer (#4049) f387a441c is described below commit f387a441c8408ae41dabe0e5683fe16cde87a4ef Author: Justin Mclean AuthorDate: Wed Jul 3 18:00:28 2024 +1000 [#4048] Update README with incubator disclaimer (#4049) ### What changes were proposed in this pull request? Update README with ASF Incubator disclaimer and updated links and used Apache Gravitino where needed. ### Why are the changes needed? To comply with ASF policy Fix: #4048 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A - only changes a text file --- README.md | 34 +++--- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 44a8eba49..b7e3a9cc2 100644 --- a/README.md +++ b/README.md @@ -17,20 +17,20 @@ under the License. 
--> -# Gravitino - -[![GitHub Actions Build](https://github.com/datastrato/gravitino/actions/workflows/build.yml/badge.svg)](https://github.com/datastrato/gravitino/actions/workflows/build.yml) -[![GitHub Actions Integration Test](https://github.com/datastrato/gravitino/actions/workflows/integration-test.yml/badge.svg)](https://github.com/datastrato/gravitino/actions/workflows/integration-test.yml) -[![License](https://img.shields.io/github/license/datastrato/gravitino)](https://github.com/datastrato/gravitino/blob/main/LICENSE) -[![Contributors](https://img.shields.io/github/contributors/datastrato/gravitino)](https://github.com/datastrato/gravitino/graphs/contributors) -[![Release](https://img.shields.io/github/v/release/datastrato/gravitino)](https://github.com/datastrato/gravitino/releases) -[![Open Issues](https://img.shields.io/github/issues-raw/datastrato/gravitino)](https://github.com/datastrato/gravitino/issues) -[![Last Committed](https://img.shields.io/github/last-commit/datastrato/gravitino)](https://github.com/datastrato/gravitino/commits/main/) +# Apache Gravitino (incubating) + +[![GitHub Actions Build](https://github.com/apache/gravitino/actions/workflows/build.yml/badge.svg)](https://github.com/apache/gravitino/actions/workflows/build.yml) +[![GitHub Actions Integration Test](https://github.com/apache/gravitino/actions/workflows/integration-test.yml/badge.svg)](https://github.com/apache/gravitino/actions/workflows/integration-test.yml) +[![License](https://img.shields.io/github/license/apache/gravitino)](https://github.com/apache/gravitino/blob/main/LICENSE) +[![Contributors](https://img.shields.io/github/contributors/apache/gravitino)](https://github.com/apache/gravitino/graphs/contributors) +[![Release](https://img.shields.io/github/v/release/apache/gravitino)](https://github.com/apache/gravitino/releases) +[![Open Issues](https://img.shields.io/github/issues-raw/apache/gravitino)](https://github.com/apache/gravitino/issues) +[![Last 
Committed](https://img.shields.io/github/last-commit/apache/gravitino)](https://github.com/apache/gravitino/commits/main/) [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8358/badge)](https://www.bestpractices.dev/projects/8358) ## Introduction -Gravitino is a high-performance, geo-distributed, and federated metadata lake. It manages the metadata directly in different sources, types, and regions. It also provides users with unified metadata access for data and AI assets. +Apache Gravitino is a high-performance, geo-distributed, and federated metadata lake. It manages the metadata directly in different sources, types, and regions. It also provides users with unified metadata access for data and AI assets. ![Gravitino Architecture](docs/assets/gravitino-architecture.png) @@ -41,7 +41,7 @@ Gravitino aims to provide several key features: * Security in one place, centralizing the security for different sources. * Built-in data management and data access management. -## Contributing to Gravitino +## Contributing to Apache Gravitino Gravitino is open source software available under the Apache 2.0 license. For information on how to contribute to Gravitino please see the [Contribution guidelines](CONTRIBUTING.md). @@ -49,7 +49,7 @@ Gravitino is open source software available under the Apache 2.0 license. For in You can find the latest Gravitino documentation in the [doc folder](docs). This README file only contains basic setup instructions. -## Building Gravitino +## Building Apache Gravitino You can build Gravitino using Gradle. Currently you can build Gravitino on Linux and macOS, Windows isn't supported. @@ -81,7 +81,7 @@ For the details of building and testing Gravitino, please see [How to build Grav ## Qui
(gravitino) branch main updated: [#4040] Update Rat check to ignore Datastrato headers (#4044)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 373f5 [#4040] Update Rat check to ignore Datastrato headers (#4044) 373f5 is described below commit 373f5613d79db4b62373984ae2e3d1794d1f Author: Justin Mclean AuthorDate: Wed Jul 3 17:42:05 2024 +1000 [#4040] Update Rat check to ignore Datastrato headers (#4044) ### What changes were proposed in this pull request? No longer accept Datastrato headers. ### Why are the changes needed? See above. Fix: #4040 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Note that until all file headers have changed the Rat test will not pass. I made it a separate PR for easy review. --- build.gradle.kts| 21 - .../client-python/tests/integration/test_catalog.py | 18 -- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 5c956e7ed..32e13d72c 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -1,6 +1,20 @@ /* - * Copyright 2023 Datastrato Pvt Ltd. - * This software is licensed under the Apache License version 2. + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. */ import com.github.gradle.node.NodeExtension import com.github.gradle.node.NodePlugin @@ -440,9 +454,6 @@ subprojects { } tasks.rat { - substringMatcher("DS", "Datastrato", "Copyright 2023 Datastrato Pvt Ltd.") - substringMatcher("DS", "Datastrato", "Copyright 2024 Datastrato Pvt Ltd.") - approvedLicense("Datastrato") approvedLicense("Apache License Version 2.0") // Set input directory to that of the root project instead of the CWD. This diff --git a/clients/client-python/tests/integration/test_catalog.py b/clients/client-python/tests/integration/test_catalog.py index 5b08edc23..1535e709c 100644 --- a/clients/client-python/tests/integration/test_catalog.py +++ b/clients/client-python/tests/integration/test_catalog.py @@ -1,6 +1,20 @@ """ -Copyright 2024 Datastrato Pvt Ltd. -This software is licensed under the Apache License version 2. +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. """ import logging
(gravitino) branch main updated: [MINOR] [#4037] Upgrade twine version to fix python client deploy task (#4038)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new bee0bb007 [MINOR] [#4037] Upgrade twine version to fix python client deploy task (#4038) bee0bb007 is described below commit bee0bb00763d9baa781492ce141ffcb3647a88b9 Author: xloya <982052...@qq.com> AuthorDate: Wed Jul 3 15:08:39 2024 +0800 [MINOR] [#4037] Upgrade twine version to fix python client deploy task (#4038) ### What changes were proposed in this pull request? Currently twine 5.0.0 will cause Python client deploy task failed, upgrade twine version to fix this problem. ### Why are the changes needed? Fix: #4037 Co-authored-by: xiaojiebao --- clients/client-python/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt index 4e1d6b4a8..77387c01c 100644 --- a/clients/client-python/requirements-dev.txt +++ b/clients/client-python/requirements-dev.txt @@ -19,7 +19,7 @@ requests==2.32.2 dataclasses-json==0.6.6 pylint==3.2.2 black==24.4.2 -twine==5.1.0 +twine==5.1.1 coverage==7.5.1 pandas==2.0.3 pyarrow==15.0.2
(gravitino) branch main updated: [#4039] Update NOTICE files to ASF norms (#4043)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 30b11ce4a [#4039] Update NOTICE files to ASF norms (#4043) 30b11ce4a is described below commit 30b11ce4a23d2acb8ab60e5db936034d9b10ff29 Author: Justin Mclean AuthorDate: Wed Jul 3 16:18:28 2024 +1000 [#4039] Update NOTICE files to ASF norms (#4043) ### What changes were proposed in this pull request? Update NOTICE files to comply with ASF policy. ### Why are the changes needed? To comply with ASF policy. Fix: #4039 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built and run non-integration tests locally. --- NOTICE | 7 +-- NOTICE.bin | 8 web/NOTICE | 7 +-- web/NOTICE.bin | 7 +-- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/NOTICE b/NOTICE index 6900d05a9..3f221e49c 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,8 @@ -Gravitino -Copyright 2023-2024 Datastrato Pvt Ltd +Apache Gravitino (incubating) +Copyright 2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). This product includes software developed at Datastrato (https://datastrato.ai). diff --git a/NOTICE.bin b/NOTICE.bin index 939c7f67a..5c63b8189 100644 --- a/NOTICE.bin +++ b/NOTICE.bin @@ -1,11 +1,11 @@ -Gravitino -Copyright 2023-2024 Datastrato Pvt Ltd +Apache Gravitino (incubating) +Copyright 2024 The Apache Software Foundation This product includes software developed at -Datastrato (https://datastrato.ai). +The Apache Software Foundation (http://www.apache.org/). This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). +Datastrato (https://datastrato.ai). The Web UI also has a NOTICE file please see web/NOTICE for it's contents. 
diff --git a/web/NOTICE b/web/NOTICE index 4f96647d6..5ddaf26a8 100644 --- a/web/NOTICE +++ b/web/NOTICE @@ -1,5 +1,8 @@ -Gravitino -Copyright 2023 Datastrato Pvt Ltd +Apache Gravitino (incubating) +Copyright 2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). This product includes software developed at Datastrato (https://datastrato.ai). diff --git a/web/NOTICE.bin b/web/NOTICE.bin index 4f96647d6..5ddaf26a8 100644 --- a/web/NOTICE.bin +++ b/web/NOTICE.bin @@ -1,5 +1,8 @@ -Gravitino -Copyright 2023 Datastrato Pvt Ltd +Apache Gravitino (incubating) +Copyright 2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). This product includes software developed at Datastrato (https://datastrato.ai).
(gravitino) branch main updated (0e59cc854 -> 33ca5812c)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git from 0e59cc854 [#4031] build: Add the asf yaml for Gravitino (#4033) add 33ca5812c [#4031][followup] build: Revert the partial protected branches (#4047) No new revisions were added by this update. Summary of changes: .asf.yaml | 6 -- 1 file changed, 6 deletions(-)
(gravitino) branch main updated: [#4031] build: Add the asf yaml for Gravitino (#4033)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 0e59cc854 [#4031] build: Add the asf yaml for Gravitino (#4033) 0e59cc854 is described below commit 0e59cc854a043d29b19a5622cf4f16691ab18a6d Author: roryqi AuthorDate: Wed Jul 3 12:05:45 2024 +0800 [#4031] build: Add the asf yaml for Gravitino (#4033) ### What changes were proposed in this pull request? Add the asf yaml for Gravitino ### Why are the changes needed? Fix: #4031 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This pull request needs to be verified after mering. - Co-authored-by: Jerry Shao --- .asf.yaml | 61 + 1 file changed, 61 insertions(+) diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 0..44c9f9055 --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,61 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +github: + description: World's most powerful open data catalog for building a high-performance, geo-distributed and federated metadata lake. 
+ homepage: https://datastrato.ai/docs/ + labels: +- metadata +- data-catalog +- datalake +- stratosphere +- federated-query +- lakehouse +- model-catalog +- metalake +- skycomputing +- ai-catalog +- opendatacatalog + features: +# Enable wiki for documentation +wiki: false +# Enable issues management +issues: true +# Enable projects for project management boards +projects: true + enabled_merge_buttons: +squash: true +merge: false +rebase: true + protected_branches: +main: + required_status_checks: +strict: true + required_pull_request_reviews: +dismiss_stale_reviews: true +required_approving_review_count: 1 +branch-*: + required_status_checks: +strict: true + required_pull_request_reviews: +dismiss_stale_reviews: true +required_approving_review_count: 1 + +notifications: + commits: commits@gravitino.apache.org + issues: commits@gravitino.apache.org + pullrequests: commits@gravitino.apache.org
(gravitino) branch main updated: [#4032] Fix(CI): Remove some CI actions to make CI back to normal (#4030)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/gravitino.git The following commit(s) were added to refs/heads/main by this push: new 6180bc650 [#4032] Fix(CI): Remove some CI actions to make CI back to normal (#4030) 6180bc650 is described below commit 6180bc650aec6847d625a8551b9a54c0a913eb28 Author: Qi Yu AuthorDate: Wed Jul 3 11:37:35 2024 +0800 [#4032] Fix(CI): Remove some CI actions to make CI back to normal (#4030) ### What changes were proposed in this pull request? Remove `csexton/debugger-action@master` action in the github CI, if someone wants this functionality, they can add it in his fork repo and do the debug work the personal account. ### Why are the changes needed? This action is not allowed in the Apache project. Fixed: #4032 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Test in the CI --- .github/workflows/backend-integration-test.yml | 4 .github/workflows/build.yml| 18 -- .github/workflows/cron-integration-test.yml| 4 .github/workflows/flink-integration-test.yml | 4 .github/workflows/frontend-integration-test.yml| 4 .github/workflows/spark-integration-test.yml | 4 docs/how-to-test.md| 1 - .../integration/test/web/ui/MetalakePageTest.java | 5 ++--- 8 files changed, 2 insertions(+), 42 deletions(-) diff --git a/.github/workflows/backend-integration-test.yml b/.github/workflows/backend-integration-test.yml index aa564ac8d..da534f467 100644 --- a/.github/workflows/backend-integration-test.yml +++ b/.github/workflows/backend-integration-test.yml @@ -85,10 +85,6 @@ jobs: run: | ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} - - name: Setup debug Github Action -if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }} -uses: csexton/debugger-action@master - - name: Free up disk space run: | dev/ci/util_free_space.sh diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index e6a22cf35..cbb9eaffb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,21 +107,3 @@ jobs: build/reports catalogs/**/*.log catalogs/**/*.tar - - - name: Jacoco Report to PR -id: jacoco -uses: madrapps/jacoco-report@v1.6.1 -with: - paths: ${{ github.workspace }}/**/build/reports/jacoco/test/jacocoTestReport.xml - token: ${{ secrets.GITHUB_TOKEN }} - min-coverage-overall: 40 - min-coverage-changed-files: 60 - title: 'Code Coverage Report' - debug-mode: false - update-comment: true - pass-emoji: ':green_circle:' - fail-emoji: ':red_circle:' - - name: Get the Coverage info -run: | - echo "Total coverage ${{ steps.jacoco.outputs.coverage-overall }}" - echo "Changed Files coverage ${{ steps.jacoco.outputs.coverage-changed-files }}" diff --git a/.github/workflows/cron-integration-test.yml b/.github/workflows/cron-integration-test.yml index 4a7c498ee..195e1b6e9 100644 --- a/.github/workflows/cron-integration-test.yml +++ b/.github/workflows/cron-integration-test.yml @@ -76,10 +76,6 @@ jobs: run: | ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} - - name: Setup debug Github Action -if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }} -uses: csexton/debugger-action@master - - name: Free up disk space run: | dev/ci/util_free_space.sh diff --git a/.github/workflows/flink-integration-test.yml b/.github/workflows/flink-integration-test.yml index 54141c842..ba7648c72 100644 --- a/.github/workflows/flink-integration-test.yml +++ b/.github/workflows/flink-integration-test.yml @@ -79,10 +79,6 @@ jobs: run: | ./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} - - name: Setup debug Github Action -if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }} -uses: csexton/debugger-action@master - - name: Free up disk space run: | dev/ci/util_free_space.sh diff --git a/.github/workflows/frontend-integration-test.yml 
b/.github/workflows/frontend-integration-test.yml index e8925afb7..7b0315e46 100644 --- a/.github/workflows/frontend-integration-test.yml +++ b/.github/workflows/frontend-integration-test.yml @@ -82,10 +82,6 @@ jobs: run:
[incubator-uniffle] branch master updated: Rename DISCLAIMER to DISCLAIMER-WIP (#258)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new 13f61cd3 Rename DISCLAIMER to DISCLAIMER-WIP (#258) 13f61cd3 is described below commit 13f61cd35b130928ddb2d1de8bf0605ed005f741 Author: roryqi AuthorDate: Tue Oct 11 09:37:34 2022 +0800 Rename DISCLAIMER to DISCLAIMER-WIP (#258) Co-authored-by: roryqi --- DISCLAIMER | 11 --- DISCLAIMER-WIP | 21 + pom.xml| 2 +- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/DISCLAIMER b/DISCLAIMER deleted file mode 100644 index 3e401182.. --- a/DISCLAIMER +++ /dev/null @@ -1,11 +0,0 @@ -Apache Uniffle (incubating) is an effort undergoing incubation at The Apache -Software Foundation (ASF), sponsored by the Apache Incubator PMC. - -Incubation is required of all newly accepted projects until a further review -indicates that the infrastructure, communications, and decision-making process -have stabilized in a manner consistent with other successful ASF projects. - -While incubation status is not necessarily a reflection of the completeness -or stability of the code, it does indicate that the project has yet to be -fully endorsed by the ASF. - diff --git a/DISCLAIMER-WIP b/DISCLAIMER-WIP new file mode 100644 index ..23df370b --- /dev/null +++ b/DISCLAIMER-WIP @@ -0,0 +1,21 @@ +Apache Uniffle (incubating) is an effort undergoing incubation at The Apache +Software Foundation (ASF), sponsored by the Apache Incubator PMC. + +Incubation is required of all newly accepted projects until a further review +indicates that the infrastructure, communications, and decision-making process +have stabilized in a manner consistent with other successful ASF projects. 
+ +While incubation status is not necessarily a reflection of the completeness +or stability of the code, it does indicate that the project has yet to be +fully endorsed by the ASF. + +Some of the incubating project’s releases may not be fully compliant with ASF policy. +For example, releases may have incomplete or un-reviewed licensing conditions. +What follows is a list of issues the project is currently aware of (this list is likely to be incomplete): + +1. Releases may have incomplete licensing conditions + +If you are planning to incorporate this work into your product/project, please be aware that +you will need to conduct a thorough licensing review to determine the overall implications of +including this work. For the current status of this project through the Apache Incubator, +visit: https://incubator.apache.org/projects/uniffle.html diff --git a/pom.xml b/pom.xml index b8e57475..c18d1be7 100644 --- a/pom.xml +++ b/pom.xml @@ -925,7 +925,7 @@ LICENSE -DISCLAIMER +DISCLAIMER-WIP NOTICE **/target/** src/test/resources/empty
[incubator-uniffle] branch master updated: Change url of total lines badge in README (#222)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new 8be68ab4 Change url of total lines badge in README (#222) 8be68ab4 is described below commit 8be68ab42de921e36073024c9bd2f08ae4814b23 Author: Kaijie Chen AuthorDate: Thu Sep 15 18:49:06 2022 +0800 Change url of total lines badge in README (#222) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8794e016..4ab67422 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Currently it supports [Apache Spark](https://spark.apache.org) and [MapReduce](h [![Build](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml/badge.svg?branch=master&event=push)](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml) [![Codecov](https://codecov.io/gh/apache/incubator-uniffle/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/incubator-uniffle) -[![Total Lines](https://img.shields.io/tokei/lines/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle) +[![](https://sloc.xyz/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle) [![Code Quality](https://img.shields.io/lgtm/grade/java/github/apache/incubator-uniffle?label=code%20quality)](https://lgtm.com/projects/g/apache/incubator-uniffle/) [![License](https://img.shields.io/github/license/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle/blob/master/LICENSE)
[incubator-uniffle] branch master updated: Add more badges in README (#219)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new 3b210c0c Add more badges in README (#219) 3b210c0c is described below commit 3b210c0cf2ea5e9cd23ce759a267c6c5b3eb302d Author: Kaijie Chen AuthorDate: Thu Sep 15 15:51:43 2022 +0800 Add more badges in README (#219) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index e3521acc..8794e016 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,9 @@ Currently it supports [Apache Spark](https://spark.apache.org) and [MapReduce](h [![Build](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml/badge.svg?branch=master&event=push)](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml) [![Codecov](https://codecov.io/gh/apache/incubator-uniffle/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/incubator-uniffle) +[![Total Lines](https://img.shields.io/tokei/lines/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle) +[![Code Quality](https://img.shields.io/lgtm/grade/java/github/apache/incubator-uniffle?label=code%20quality)](https://lgtm.com/projects/g/apache/incubator-uniffle/) +[![License](https://img.shields.io/github/license/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle/blob/master/LICENSE) ## Architecture ![Rss Architecture](docs/asset/rss_architecture.png)
[incubator-uniffle] branch master updated: Add Notice and DISCLAMER file (#215)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new dcdf8ae5 Add Notice and DISCLAMER file (#215) dcdf8ae5 is described below commit dcdf8ae55a774adbd5126919868b4fa5376f99ab Author: frankliee AuthorDate: Wed Sep 14 15:25:21 2022 +0800 Add Notice and DISCLAMER file (#215) --- DISCLAIMER | 2 +- NOTICE | 7 +++ pom.xml| 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/DISCLAIMER b/DISCLAIMER index 805a8e84..3e401182 100644 --- a/DISCLAIMER +++ b/DISCLAIMER @@ -1,4 +1,4 @@ -Apache Uniffle (Incubating) is an effort undergoing incubation at The Apache +Apache Uniffle (incubating) is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC. Incubation is required of all newly accepted projects until a further review diff --git a/NOTICE b/NOTICE new file mode 100644 index ..2cfb9fb7 --- /dev/null +++ b/NOTICE @@ -0,0 +1,7 @@ +Apache Uniffle (incubating) +Copyright 2022 and onwards The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (https://www.apache.org/). + +The initial codebase was donated to the ASF by Tencent, copyright 2020-2022. diff --git a/pom.xml b/pom.xml index 7db56f06..327d614f 100644 --- a/pom.xml +++ b/pom.xml @@ -926,6 +926,7 @@ LICENSE DISCLAIMER +NOTICE **/target/** src/test/resources/empty **/dependency-reduced-pom.xml
[incubator-uniffle-website] branch master updated: Update Slack invitation link (#4)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git The following commit(s) were added to refs/heads/master by this push: new e2fb0e5 Update Slack invitation link (#4) e2fb0e5 is described below commit e2fb0e5f1ca9c6d42e4b6b862bae2aed3bebd714 Author: Kaijie Chen AuthorDate: Thu Sep 8 19:28:40 2022 +0800 Update Slack invitation link (#4) --- docusaurus.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.js b/docusaurus.config.js index a1aaf70..66b86e5 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -117,7 +117,7 @@ const config = { items: [ { label: 'Slack', -href: 'https://github.com/apache/incubator-uniffle/issues', +href: 'https://join.slack.com/t/the-asf/shared_invite/zt-1fm9561yr-uzTpjqg3jf5nxSJV5AE3KQ', }, { label: 'Issue Tracker',
[incubator-uniffle-website] branch master created (now d7af0cf)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git at d7af0cf first commit This branch includes the following new commits: new d7af0cf first commit The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[incubator-uniffle-website] 01/01: first commit
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git commit d7af0cfa6d7a831188b8f6f79c7626ede2e600d9 Author: Jerry Shao AuthorDate: Fri Aug 26 11:42:04 2022 +0800 first commit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md new file mode 100644 index 000..ecb1a1c --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# incubator-uniffle-website
[incubator-uniffle] branch master updated: [TYPO] Fix misspelled word "integration" (#34)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new 49f1a16 [TYPO] Fix misspelled word "integration" (#34) 49f1a16 is described below commit 49f1a16a3bcf33429307b6326d77f782ec9eb79d Author: Kaijie Chen AuthorDate: Wed Jul 6 10:26:10 2022 +0800 [TYPO] Fix misspelled word "integration" (#34) --- integration-test/common/pom.xml | 2 +- integration-test/mr/pom.xml | 2 +- integration-test/spark-common/pom.xml | 2 +- integration-test/spark2/pom.xml | 2 +- integration-test/spark3/pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml index deeb403..c4dc048 100644 --- a/integration-test/common/pom.xml +++ b/integration-test/common/pom.xml @@ -31,7 +31,7 @@ rss-integration-common-test 0.6.0-snapshot jar -Apache Uniffle Intergration Test (Common) +Apache Uniffle Integration Test (Common) diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml index cc9e9c1..2199759 100644 --- a/integration-test/mr/pom.xml +++ b/integration-test/mr/pom.xml @@ -30,7 +30,7 @@ rss-integration-mr-test 0.6.0-snapshot jar -Apache Uniffle Intergration Test (MapReduce) +Apache Uniffle Integration Test (MapReduce) diff --git a/integration-test/spark-common/pom.xml b/integration-test/spark-common/pom.xml index 3a7b56a..42890d3 100644 --- a/integration-test/spark-common/pom.xml +++ b/integration-test/spark-common/pom.xml @@ -31,7 +31,7 @@ rss-integration-spark-common-test 0.6.0-snapshot jar - Apache Uniffle Intergration Test (Spark Common) + Apache Uniffle Integration Test (Spark Common) diff --git a/integration-test/spark2/pom.xml b/integration-test/spark2/pom.xml index 08557d8..c384fda 100644 --- a/integration-test/spark2/pom.xml +++ b/integration-test/spark2/pom.xml @@ -31,7 +31,7 @@ 
rss-integration-spark2-test 0.6.0-snapshot jar - Apache Uniffle Intergration Test (Spark 2) + Apache Uniffle Integration Test (Spark 2) diff --git a/integration-test/spark3/pom.xml b/integration-test/spark3/pom.xml index c166979..0075522 100644 --- a/integration-test/spark3/pom.xml +++ b/integration-test/spark3/pom.xml @@ -31,7 +31,7 @@ rss-integration-spark3-test 0.6.0-snapshot jar -Apache Uniffle Intergration Test (Spark 3) +Apache Uniffle Integration Test (Spark 3)
[incubator-uniffle] branch master updated: Improve asf.yaml to reduce the notifications (#25)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new 0d7dfdb Improve asf.yaml to reduce the notifications (#25) 0d7dfdb is described below commit 0d7dfdbcc382aee4bdfa6924afd2bfe56d0a0bf5 Author: Saisai Shao AuthorDate: Tue Jul 5 15:17:18 2022 +0800 Improve asf.yaml to reduce the notifications (#25) --- .asf.yaml | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index bff9c79..5137082 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -23,6 +23,7 @@ github: - mapreduce - shuffle - remote-shuffle-service +- rss features: # Enable wiki for documentation wiki: true @@ -43,6 +44,6 @@ github: required_approving_review_count: 1 notifications: - commits: notificati...@uniffle.apache.org - issues: d...@uniffle.apache.org - pullrequests: notificati...@uniffle.apache.org +commits: commits@uniffle.apache.org +issues: d...@uniffle.apache.org +pullrequests: iss...@uniffle.apache.org
[incubator-uniffle] branch master updated: Add asf yaml
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git The following commit(s) were added to refs/heads/master by this push: new e5dd0ea Add asf yaml e5dd0ea is described below commit e5dd0eaf1651680420f081b3fc456f1c7be3d316 Author: Jerry Shao AuthorDate: Fri Jul 1 15:56:56 2022 +0800 Add asf yaml --- .asf.yaml | 48 1 file changed, 48 insertions(+) diff --git a/.asf.yaml b/.asf.yaml new file mode 100644 index 000..bff9c79 --- /dev/null +++ b/.asf.yaml @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +github: + description: Uniffle is a high performance, general purpose Remote Shuffle Service. 
+ homepage: https://uniffle.apache.org/ + labels: +- spark +- mapreduce +- shuffle +- remote-shuffle-service + features: +# Enable wiki for documentation +wiki: true +# Enable issues management +issues: true +# Enable projects for project management boards +projects: true + enabled_merge_buttons: +squash: true +merge: false +rebase: false + protected_branches: +master: + required_status_checks: +strict: true + required_pull_request_reviews: +dismiss_stale_reviews: true +required_approving_review_count: 1 + + notifications: + commits: notificati...@uniffle.apache.org + issues: d...@uniffle.apache.org + pullrequests: notificati...@uniffle.apache.org
[incubator-uniffle] branch branch-0.3.0 created (now 1d69058)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch branch-0.3.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 1d69058 [Bugfix] Fix incorrect index file (#92) (#93) This branch includes the following new commits: new 1d69058 [Bugfix] Fix incorrect index file (#92) (#93) The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[incubator-uniffle] 01/01: [Bugfix] Fix incorrect index file (#92) (#93)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.3.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 1d69058c32f8f943e1694cfe182fb19d55943a11 Author: roryqi AuthorDate: Tue Mar 8 17:21:55 2022 +0800 [Bugfix] Fix uncorrect index file (#92) (#93) backport 0.3.0 ### What changes were proposed in this pull request? Modify the method that calculate the offset in the index file. ### Why are the changes needed? If we don't have this patch, we run 10TB tpcds, query24a will fail. https://user-images.githubusercontent.com/8159038/157178756-d8a39b3f-0ea6-4864-ac68-ee382a88bb0f.png";> When we write many data to dataOutputStream, dataOutputStream.size() won't increase again. dataOutputStream.size() will always be Integer.MAX_VALUE. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new uts. Co-authored-by: roryqi --- .../rss/storage/handler/impl/LocalFileWriter.java | 6 ++ .../rss/storage/handler/impl/LocalFileHandlerTest.java | 17 + 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java index 10185a4..609db7e 100644 --- a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java +++ b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java @@ -30,21 +30,19 @@ public class LocalFileWriter implements Closeable { private DataOutputStream dataOutputStream; private FileOutputStream fileOutputStream; - private long initSize; private long nextOffset; public LocalFileWriter(File file) throws IOException { fileOutputStream = new FileOutputStream(file, true); // init fsDataOutputStream dataOutputStream = new DataOutputStream(fileOutputStream); -initSize = file.length(); -nextOffset = initSize; +nextOffset = file.length(); } public void 
writeData(byte[] data) throws IOException { if (data != null && data.length > 0) { dataOutputStream.write(data); - nextOffset = initSize + dataOutputStream.size(); + nextOffset = nextOffset + data.length; } } diff --git a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java index 32b7ace..846ab20 100644 --- a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java +++ b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java @@ -39,6 +39,7 @@ import com.tencent.rss.storage.handler.api.ServerReadHandler; import com.tencent.rss.storage.handler.api.ShuffleWriteHandler; import com.tencent.rss.storage.util.ShuffleStorageUtils; import java.io.File; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Random; @@ -53,6 +54,7 @@ public class LocalFileHandlerTest { @Test public void writeTest() throws Exception { File tmpDir = Files.createTempDir(); +tmpDir.deleteOnExit(); File dataDir1 = new File(tmpDir, "data1"); File dataDir2 = new File(tmpDir, "data2"); String[] basePaths = new String[]{dataDir1.getAbsolutePath(), @@ -111,6 +113,21 @@ public class LocalFileHandlerTest { } } + @Test + public void writeBigDataTest() throws IOException { +File tmpDir = Files.createTempDir(); +tmpDir.deleteOnExit(); +File writeFile = new File(tmpDir, "writetest"); +LocalFileWriter writer = new LocalFileWriter(writeFile); +int size = Integer.MAX_VALUE / 100; +byte[] data = new byte[size]; +for (int i = 0; i < 200; i++) { + writer.writeData(data); +} +long totalSize = 200L * size; +assertEquals(writer.nextOffset(), totalSize); + } + private void writeTestData( ShuffleWriteHandler writeHandler,
[incubator-uniffle] branch branch-0.4.0 created (now 6a4295a)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch branch-0.4.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 6a4295a upgrade to 0.4.1 No new revisions were added by this update.
[incubator-uniffle] 04/04: [Bugfix] [0.5] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.5.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 55cb16fb6b9f494f657068721ca81c74043a4bb9 Author: roryqi AuthorDate: Thu Jun 23 10:52:59 2022 +0800 [Bugfix] [0.5] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196) backport 0.5 ### What changes were proposed in this pull request? We should aquire the storageType from extraConf. ### Why are the changes needed? If we don't have this patch, MR don't work when we use dynamic conf and MEMORY_LOCALE_HDFS storageType. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- .../main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java index 7511104..976b03c 100644 --- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java +++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java @@ -180,7 +180,7 @@ public class RssMRAppMaster extends MRAppMaster { RssMRUtils.applyDynamicClientConf(extraConf, clusterClientConf); } - String storageType = conf.get(RssMRConfig.RSS_STORAGE_TYPE); + String storageType = RssMRUtils.getString(extraConf, conf, RssMRConfig.RSS_STORAGE_TYPE); RemoteStorageInfo defaultRemoteStorage = new RemoteStorageInfo(conf.get(RssMRConfig.RSS_REMOTE_STORAGE_PATH, "")); RemoteStorageInfo remoteStorage = ClientUtils.fetchRemoteStorage(
[incubator-uniffle] 01/04: [Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.5.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 59856687f8e17b20f206815cbcf31bbbaacf4292 Author: roryqi AuthorDate: Wed Jun 22 14:50:40 2022 +0800 [Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188) backport 0.5.0 ### What changes were proposed in this pull request? We need to judge heartbeatExecutorService whether is null when we will stop it. ### Why are the changes needed? #177 pr introduce this problem, when we run Spark applications on our cluster, the executor will throw NPE when method `stop` is called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java index f1f2a36..2970489 100644 --- a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java +++ b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java @@ -370,7 +370,9 @@ public class RssShuffleManager implements ShuffleManager { @Override public void stop() { -heartBeatScheduledExecutorService.shutdownNow(); +if (heartBeatScheduledExecutorService != null) { + heartBeatScheduledExecutorService.shutdownNow(); +} threadPoolExecutor.shutdownNow(); shuffleWriteClient.close(); }
[incubator-uniffle] 12/17: [Improvement] Move detailed client configuration to individual doc (#201)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 2c1c554bb9a47a25e56164d1af2efa1acff66cd8 Author: frankliee AuthorDate: Tue Jun 28 11:02:00 2022 +0800 [Improvement] Move detailed client configuration to individual doc (#201) ### What changes were proposed in this pull request? 1. Put detailed configuration to doc subdirectory. 2. Add doc for client quorum setting. ### Why are the changes needed? Update doc ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Just doc. --- README.md | 22 +-- docs/client_guide.md | 148 ++ docs/coordinator_guide.md | 8 +++ docs/index.md | 8 +++ docs/pageA.md | 7 --- docs/server_guide.md | 7 +++ 6 files changed, 173 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 51a1ed0..eba4fd3 100644 --- a/README.md +++ b/README.md @@ -233,27 +233,9 @@ The important configuration is listed as following. 
|rss.server.flush.cold.storage.threshold.size|64M| The threshold of data size for LOACALFILE and HDFS if MEMORY_LOCALFILE_HDFS is used| -### Spark Client +### Shuffle Client -|Property Name|Default|Description| -|---|---|---| -|spark.rss.writer.buffer.size|3m|Buffer size for single partition data| -|spark.rss.writer.buffer.spill.size|128m|Buffer size for total partition data| -|spark.rss.coordinator.quorum|-|Coordinator quorum| -|spark.rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, MEMORY_LOCALFILE_HDFS| -|spark.rss.client.send.size.limit|16m|The max data size sent to shuffle server| -|spark.rss.client.read.buffer.size|32m|The max data size read from storage| -|spark.rss.client.send.threadPool.size|10|The thread size for send shuffle data to shuffle server| - - -### MapReduce Client - -|Property Name|Default|Description| -|---|---|---| -|mapreduce.rss.coordinator.quorum|-|Coordinator quorum| -|mapreduce.rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, MEMORY_LOCALFILE_HDFS| -|mapreduce.rss.client.max.buffer.size|3k|The max buffer size in map side| -|mapreduce.rss.client.read.buffer.size|32m|The max data size read from storage| +For more details of advanced configuration, please see [Firestorm Shuffle Client Guide](https://github.com/Tencent/Firestorm/blob/master/docs/client_guide.md). ## LICENSE diff --git a/docs/client_guide.md b/docs/client_guide.md new file mode 100644 index 000..95b960b --- /dev/null +++ b/docs/client_guide.md @@ -0,0 +1,148 @@ +--- +layout: page +displayTitle: Firestorm Shuffle Client Guide +title: Firestorm Shuffle Client Guide +description: Firestorm Shuffle Client Guide +--- +# Firestorm Shuffle Client Guide + +Firestorm is designed as a unified shuffle engine for multiple computing frameworks, including Apache Spark and Apache Hadoop. +Firestorm has provided pluggable client plugins to enable remote shuffle in Spark and MapReduce. 
+ +## Deploy +This document will introduce how to deploy Firestorm client plugins with Spark and MapReduce. + +### Deploy Spark Client Plugin + +1. Add client jar to Spark classpath, eg, SPARK_HOME/jars/ + + The jar for Spark2 is located in /jars/client/spark2/rss-client-X-shaded.jar + + The jar for Spark3 is located in /jars/client/spark3/rss-client-X-shaded.jar + +2. Update Spark conf to enable Firestorm, eg, + + ``` + spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager + spark.rss.coordinator.quorum :1,:1 + # Note: For Spark2, spark.sql.adaptive.enabled should be false because Spark2 doesn't support AQE. + ``` + +### Support Spark Dynamic Allocation + +To support spark dynamic allocation with Firestorm, spark code should be updated. +There are 2 patches for spark-2.4.6 and spark-3.1.2 in spark-patches folder for reference. + +After apply the patch and rebuild spark, add following configuration in spark conf to enable dynamic allocation: + ``` + spark.shuffle.service.enabled false + spark.dynamicAllocation.enabled true + ``` + +### Deploy MapReduce Client Plugin + +1. Add client jar to the classpath of each NodeManager, e.g., /share/hadoop/mapreduce/ + +The jar for MapReduce is located in /jars/client/mr/rss-client-mr-X-shaded.jar + +2. Update MapReduce conf to enable Firestorm, eg, + + ``` + -Dmapreduce.rss.coordinator.quorum=:1,:1 + -Dyarn.app.mapreduce.am.command-opts=org.apache.hadoop.mapreduce.v2.app.RssMRAppMaster + -Dmapreduce.job.map.output.collector.class=org.apache.hadoop.mapred.RssMapOutputCollector + -Dmapreduce.job.reduce.shuffle.consumer.plugin.class=org.apache.hadoop.mapreduce.task.reduce.RssShuffle + ``` +Note that the RssMRAppMaster will automatically disable slow start (i.e., `mapreduce.job.reduce.slowstart.complete
[incubator-uniffle] 02/04: [Doc] Update readme with features like multiple remote storage support etc (#192)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.5.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit af92c1ca1339d3353ba3f80d5d97ee0658977397 Author: Colin AuthorDate: Wed Jun 22 17:16:53 2022 +0800 [Doc] Update readme with features like multiple remote storage support etc (#192) ### What changes were proposed in this pull request? Update Readme for latest features, eg, multiple remote storage support, dynamic client conf etc. ### Why are the changes needed? Doc should be updated ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No need --- README.md | 46 ++ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e134f0f..50903ce 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Coordinator will collect status of shuffle server and do the assignment for the Shuffle server will receive the shuffle data, merge them and write to storage. -Depend on different situation, Firestorm supports Memory & Local, Memory & Remote Storage(eg, HDFS), Local only, Remote Storage only. +Depend on different situation, Firestorm supports Memory & Local, Memory & Remote Storage(eg, HDFS), Memory & Local & Remote Storage(recommendation for production environment). ## Shuffle Process with Firestorm @@ -74,9 +74,25 @@ rss-xxx.tgz will be generated for deployment rss.coordinator.server.heartbeat.timeout 3 rss.coordinator.app.expired 6 rss.coordinator.shuffle.nodes.max 5 - rss.coordinator.exclude.nodes.file.path RSS_HOME/conf/exclude_nodes - ``` -4. start Coordinator + # enable dynamicClientConf, and coordinator will be responsible for most of client conf + rss.coordinator.dynamicClientConf.enabled true + # config the path of client conf + rss.coordinator.dynamicClientConf.path /conf/dynamic_client.conf + # config the path of excluded shuffle server + rss.coordinator.exclude.nodes.file.path /conf/exclude_nodes + ``` +4. 
update /conf/dynamic_client.conf, rss client will get default conf from coordinator eg, + ``` +# MEMORY_LOCALFILE_HDFS is recommandation for production environment +rss.storage.type MEMORY_LOCALFILE_HDFS +# multiple remote storages are supported, and client will get assignment from coordinator +rss.coordinator.remote.storage.path hdfs://cluster1/path,hdfs://cluster2/path +rss.writer.require.memory.retryMax 1200 +rss.client.retry.max 100 +rss.writer.send.check.timeout 60 +rss.client.read.buffer.size 14m + ``` +5. start Coordinator ``` bash RSS_HOME/bin/start-coordnator.sh ``` @@ -90,14 +106,17 @@ rss-xxx.tgz will be generated for deployment HADOOP_HOME= XMX_SIZE="80g" ``` -3. update RSS_HOME/conf/server.conf, the following demo is for memory + local storage only, eg, +3. update RSS_HOME/conf/server.conf, eg, ``` rss.rpc.server.port 1 rss.jetty.http.port 19998 rss.rpc.executor.size 2000 - rss.storage.type MEMORY_LOCALFILE + # it should be configed the same as in coordinator + rss.storage.type MEMORY_LOCALFILE_HDFS rss.coordinator.quorum :1,:1 + # local storage path for shuffle server rss.storage.basePath /data1/rssdata,/data2/rssdata + # it's better to config thread num according to local disk num rss.server.flush.thread.alive 5 rss.server.flush.threadPool.size 10 rss.server.buffer.capacity 40g @@ -108,6 +127,10 @@ rss-xxx.tgz will be generated for deployment rss.server.preAllocation.expired 12 rss.server.commit.timeout 60 rss.server.app.expired.withoutHeartbeat 12 + # note: the default value of rss.server.flush.cold.storage.threshold.size is 64m + # there will be no data written to DFS if set it as 100g even rss.storage.type=MEMORY_LOCALFILE_HDFS + # please set proper value if DFS is used, eg, 64m, 128m. + rss.server.flush.cold.storage.threshold.size 100g ``` 4. start Shuffle Server ``` @@ -121,12 +144,11 @@ rss-xxx.tgz will be generated for deployment The jar for Spark3 is located in /jars/client/spark3/rss-client-X-shaded.jar -2. 
Update Spark conf to enable Firestorm, the following demo is for local storage only, eg, +2. Update Spark conf to enable Firestorm, eg, ``` spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager spark.rss.coordinator.quorum :1,:1 - spark.rss.storage.type MEMORY_LOCALFILE ``` ### Support Spark dynamic allocation @@ -140,17 +162,16 @@ After apply the patch and rebuild spark, add following configuration in spark co spark.dynamicAllocation.enabled true ``` -## Deploy MapReduce Client +### Deploy MapReduce Client 1.
[incubator-uniffle] branch branch-0.5.0 created (now 55cb16f)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch branch-0.5.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 55cb16f [Bugfix] [0.5] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196) This branch includes the following new commits: new 5985668 [Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188) new af92c1c [Doc] Update readme with features like multiple remote storage support etc (#192) new e049863 upgrade to 0.5.0 (#189) new 55cb16f [Bugfix] [0.5] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196) The 4 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[incubator-uniffle] 03/04: upgrade to 0.5.0 (#189)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.5.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit e049863dec022d86a3aa95706c2bb93896a94c4f Author: roryqi AuthorDate: Wed Jun 22 17:17:55 2022 +0800 upgrade to 0.5.0 (#189) ### What changes were proposed in this pull request? upgrade version number ### Why are the changes needed? upgrade to 0.5.0 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? no --- client-mr/pom.xml | 4 ++-- client-spark/common/pom.xml | 4 ++-- client-spark/spark2/pom.xml | 4 ++-- client-spark/spark3/pom.xml | 4 ++-- client/pom.xml| 4 ++-- common/pom.xml| 2 +- coordinator/pom.xml | 2 +- integration-test/common/pom.xml | 4 ++-- integration-test/mr/pom.xml | 4 ++-- integration-test/spark-common/pom.xml | 4 ++-- integration-test/spark2/pom.xml | 4 ++-- integration-test/spark3/pom.xml | 4 ++-- internal-client/pom.xml | 4 ++-- pom.xml | 2 +- proto/pom.xml | 2 +- server/pom.xml| 2 +- storage/pom.xml | 2 +- 17 files changed, 28 insertions(+), 28 deletions(-) diff --git a/client-mr/pom.xml b/client-mr/pom.xml index c15ffba..1dc433e 100644 --- a/client-mr/pom.xml +++ b/client-mr/pom.xml @@ -23,13 +23,13 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.5.0 ../pom.xml com.tencent.rss rss-client-mr -0.5.0-snapshot +0.5.0 jar diff --git a/client-spark/common/pom.xml b/client-spark/common/pom.xml index 61c4b1f..fdf3b84 100644 --- a/client-spark/common/pom.xml +++ b/client-spark/common/pom.xml @@ -25,12 +25,12 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.5.0 ../../pom.xml rss-client-spark-common -0.5.0-snapshot +0.5.0 jar diff --git a/client-spark/spark2/pom.xml b/client-spark/spark2/pom.xml index 41a4432..bef2028 100644 --- a/client-spark/spark2/pom.xml +++ b/client-spark/spark2/pom.xml @@ -24,13 +24,13 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.5.0 ../../pom.xml com.tencent.rss rss-client-spark2 - 0.5.0-snapshot + 0.5.0 jar 
diff --git a/client-spark/spark3/pom.xml b/client-spark/spark3/pom.xml index 5674613..acc4fd7 100644 --- a/client-spark/spark3/pom.xml +++ b/client-spark/spark3/pom.xml @@ -24,13 +24,13 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.5.0 ../../pom.xml com.tencent.rss rss-client-spark3 -0.5.0-snapshot +0.5.0 jar diff --git a/client/pom.xml b/client/pom.xml index e6134ce..a6ebf91 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -24,12 +24,12 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.5.0 com.tencent.rss rss-client - 0.5.0-snapshot + 0.5.0 jar diff --git a/common/pom.xml b/common/pom.xml index b4b65f8..6bf0143 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -22,7 +22,7 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.5.0 ../pom.xml diff --git a/coordinator/pom.xml b/coordinator/pom.xml index e860a50..ceefda3 100644 --- a/coordinator/pom.xml +++ b/coordinator/pom.xml @@ -24,7 +24,7 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.5.0 ../pom.xml diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml index 2a759a4..773f383 100644 --- a/integration-test/common/pom.xml +++ b/integration-test/common/pom.xml @@ -24,13 +24,13 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.5.0 ../../pom.xml com.tencent.rss rss-integration-common-test -0.5.0-snapshot +0.5.0 jar diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml index 489ffd5..4879eea 100644 --- a/integration-test/mr/pom.xml +++ b/integration-test/mr/pom.xml @@ -22,14 +22,14 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.5.0 ../../pom.xml 4.0.0 com.tencent.rss rss-integration-mr-test -0.5.0-snapshot +0.5.0 jar diff --git a/integration-test/spark-common/pom.xml b/integration-test/spark-common/pom.xml index 284ca2b..f82e915 100644 --- a/integration-test/spark-common/pom.xml +++ b/integration-test/spark-common/pom.xml @@ -23,14 +23,14 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.5.0 ../../pom.xml 4.0.0 com.tencent.rss rss-integration-spark-common-test - 0.5.0-snapshot + 0.5.0 jar 
diff --git
[incubator-uniffle] 15/17: [Minor] Make clearResourceThread and processEventThread daemon (#207)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit ba47aa017f67e681af7c311c4ef8578eef740d4b Author: Zhen Wang <643348...@qq.com> AuthorDate: Thu Jun 30 14:56:54 2022 +0800 [Minor] Make clearResourceThread and processEventThread daemon (#207) ### What changes were proposed in this pull request? Make clearResourceThread daemon and processEventThread daemon. ### Why are the changes needed? `clearResourceThread` and `processEventThread` never exits, we can make it daemon. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Nod --- .../java/com/tencent/rss/server/ShuffleFlushManager.java | 12 .../main/java/com/tencent/rss/server/ShuffleTaskManager.java | 1 + 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java b/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java index e246b02..be941ac 100644 --- a/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java +++ b/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java @@ -29,6 +29,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; import com.google.common.collect.Queues; import com.google.common.collect.RangeMap; +import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.hadoop.conf.Configuration; import org.roaringbitmap.longlong.Roaring64NavigableMap; @@ -60,7 +61,6 @@ public class ShuffleFlushManager { private Map>> handlers = Maps.newConcurrentMap(); // appId -> shuffleId -> committed shuffle blockIds private Map> committedBlockIds = Maps.newConcurrentMap(); - private Runnable processEventThread; private final int retryMax; private final StorageManager storageManager; @@ -84,11 +84,12 @@ public class 
ShuffleFlushManager { BlockingQueue waitQueue = Queues.newLinkedBlockingQueue(waitQueueSize); int poolSize = shuffleServerConf.getInteger(ShuffleServerConf.SERVER_FLUSH_THREAD_POOL_SIZE); long keepAliveTime = shuffleServerConf.getLong(ShuffleServerConf.SERVER_FLUSH_THREAD_ALIVE); -threadPoolExecutor = new ThreadPoolExecutor(poolSize, poolSize, keepAliveTime, TimeUnit.SECONDS, waitQueue); +threadPoolExecutor = new ThreadPoolExecutor(poolSize, poolSize, keepAliveTime, TimeUnit.SECONDS, waitQueue, +new ThreadFactoryBuilder().setDaemon(true).setNameFormat("FlushEventThreadPool").build()); storageBasePaths = shuffleServerConf.getString(ShuffleServerConf.RSS_STORAGE_BASE_PATH).split(","); pendingEventTimeoutSec = shuffleServerConf.getLong(ShuffleServerConf.PENDING_EVENT_TIMEOUT_SEC); // the thread for flush data -processEventThread = () -> { +Runnable processEventRunnable = () -> { while (true) { try { ShuffleDataFlushEvent event = flushQueue.take(); @@ -103,7 +104,10 @@ public class ShuffleFlushManager { } } }; -new Thread(processEventThread).start(); +Thread processEventThread = new Thread(processEventRunnable); +processEventThread.setName("ProcessEventThread"); +processEventThread.setDaemon(true); +processEventThread.start(); // todo: extract a class named Service, and support stop method Thread thread = new Thread("PendingEventProcessThread") { @Override diff --git a/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java b/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java index e847779..fc37a19 100644 --- a/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java +++ b/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java @@ -123,6 +123,7 @@ public class ShuffleTaskManager { }; Thread thread = new Thread(clearResourceThread); thread.setName("clearResourceThread"); +thread.setDaemon(true); thread.start(); }
[incubator-uniffle] 10/17: [MINOR] Close clusterManager resources (#202)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 8b5f363fa296312042130b73c8dd8f5a15b5e0ae Author: Junfan Zhang AuthorDate: Mon Jun 27 17:34:13 2022 +0800 [MINOR] Close clusterManager resources (#202) ### What changes were proposed in this pull request? 1. Change the method of shutdown to close 2. Close resources of clustermanager in test cases ### Why are the changes needed? Close resources to reduce the resource occupying in test cases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test cases --- .../java/com/tencent/rss/coordinator/ClusterManager.java| 5 ++--- .../java/com/tencent/rss/coordinator/CoordinatorServer.java | 2 +- .../com/tencent/rss/coordinator/SimpleClusterManager.java | 10 -- .../rss/coordinator/BasicAssignmentStrategyTest.java| 5 - .../coordinator/PartitionBalanceAssignmentStrategyTest.java | 4 +++- .../tencent/rss/coordinator/SimpleClusterManagerTest.java | 13 +++-- .../test/java/com/tencent/rss/test/CoordinatorGrpcTest.java | 1 + 7 files changed, 30 insertions(+), 10 deletions(-) diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java index 4249a03..9f5915e 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java @@ -18,10 +18,11 @@ package com.tencent.rss.coordinator; +import java.io.Closeable; import java.util.List; import java.util.Set; -public interface ClusterManager { +public interface ClusterManager extends Closeable { /** * Add a server to the cluster. 
@@ -49,6 +50,4 @@ public interface ClusterManager { List list(); int getShuffleNodesMax(); - - void shutdown(); } diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java index 7ba7e1c..3b79221 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java @@ -94,7 +94,7 @@ public class CoordinatorServer { jettyServer.stop(); } if (clusterManager != null) { - clusterManager.shutdown(); + clusterManager.close(); } if (accessManager != null) { accessManager.close(); diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java index 10af74d..fcfd1dc 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java @@ -21,6 +21,7 @@ package com.tencent.rss.coordinator; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; @@ -186,8 +187,13 @@ public class SimpleClusterManager implements ClusterManager { } @Override - public void shutdown() { -scheduledExecutorService.shutdown(); + public void close() throws IOException { +if (scheduledExecutorService != null) { + scheduledExecutorService.shutdown(); +} +if (checkNodesExecutorService != null) { + checkNodesExecutorService.shutdown(); +} } @Override diff --git a/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java b/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java index 97afabf..7a95d76 100644 --- a/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java +++ 
b/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java @@ -24,6 +24,8 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import com.google.common.collect.Sets; import com.tencent.rss.common.PartitionRange; + +import java.io.IOException; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -49,8 +51,9 @@ public class BasicAssignmentStrategyTest { } @AfterEach - public void tearDown() { + public void tearDown() throws IOException { clusterManager.clear(); +clusterManager.close(); } @Test diff --git a/coordinator/src/test/java/com/tencent/rss/coordinator/PartitionBalanceAssignmentStrategyTest.java b/coordinator/src/test/java/com/tencent/rss/coordinator/PartitionBalanceAssignmentStrategyTest.java index 018aa62..9ca4146 100644 --- a/coordinator/src/test/java/com/tencent/rss
[incubator-uniffle] 16/17: Support using remote fs path to specify the excludeNodesFilePath (#200)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 5ec04b89348ca9c28c9ddce571ffa528969d2f8a Author: Junfan Zhang AuthorDate: Thu Jun 30 19:12:36 2022 +0800 Support using remote fs path to specify the excludeNodesFilePath (#200) What changes were proposed in this pull request? Support using a remote fs path to specify the excludeNodesFilePath Why are the changes needed? When two coordinators are serving online, we hope they can read a consistent exclude nodes file instead of manually syncing the local file. Does this PR introduce any user-facing change? Yes. It's an incompatible change. When the default fs is HDFS in core-site.xml and the excludeFilePath is specified as "/user/x" in the coordinator server, then after applying this patch the filesystem will be initialized to the remote HDFS due to the missing scheme. How was this patch tested? Unit tests. 
--- .../rss/coordinator/ClusterManagerFactory.java | 10 +++- .../tencent/rss/coordinator/CoordinatorServer.java | 2 +- .../rss/coordinator/SimpleClusterManager.java | 68 +- .../coordinator/BasicAssignmentStrategyTest.java | 6 +- .../PartitionBalanceAssignmentStrategyTest.java| 6 +- .../rss/coordinator/SimpleClusterManagerTest.java | 13 +++-- 6 files changed, 63 insertions(+), 42 deletions(-) diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java index 2ec2b12..b2723f9 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java @@ -18,15 +18,19 @@ package com.tencent.rss.coordinator; +import org.apache.hadoop.conf.Configuration; + public class ClusterManagerFactory { CoordinatorConf conf; + Configuration hadoopConf; - public ClusterManagerFactory(CoordinatorConf conf) { + public ClusterManagerFactory(CoordinatorConf conf, Configuration hadoopConf) { this.conf = conf; +this.hadoopConf = hadoopConf; } - public ClusterManager getClusterManager() { -return new SimpleClusterManager(conf); + public ClusterManager getClusterManager() throws Exception { +return new SimpleClusterManager(conf, hadoopConf); } } diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java index 3b79221..2dbe06f 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java @@ -111,7 +111,7 @@ public class CoordinatorServer { registerMetrics(); this.applicationManager = new ApplicationManager(coordinatorConf); -ClusterManagerFactory clusterManagerFactory = new ClusterManagerFactory(coordinatorConf); +ClusterManagerFactory clusterManagerFactory 
= new ClusterManagerFactory(coordinatorConf, new Configuration()); this.clusterManager = clusterManagerFactory.getClusterManager(); this.clientConfManager = new ClientConfManager(coordinatorConf, new Configuration(), applicationManager); AssignmentStrategyFactory assignmentStrategyFactory = diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java index fcfd1dc..972ea5f 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java @@ -19,9 +19,10 @@ package com.tencent.rss.coordinator; import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; +import java.io.DataInputStream; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStreamReader; import java.util.List; import java.util.Map; import java.util.Set; @@ -36,6 +37,10 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,8 +57,9 @@ public class SimpleClusterManager implements ClusterManager { private int shuffleNodesMax; private ScheduledExecutorService scheduledExecutorService; private ScheduledExecutorService checkNodesExecutorService; + private FileSystem hadoopFileSystem; - public Simpl
[incubator-uniffle] 13/17: [Improvement] Add RSS_IP environment variable support for K8S (#204)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 6937631876052425b8d808d26caf78c79b24536a Author: roryqi AuthorDate: Wed Jun 29 10:06:31 2022 +0800 [Improvement] Add RSS_IP environment variable support for K8S (#204) ### What changes were proposed in this pull request? Method `getHostIp` can acquire IP by environment variable. ### Why are the changes needed? For K8S, there are too many IPs, it's hard to decide which we should use. So we use the environment variable to tell RSS to use which one. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT --- .../java/com/tencent/rss/common/util/RssUtils.java | 10 + .../com/tencent/rss/common/util/RssUtilsTest.java | 26 ++ 2 files changed, 36 insertions(+) diff --git a/common/src/main/java/com/tencent/rss/common/util/RssUtils.java b/common/src/main/java/com/tencent/rss/common/util/RssUtils.java index 1b7200e..7ecae6b 100644 --- a/common/src/main/java/com/tencent/rss/common/util/RssUtils.java +++ b/common/src/main/java/com/tencent/rss/common/util/RssUtils.java @@ -41,6 +41,7 @@ import java.util.Map; import java.util.Properties; import com.google.common.collect.Lists; +import com.google.common.net.InetAddresses; import org.roaringbitmap.longlong.Roaring64NavigableMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,6 +103,15 @@ public class RssUtils { // loop back, etc.). If the network interface in the machine is more than one, we // will choose the first IP. public static String getHostIp() throws Exception { +// For K8S, there are too many IPs, it's hard to decide which we should use. +// So we use the environment variable to tell RSS to use which one. 
+String ip = System.getenv("RSS_IP"); +if (ip != null) { + if (!InetAddresses.isInetAddress(ip)) { +throw new RuntimeException("Environment RSS_IP: " + ip + " is wrong format"); + } + return ip; +} Enumeration nif = NetworkInterface.getNetworkInterfaces(); String siteLocalAddress = null; while (nif.hasMoreElements()) { diff --git a/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java b/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java index 95fd55f..220cb5c 100644 --- a/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java +++ b/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java @@ -18,6 +18,7 @@ package com.tencent.rss.common.util; +import java.lang.reflect.Field; import java.net.InetAddress; import java.nio.ByteBuffer; import java.util.Arrays; @@ -62,6 +63,18 @@ public class RssUtilsTest { if (!address.equals("127.0.0.1")) { assertEquals(address, realIp); } + setEnv("RSS_IP", "8.8.8.8"); + assertEquals("8.8.8.8", RssUtils.getHostIp()); + setEnv("RSS_IP", ""); + boolean isException = false; + try { +RssUtils.getHostIp(); + } catch (Exception e) { +isException = true; + } + setEnv("RSS_IP", realIp); + RssUtils.getHostIp(); + assertTrue(isException); } catch (Exception e) { fail(e.getMessage()); } @@ -185,6 +198,19 @@ public class RssUtilsTest { } } + public static void setEnv(String key, String value) { +try { + Map env = System.getenv(); + Class cl = env.getClass(); + Field field = cl.getDeclaredField("m"); + field.setAccessible(true); + Map writableEnv = (Map) field.get(env); + writableEnv.put(key, value); +} catch (Exception e) { + throw new IllegalStateException("Failed to set environment variable", e); +} + } + public static class RssUtilTestDummySuccess implements RssUtilTestDummy { private final String s;
[incubator-uniffle] 07/17: [Minor] Remove serverNode from tags structure when heartbeat timeout (#193)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit d92208ddb1edca13fcb6cb31a8980b2052f29d7b Author: Junfan Zhang AuthorDate: Thu Jun 23 15:30:19 2022 +0800 [Minor] Remove serverNode from tags structure when heartbeart timeout (#193) ### What changes were proposed in this pull request? Remove serverNode from tags structure when heartbeart timeout ### Why are the changes needed? Remove serverNode from tags structure when heartbeart timeout ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT --- .../com/tencent/rss/coordinator/ServerNode.java| 7 ++ .../rss/coordinator/SimpleClusterManager.java | 9 ++-- .../rss/coordinator/SimpleClusterManagerTest.java | 27 ++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java b/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java index ef09298..816f080 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java @@ -115,6 +115,13 @@ public class ServerNode implements Comparable { + ", healthy[" + isHealthy + "]"; } + /** + * Only for test case + */ + void setTimestamp(long timestamp) { +this.timestamp = timestamp; + } + @Override public int compareTo(ServerNode other) { if (availableMemory > other.getAvailableMemory()) { diff --git a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java index d3fe789..10af74d 100644 --- a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java +++ b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java @@ -72,7 +72,7 @@ public class SimpleClusterManager implements ClusterManager { } 
} - private void nodesCheck() { + void nodesCheck() { try { long timestamp = System.currentTimeMillis(); Set deleteIds = Sets.newHashSet(); @@ -83,7 +83,12 @@ public class SimpleClusterManager implements ClusterManager { } } for (String serverId : deleteIds) { -servers.remove(serverId); +ServerNode sn = servers.remove(serverId); +if (sn != null) { + for (Set nodesWithTag : tagToNodes.values()) { +nodesWithTag.remove(sn); + } +} } CoordinatorMetrics.gaugeTotalServerNum.set(servers.size()); diff --git a/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java b/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java index a5040bf..bed9081 100644 --- a/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java +++ b/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java @@ -27,6 +27,7 @@ import java.util.Set; import com.google.common.collect.Sets; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -142,6 +143,32 @@ public class SimpleClusterManagerTest { assertEquals(0, serverNodes.size()); } + @Test + public void testGetCorrectServerNodesWhenOneNodeRemoved() { +CoordinatorConf ssc = new CoordinatorConf(); +ssc.setLong(CoordinatorConf.COORDINATOR_HEARTBEAT_TIMEOUT, 30 * 1000L); +SimpleClusterManager clusterManager = new SimpleClusterManager(ssc); +ServerNode sn1 = new ServerNode("sn1", "ip", 0, 100L, 50L, 20, +10, testTags, true); +ServerNode sn2 = new ServerNode("sn2", "ip", 0, 100L, 50L, 21, +10, testTags, true); +ServerNode sn3 = new ServerNode("sn3", "ip", 0, 100L, 50L, 20, +11, testTags, true); +clusterManager.add(sn1); +clusterManager.add(sn2); +clusterManager.add(sn3); +List serverNodes = clusterManager.getServerList(testTags); +assertEquals(3, serverNodes.size()); + +sn3.setTimestamp(System.currentTimeMillis() - 60 * 1000L); 
+clusterManager.nodesCheck(); + +Map> tagToNodes = clusterManager.getTagToNodes(); +List serverList = clusterManager.getServerList(testTags); +Assertions.assertEquals(2, tagToNodes.get(testTags.iterator().next()).size()); +Assertions.assertEquals(2, serverList.size()); + } + @Test public void updateExcludeNodesTest() throws Exception { String excludeNodesFolder = (new File(ClassLoader.getSystemResource("empty").getFile())).getParent();
[incubator-uniffle] 17/17: [Improvement] Modify configuration template (#209)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 166f3f8c7c5f14eb75daca843f992e908bd3c938 Author: roryqi AuthorDate: Fri Jul 1 11:49:34 2022 +0800 [Improvement] Modify configuration template (#209) ### What changes were proposed in this pull request? I modify the file `conf/server.conf` and `conf/coordinator.conf`. Some configurations are not recommended. I modify them ### Why are the changes needed? Give users a better configuration template ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No need. --- conf/coordinator.conf | 2 +- conf/server.conf | 16 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/conf/coordinator.conf b/conf/coordinator.conf index 294f14e..c66e302 100644 --- a/conf/coordinator.conf +++ b/conf/coordinator.conf @@ -21,4 +21,4 @@ rss.jetty.http.port 19998 rss.coordinator.server.heartbeat.timeout 3 rss.coordinator.app.expired 6 rss.coordinator.shuffle.nodes.max 13 -rss.coordinator.exclude.nodes.file.path /xxx +rss.coordinator.exclude.nodes.file.path file:///xxx diff --git a/conf/server.conf b/conf/server.conf index 3c347e1..6ab6571 100644 --- a/conf/server.conf +++ b/conf/server.conf @@ -19,18 +19,10 @@ rss.rpc.server.port 1 rss.jetty.http.port 19998 rss.storage.basePath /xxx,/xxx -rss.storage.type LOCALFILE_AND_HDFS +rss.storage.type MEMORY_LOCALFILE_HDFS rss.coordinator.quorum xxx:1,xxx:1 rss.server.buffer.capacity 40gb -rss.server.buffer.spill.threshold 22gb -rss.server.partition.buffer.size 150mb rss.server.read.buffer.capacity 20gb -rss.server.flush.thread.alive 50 -rss.server.flush.threadPool.size 100 - -# multistorage config -rss.server.multistorage.enable true -rss.server.uploader.enable true -rss.server.uploader.base.path hdfs://xxx -rss.server.uploader.thread.number 32 -rss.server.disk.capacity 1011550697553 +rss.server.flush.thread.alive 5 
+rss.server.flush.threadPool.size 10 +rss.server.disk.capacity 1t
[incubator-uniffle] 04/17: [Doc] Update readme with features like multiple remote storage support etc (#191)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 11a8594e868db3aaf55af9baa1903e8cbd17413e Author: Colin AuthorDate: Wed Jun 22 16:38:27 2022 +0800 [Doc] Update readme with features like multiple remote storage support etc (#191) What changes were proposed in this pull request? Update Readme for latest features, eg, multiple remote storage support, dynamic client conf etc. Why are the changes needed? Doc should be updated Does this PR introduce any user-facing change? No How was this patch tested? No need --- README.md | 46 ++ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e134f0f..50903ce 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Coordinator will collect status of shuffle server and do the assignment for the Shuffle server will receive the shuffle data, merge them and write to storage. -Depend on different situation, Firestorm supports Memory & Local, Memory & Remote Storage(eg, HDFS), Local only, Remote Storage only. +Depend on different situation, Firestorm supports Memory & Local, Memory & Remote Storage(eg, HDFS), Memory & Local & Remote Storage(recommendation for production environment). ## Shuffle Process with Firestorm @@ -74,9 +74,25 @@ rss-xxx.tgz will be generated for deployment rss.coordinator.server.heartbeat.timeout 3 rss.coordinator.app.expired 6 rss.coordinator.shuffle.nodes.max 5 - rss.coordinator.exclude.nodes.file.path RSS_HOME/conf/exclude_nodes - ``` -4. start Coordinator + # enable dynamicClientConf, and coordinator will be responsible for most of client conf + rss.coordinator.dynamicClientConf.enabled true + # config the path of client conf + rss.coordinator.dynamicClientConf.path /conf/dynamic_client.conf + # config the path of excluded shuffle server + rss.coordinator.exclude.nodes.file.path /conf/exclude_nodes + ``` +4. 
update /conf/dynamic_client.conf, rss client will get default conf from coordinator eg, + ``` +# MEMORY_LOCALFILE_HDFS is recommandation for production environment +rss.storage.type MEMORY_LOCALFILE_HDFS +# multiple remote storages are supported, and client will get assignment from coordinator +rss.coordinator.remote.storage.path hdfs://cluster1/path,hdfs://cluster2/path +rss.writer.require.memory.retryMax 1200 +rss.client.retry.max 100 +rss.writer.send.check.timeout 60 +rss.client.read.buffer.size 14m + ``` +5. start Coordinator ``` bash RSS_HOME/bin/start-coordnator.sh ``` @@ -90,14 +106,17 @@ rss-xxx.tgz will be generated for deployment HADOOP_HOME= XMX_SIZE="80g" ``` -3. update RSS_HOME/conf/server.conf, the following demo is for memory + local storage only, eg, +3. update RSS_HOME/conf/server.conf, eg, ``` rss.rpc.server.port 1 rss.jetty.http.port 19998 rss.rpc.executor.size 2000 - rss.storage.type MEMORY_LOCALFILE + # it should be configed the same as in coordinator + rss.storage.type MEMORY_LOCALFILE_HDFS rss.coordinator.quorum :1,:1 + # local storage path for shuffle server rss.storage.basePath /data1/rssdata,/data2/rssdata + # it's better to config thread num according to local disk num rss.server.flush.thread.alive 5 rss.server.flush.threadPool.size 10 rss.server.buffer.capacity 40g @@ -108,6 +127,10 @@ rss-xxx.tgz will be generated for deployment rss.server.preAllocation.expired 12 rss.server.commit.timeout 60 rss.server.app.expired.withoutHeartbeat 12 + # note: the default value of rss.server.flush.cold.storage.threshold.size is 64m + # there will be no data written to DFS if set it as 100g even rss.storage.type=MEMORY_LOCALFILE_HDFS + # please set proper value if DFS is used, eg, 64m, 128m. + rss.server.flush.cold.storage.threshold.size 100g ``` 4. start Shuffle Server ``` @@ -121,12 +144,11 @@ rss-xxx.tgz will be generated for deployment The jar for Spark3 is located in /jars/client/spark3/rss-client-X-shaded.jar -2. 
Update Spark conf to enable Firestorm, the following demo is for local storage only, eg, +2. Update Spark conf to enable Firestorm, eg, ``` spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager spark.rss.coordinator.quorum :1,:1 - spark.rss.storage.type MEMORY_LOCALFILE ``` ### Support Spark dynamic allocation @@ -140,17 +162,16 @@ After apply the patch and rebuild spark, add following configuration in spark co spark.dynamicAllocation.enabled true ``` -## Deploy MapReduce Client +### Deploy MapReduce Client 1. Add client jar to the classpath of
[incubator-uniffle] 01/17: [Improvement] Avoid using the default forkjoin pool by parallelStream directly (#180)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 46b62b2406a547dca6f6b933ee187047e3618202 Author: Junfan Zhang AuthorDate: Tue Jun 21 14:15:59 2022 +0800 [Improvement] Avoid using the default forkjoin pool by parallelStream directly (#180) ### What changes were proposed in this pull request? As we know that parallelStream will use the default forkjoin pool in entire jvm. To avoid it, use the custom pool and allow to specify the pool size. ### Why are the changes needed? use separate forkjoin pool to send shuffle data ### Does this PR introduce _any_ user-facing change? Yes, introduce the configuration to control the size of forkjoinpool. mapreduce.rss.client.data.transfer.pool.size for MapReduce spark.rss.client.data.transfer.pool.size for Spark ### How was this patch tested? GA passed. --- .../org/apache/hadoop/mapreduce/RssMRConfig.java | 4 .../org/apache/hadoop/mapreduce/RssMRUtils.java| 5 - .../org/apache/spark/shuffle/RssSparkConfig.java | 4 .../apache/spark/shuffle/RssShuffleManager.java| 5 - .../apache/spark/shuffle/RssShuffleManager.java| 14 ++--- .../rss/client/factory/ShuffleClientFactory.java | 4 ++-- .../rss/client/impl/ShuffleWriteClientImpl.java| 24 ++ .../tencent/rss/client/util/RssClientConfig.java | 2 ++ .../client/impl/ShuffleWriteClientImplTest.java| 2 +- .../test/java/com/tencent/rss/test/QuorumTest.java | 2 +- .../tencent/rss/test/ShuffleServerGrpcTest.java| 2 +- .../tencent/rss/test/ShuffleWithRssClientTest.java | 2 +- 12 files changed, 50 insertions(+), 20 deletions(-) diff --git a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java index a191e2f..3447f09 100644 --- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java +++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java @@ -52,6 
+52,10 @@ public class RssMRConfig { RssClientConfig.RSS_DATA_REPLICA_READ_DEFAULT_VALUE; public static final String RSS_DATA_REPLICA_SKIP_ENABLED = MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_REPLICA_SKIP_ENABLED; + public static final String RSS_DATA_TRANSFER_POOL_SIZE = + MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_TRANSFER_POOL_SIZE; + public static final int RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE = + RssClientConfig.RSS_DATA_TRANFER_POOL_SIZE_DEFAULT_VALUE; public static final String RSS_CLIENT_SEND_THREAD_NUM = MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_CLIENT_SEND_THREAD_NUM; public static final int RSS_CLIENT_DEFAULT_SEND_THREAD_NUM = diff --git a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java index 1d8b4d6..16613e1 100644 --- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java +++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java @@ -90,10 +90,13 @@ public class RssMRUtils { RssMRConfig.RSS_DATA_REPLICA_DEFAULT_VALUE); boolean replicaSkipEnabled = jobConf.getBoolean(RssMRConfig.RSS_DATA_REPLICA_SKIP_ENABLED, RssMRConfig.RSS_DATA_REPLICA_SKIP_ENABLED_DEFAULT_VALUE); +int dataTransferPoolSize = jobConf.getInt(RssMRConfig.RSS_DATA_TRANSFER_POOL_SIZE, +RssMRConfig.RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE); ShuffleWriteClient client = ShuffleClientFactory .getInstance() .createShuffleWriteClient(clientType, retryMax, retryIntervalMax, -heartBeatThreadNum, replica, replicaWrite, replicaRead, replicaSkipEnabled); +heartBeatThreadNum, replica, replicaWrite, replicaRead, replicaSkipEnabled, +dataTransferPoolSize); return client; } diff --git a/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java b/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java index 9720ff0..8d5dda9 100644 --- a/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java +++ 
b/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java @@ -106,6 +106,10 @@ public class RssSparkConfig { public static final int RSS_DATA_REPLICA_READ_DEFAULT_VALUE = RssClientConfig.RSS_DATA_REPLICA_READ_DEFAULT_VALUE; public static final String RSS_DATA_REPLICA_SKIP_ENABLED = SPARK_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_REPLICA_SKIP_ENABLED; + public static final String RSS_DATA_TRANSFER_POOL_SIZE = + SPARK_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_TRANSFER_POOL_SIZE; + public static final int RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE = + RssClientConfig.RSS_DATA_TRANFER_POOL_SIZE_DEFAULT_VALUE
[incubator-uniffle] 02/17: [Bugfix] Fix spark2 executor stop NPE problem (#187)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 7fa8b52e5739a0c2ded7f2eca84b086713765418 Author: roryqi AuthorDate: Wed Jun 22 14:30:15 2022 +0800 [Bugfix] Fix spark2 executor stop NPE problem (#187) backport 0.5.0 ### What changes were proposed in this pull request? We need to judge heartbeatExecutorService whether is null when we will stop it. ### Why are the changes needed? #177 pr introduce this problem, when we run Spark applications on our cluster, the executor will throw NPE when method `stop` is called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java index 5d11c39..8a2c385 100644 --- a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java +++ b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java @@ -373,7 +373,9 @@ public class RssShuffleManager implements ShuffleManager { @Override public void stop() { -heartBeatScheduledExecutorService.shutdownNow(); +if (heartBeatScheduledExecutorService != null) { + heartBeatScheduledExecutorService.shutdownNow(); +} threadPoolExecutor.shutdownNow(); shuffleWriteClient.close(); }
[incubator-uniffle] 14/17: [Improvement] Close coordinatorClients when DelegationRssShuffleManager stops (#205)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 15a6ea65ede6a2bc07824855801573a5d0cad512 Author: Zhen Wang <643348...@qq.com> AuthorDate: Thu Jun 30 11:34:40 2022 +0800 [Improvement] Close coordinatorClients when DelegationRssShuffleManager stops (#205) ### What changes were proposed in this pull request? Close coordinatorClients when DelegationRssShuffleManager stops. ### Why are the changes needed? The coordinatorClients in DelegationRssShuffleManager are never closed. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No --- .../main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java | 1 + .../main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java | 1 + 2 files changed, 2 insertions(+) diff --git a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java index e0a30e7..03320c0 100644 --- a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java +++ b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java @@ -173,6 +173,7 @@ public class DelegationRssShuffleManager implements ShuffleManager { @Override public void stop() { delegate.stop(); +coordinatorClients.forEach(CoordinatorClient::close); } @Override diff --git a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java index 4ed6cce..32d58d2 100644 --- a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java +++ b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java @@ -248,6 +248,7 @@ public class 
DelegationRssShuffleManager implements ShuffleManager { @Override public void stop() { delegate.stop(); +coordinatorClients.forEach(CoordinatorClient::close); } @Override
[incubator-uniffle] branch master created (now 166f3f8)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 166f3f8 [Improvement] Modify configuration template (#209) This branch includes the following new commits: new 46b62b2 [Improvement] Avoid using the default forkjoin pool by parallelStream directly (#180) new 7fa8b52 [Bugfix] Fix spark2 executor stop NPE problem (#187) new 924dac7 [Bugfix] Fix spark2 executor stop NPE problem (#186) new 11a8594 [Doc] Update readme with features like multiple remote storage support etc (#191) new 8d8e6bf upgrade to 0.6.0-snapshot (#190) new cf731f2 [Bugfix] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) new d92208d [Minor] Remove serverNode from tags structure when heartbeart timeout (#193) new 6bdf49e [Improvement] Check ADAPTIVE_EXECUTION_ENABLED in RssShuffleManager (#197) new a253b1f [Improvement] Add dynamic allocation patch for Spark 3.2 (#199) new 8b5f363 [MINOR] Close clusterManager resources (#202) new 392c881 Support build_distribution.sh to specify different mvn build options for Spark2 and Spark3 (#203) new 2c1c554 [Improvement] Move detailed client configuration to individual doc (#201) new 6937631 [Improvement] Add RSS_IP environment variable support for K8S (#204) new 15a6ea6 [Improvement] Close coordinatorClients when DelegationRssShuffleManager stops (#205) new ba47aa0 [Minor] Make clearResourceThread and processEventThread daemon (#207) new 5ec04b8 Support using remote fs path to specify the excludeNodesFilePath (#200) new 166f3f8 [Improvement] Modify configuration template (#209) The 17 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[incubator-uniffle] 11/17: Support build_distribution.sh to specify different mvn build options for Spark2 and Spark3 (#203)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 392c88129f2706043ebb87cc89e9e2cde5733647 Author: cxzl25 AuthorDate: Tue Jun 28 10:09:01 2022 +0800 Support build_distribution.sh to specify different mvn build options for Spark2 and Spark3 (#203) What changes were proposed in this pull request? Add --spark2-mvn, --spark3-mvn parameters in build_distribution.sh to support compiling different profiles, we can pass in different maven parameters, such as profile, spark version. Add --help parameters in build_distribution.sh, fix typo. gitignore ignores the tar package generated by build. README added how to use build_distribution.sh. Why are the changes needed? If we use such a command to build, Spark2 will also use the Spark3 version to compile, so we'd better distinguish the build options of different versions. ./build_distribution.sh -Pspark3.2 Does this PR introduce any user-facing change? No How was this patch tested? 
local test --- .gitignore| 1 + README.md | 16 build_distribution.sh | 53 +++ pom.xml | 4 ++-- 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 5c39d59..b6164b2 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,5 @@ reports/ metastore_db/ derby.log dependency-reduced-pom.xml +rss-*.tgz diff --git a/README.md b/README.md index 9ad8299..51a1ed0 100644 --- a/README.md +++ b/README.md @@ -50,10 +50,26 @@ To build it, run: mvn -DskipTests clean package +Build against profile Spark2(2.4.6) + +mvn -DskipTests clean package -Pspark2 + +Build against profile Spark3(3.1.2) + +mvn -DskipTests clean package -Pspark3 + +Build against Spark 3.2.x + +mvn -DskipTests clean package -Pspark3.2 + To package the Firestorm, run: ./build_distribution.sh +Package against Spark 3.2.x, run: + +./build_distribution.sh --spark3-profile 'spark3.2' + rss-xxx.tgz will be generated for deployment ## Deploy diff --git a/build_distribution.sh b/build_distribution.sh index baf50e4..214a2ed 100755 --- a/build_distribution.sh +++ b/build_distribution.sh @@ -32,12 +32,57 @@ RSS_HOME="$( function exit_with_usage() { set +x - echo "$0 - tool for making binary distributions of Rmote Shuffle Service" + echo "./build_distribution.sh - Tool for making binary distributions of Remote Shuffle Service" echo "" - echo "usage:" + echo "Usage:" + echo "+--+" + echo "| ./build_distribution.sh [--spark2-profile ] [--spark2-mvn ] |" + echo "| [--spark3-profile ] [--spark3-mvn ] |" + echo "| |" + echo "+--+" exit 1 } +SPARK2_PROFILE_ID="spark2" +SPARK2_MVN_OPTS="" +SPARK3_PROFILE_ID="spark3" +SPARK3_MVN_OPTS="" +while (( "$#" )); do + case $1 in +--spark2-profile) + SPARK2_PROFILE_ID="$2" + shift + ;; +--spark2-mvn) + SPARK2_MVN_OPTS=$2 + shift + ;; +--spark3-profile) + SPARK3_PROFILE_ID="$2" + shift + ;; +--spark3-mvn) + SPARK3_MVN_OPTS=$2 + shift + ;; +--help) + exit_with_usage + ;; +--*) + echo "Error: $1 is not supported" + exit_with_usage + ;; +-*) + break + ;; +*) + 
echo "Error: $1 is not supported" + exit_with_usage + ;; + esac + shift +done + cd $RSS_HOME if [ -z "$JAVA_HOME" ]; then @@ -99,7 +144,7 @@ cp "${RSS_HOME}"/coordinator/target/jars/* ${COORDINATOR_JAR_DIR} CLIENT_JAR_DIR="${DISTDIR}/jars/client" mkdir -p $CLIENT_JAR_DIR -BUILD_COMMAND_SPARK2=("$MVN" clean package -Pspark2 -pl client-spark/spark2 -DskipTests -am $@) +BUILD_COMMAND_SPARK2=("$MVN" clean package -P$SPARK2_PROFILE_ID -pl client-spark/spark2 -DskipTests -am $@ $SPARK2_MVN_OPTS) # Actually build the jar echo -e "\nBuilding with..." @@ -114,7 +159,7 @@ SPARK_CLIENT2_JAR="${RSS_HOME}/client-spark/spark2/target/shaded/rss-client-spar echo "copy $SPARK_CLIENT2_JAR to ${SPARK_CLIENT2_JAR_DIR}" cp $SPARK_CLIENT2_JAR ${SPARK_CLIENT2_JAR_DIR} -BUILD_COMMAND_SPARK3=("$MVN" clean package -Pspark3 -pl client-spark/spark3 -DskipTests -am $@) +BUILD_COMMAND
[incubator-uniffle] 09/17: [Improvement] Add dynamic allocation patch for Spark 3.2 (#199)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit a253b1fed2e947e397b45b1db8f56d856eabc9fc Author: roryqi AuthorDate: Mon Jun 27 10:07:13 2022 +0800 [Improvement] Add dynamic allocation patch for Spark 3.2 (#199) ### What changes were proposed in this pull request? Add the dynamic allocation patch for Spark 3.2, solve issue #106 ### Why are the changes needed? If we don't have this patch, users can't use dynamic allocation in Spark 3.2. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- README.md | 2 +- .../spark-3.2.1_dynamic_allocation_support.patch | 92 ++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0fb65e5..9ad8299 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ rss-xxx.tgz will be generated for deployment ### Support Spark dynamic allocation To support spark dynamic allocation with Firestorm, spark code should be updated. -There are 2 patches for spark-2.4.6 and spark-3.1.2 in spark-patches folder for reference. +There are 3 patches for spark (2.4.6/3.1.2/3.2.1) in spark-patches folder for reference. 
After apply the patch and rebuild spark, add following configuration in spark conf to enable dynamic allocation: ``` diff --git a/spark-patches/spark-3.2.1_dynamic_allocation_support.patch b/spark-patches/spark-3.2.1_dynamic_allocation_support.patch new file mode 100644 index 000..1e195df --- /dev/null +++ b/spark-patches/spark-3.2.1_dynamic_allocation_support.patch @@ -0,0 +1,92 @@ +diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala +index 1b4e7ba5106..95818ff72ca 100644 +--- a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala +@@ -174,8 +174,10 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( + !rdd.isBarrier() + } + +- _rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) +- _rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId) ++ if (!_rdd.context.getConf.isRssEnable()) { ++_rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) ++_rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId) ++ } + } + + +diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +index c4b619300b5..821a01985d9 100644 +--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +@@ -207,7 +207,9 @@ private[spark] class ExecutorAllocationManager( + // If dynamic allocation shuffle tracking or worker decommissioning along with + // storage shuffle decommissioning is enabled we have *experimental* support for + // decommissioning without a shuffle service. 
+- if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) || ++ if (conf.isRssEnable()) { ++logInfo("Dynamic allocation will use remote shuffle service") ++ } else if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) || + (decommissionEnabled && + conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED))) { + logWarning("Dynamic allocation without a shuffle service is an experimental feature.") +diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala +index 5f37a1abb19..af4bee1e1bb 100644 +--- a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala +@@ -580,6 +580,10 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria + Utils.redact(this, getAll).sorted.map { case (k, v) => k + "=" + v }.mkString("\n") + } + ++ /** ++ * Return true if remote shuffle service is enabled. ++ */ ++ def isRssEnable(): Boolean = get("spark.shuffle.manager", "sort").contains("RssShuffleManager") + } + + private[spark] object SparkConf extends Logging { +diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +index a82d261d545..72e54940ca2 100644 +--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +@@ -2231,7 +2231,8 @@ private[spark] class DAGScheduler( + // if the cluster manager explicitly tells us that the entire worker was lost, then + // we
[incubator-uniffle] 08/17: [Improvement] Check ADAPTIVE_EXECUTION_ENABLED in RssShuffleManager (#197)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 6bdf49e1a68131545a8385123da558be287a196f Author: xunxunmimi5577 <52647492+xunxunmimi5...@users.noreply.github.com> AuthorDate: Fri Jun 24 02:12:40 2022 +0800 [Improvement] Check ADAPTIVE_EXECUTION_ENABLED in RssShuffleManager (#197) ### What changes were proposed in this pull request? 1. Add checking of spark.sql.adaptive.enabled=false in RssShuffleManager's constructor for spark2. 2. Add a description of this parameter in the Deploy Spark Client section of the readme. ### Why are the changes needed? When use firestorm+spark2+spark.sql.adaptive.enabled=true,the result is wrong,but we didn't get any hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- README.md | 1 + .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 3 +++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 50903ce..0fb65e5 100644 --- a/README.md +++ b/README.md @@ -149,6 +149,7 @@ rss-xxx.tgz will be generated for deployment ``` spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager spark.rss.coordinator.quorum :1,:1 + # Note: For Spark2, spark.sql.adaptive.enabled should be false because Spark2 doesn't support AQE. 
``` ### Support Spark dynamic allocation diff --git a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java index 8a2c385..28f1a8d 100644 --- a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java +++ b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java @@ -136,6 +136,9 @@ public class RssShuffleManager implements ShuffleManager { }; public RssShuffleManager(SparkConf sparkConf, boolean isDriver) { +if (sparkConf.getBoolean("spark.sql.adaptive.enabled", false)) { + throw new IllegalArgumentException("Spark2 doesn't support AQE, spark.sql.adaptive.enabled should be false."); +} this.sparkConf = sparkConf; // set & check replica config
[incubator-uniffle] 03/17: [Bugfix] Fix spark2 executor stop NPE problem (#186)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 924dac7f093d0b3f581e521fc71bc30ea0963907 Author: roryqi AuthorDate: Wed Jun 22 14:34:06 2022 +0800 [Bugfix] Fix spark2 executor stop NPE problem (#186) ### What changes were proposed in this pull request? We need to check whether heartbeatExecutorService is null before we stop it. ### Why are the changes needed? PR #177 introduced this problem: when we run Spark applications on our cluster, the executor will throw an NPE when the method `stop` is called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test
[incubator-uniffle] 05/17: upgrade to 0.6.0-snapshot (#190)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 8d8e6bf81ebf0bbb669642a46d13581927f9cec9 Author: roryqi AuthorDate: Wed Jun 22 17:36:33 2022 +0800 upgrade to 0.6.0-snapshot (#190) ### What changes were proposed in this pull request? upgrade version number ### Why are the changes needed? upgrade to 0.6.0-snapshot ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? no --- client-mr/pom.xml | 4 ++-- client-spark/common/pom.xml | 4 ++-- client-spark/spark2/pom.xml | 4 ++-- client-spark/spark3/pom.xml | 4 ++-- client/pom.xml| 4 ++-- common/pom.xml| 2 +- coordinator/pom.xml | 2 +- integration-test/common/pom.xml | 4 ++-- integration-test/mr/pom.xml | 4 ++-- integration-test/spark-common/pom.xml | 4 ++-- integration-test/spark2/pom.xml | 4 ++-- integration-test/spark3/pom.xml | 4 ++-- internal-client/pom.xml | 4 ++-- pom.xml | 2 +- proto/pom.xml | 2 +- server/pom.xml| 2 +- storage/pom.xml | 2 +- 17 files changed, 28 insertions(+), 28 deletions(-) diff --git a/client-mr/pom.xml b/client-mr/pom.xml index c15ffba..650a771 100644 --- a/client-mr/pom.xml +++ b/client-mr/pom.xml @@ -23,13 +23,13 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.6.0-snapshot ../pom.xml com.tencent.rss rss-client-mr -0.5.0-snapshot +0.6.0-snapshot jar diff --git a/client-spark/common/pom.xml b/client-spark/common/pom.xml index 61c4b1f..e79a671 100644 --- a/client-spark/common/pom.xml +++ b/client-spark/common/pom.xml @@ -25,12 +25,12 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.6.0-snapshot ../../pom.xml rss-client-spark-common -0.5.0-snapshot +0.6.0-snapshot jar diff --git a/client-spark/spark2/pom.xml b/client-spark/spark2/pom.xml index 41a4432..54434d5 100644 --- a/client-spark/spark2/pom.xml +++ b/client-spark/spark2/pom.xml @@ -24,13 +24,13 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.6.0-snapshot ../../pom.xml 
com.tencent.rss rss-client-spark2 - 0.5.0-snapshot + 0.6.0-snapshot jar diff --git a/client-spark/spark3/pom.xml b/client-spark/spark3/pom.xml index 5674613..8cd091e 100644 --- a/client-spark/spark3/pom.xml +++ b/client-spark/spark3/pom.xml @@ -24,13 +24,13 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.6.0-snapshot ../../pom.xml com.tencent.rss rss-client-spark3 -0.5.0-snapshot +0.6.0-snapshot jar diff --git a/client/pom.xml b/client/pom.xml index e6134ce..1b4e3d7 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -24,12 +24,12 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.6.0-snapshot com.tencent.rss rss-client - 0.5.0-snapshot + 0.6.0-snapshot jar diff --git a/common/pom.xml b/common/pom.xml index b4b65f8..9d6b2df 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -22,7 +22,7 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.6.0-snapshot ../pom.xml diff --git a/coordinator/pom.xml b/coordinator/pom.xml index e860a50..28b5b5c 100644 --- a/coordinator/pom.xml +++ b/coordinator/pom.xml @@ -24,7 +24,7 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.6.0-snapshot ../pom.xml diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml index 2a759a4..179ecb8 100644 --- a/integration-test/common/pom.xml +++ b/integration-test/common/pom.xml @@ -24,13 +24,13 @@ com.tencent.rss rss-main -0.5.0-snapshot +0.6.0-snapshot ../../pom.xml com.tencent.rss rss-integration-common-test -0.5.0-snapshot +0.6.0-snapshot jar diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml index 489ffd5..6ae8a17 100644 --- a/integration-test/mr/pom.xml +++ b/integration-test/mr/pom.xml @@ -22,14 +22,14 @@ rss-main com.tencent.rss -0.5.0-snapshot +0.6.0-snapshot ../../pom.xml 4.0.0 com.tencent.rss rss-integration-mr-test -0.5.0-snapshot +0.6.0-snapshot jar diff --git a/integration-test/spark-common/pom.xml b/integration-test/spark-common/pom.xml index 284ca2b..8f642a5 100644 --- a/integration-test/spark-common/pom.xml +++ b/integration-test/spark-common/pom.xml @@ 
-23,14 +23,14 @@ rss-main com.tencent.rss -0.5.0
[incubator-uniffle] 06/17: [Bugfix] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit cf731f24ef3f10bb24c57475131c04355c9d7e64 Author: roryqi AuthorDate: Thu Jun 23 09:49:16 2022 +0800 [Bugfix] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) ### What changes were proposed in this pull request? We should acquire the storageType from extraConf. ### Why are the changes needed? If we don't have this patch, MR doesn't work when we use dynamic conf and the MEMORY_LOCALE_HDFS storageType. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test --- .../main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java index 7511104..976b03c 100644 --- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java +++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java @@ -180,7 +180,7 @@ public class RssMRAppMaster extends MRAppMaster { RssMRUtils.applyDynamicClientConf(extraConf, clusterClientConf); } - String storageType = conf.get(RssMRConfig.RSS_STORAGE_TYPE); + String storageType = RssMRUtils.getString(extraConf, conf, RssMRConfig.RSS_STORAGE_TYPE); RemoteStorageInfo defaultRemoteStorage = new RemoteStorageInfo(conf.get(RssMRConfig.RSS_REMOTE_STORAGE_PATH, "")); RemoteStorageInfo remoteStorage = ClientUtils.fetchRemoteStorage(
[incubator-uniffle] branch branch-0.1.0 created (now 36343ec)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch branch-0.1.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 36343ec Upgrade the version to 0.1.0 No new revisions were added by this update.
[incubator-uniffle] 01/02: [Feature] [0.2] Support Spark 3.2 (#88)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.2.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 7f3c44a9a051310e991034162ef53e2835490e71 Author: roryqi AuthorDate: Tue Mar 1 20:33:34 2022 +0800 [Feature] [0.2] Support Spark 3.2 (#88) ### What changes were proposed in this pull request? Support Spark 3.2 ### Why are the changes needed? We need support more Spark Versions ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? GA passed (include profiles spark2,spark3,spark3.0,spark3.1,spark3.2) Co-authored-by: roryqi --- README.md | 2 +- .../spark/shuffle/writer/WriteBufferManager.java | 3 +- .../spark/shuffle/writer/RssShuffleWriter.java | 5 + .../tencent/rss/test/SparkIntegrationTestBase.java | 4 + integration-test/spark3/pom.xml| 2 + pom.xml| 106 - 6 files changed, 119 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a785f47..ac3e92a 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ The shuffle data is stored with index file and data file. 
Data file has all bloc ![Rss Shuffle_Write](docs/asset/rss_data_format.png) ## Supported Spark Version -Current support Spark 2.3.x, Spark 2.4.x, Spark3.0.x, Spark 3.1.x +Current support Spark 2.3.x, Spark 2.4.x, Spark3.0.x, Spark 3.1.x, Spark 3.2.x Note: To support dynamic allocation, the patch(which is included in client-spark/patch folder) should be applied to Spark diff --git a/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java b/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java index 1b26f0b..91cc6a7 100644 --- a/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java +++ b/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java @@ -28,6 +28,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Maps; import org.apache.spark.executor.ShuffleWriteMetrics; import org.apache.spark.memory.MemoryConsumer; +import org.apache.spark.memory.MemoryMode; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.serializer.SerializationStream; import org.apache.spark.serializer.Serializer; @@ -86,7 +87,7 @@ public class WriteBufferManager extends MemoryConsumer { Map> partitionToServers, TaskMemoryManager taskMemoryManager, ShuffleWriteMetrics shuffleWriteMetrics) { -super(taskMemoryManager); +super(taskMemoryManager, taskMemoryManager.pageSizeBytes(), MemoryMode.ON_HEAP); this.bufferSize = bufferManagerOptions.getBufferSize(); this.spillSize = bufferManagerOptions.getBufferSpillThreshold(); this.instance = serializer.newInstance(); diff --git a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java index 2a4beb6..a7e4480 100644 --- a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java +++ 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java @@ -171,6 +171,11 @@ public class RssShuffleWriter extends ShuffleWriter { + bufferManager.getManagerCostInfo()); } + // only push-based shuffle use this interface, but rss won't be used when push-based shuffle is enabled. + public long[] getPartitionLengths() { +return new long[0]; + } + private void processShuffleBlockInfos(List shuffleBlockInfoList, Set blockIds) { if (shuffleBlockInfoList != null && !shuffleBlockInfoList.isEmpty()) { shuffleBlockInfoList.forEach(sbi -> { diff --git a/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java b/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java index 06789d2..1e15ba6 100644 --- a/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java +++ b/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java @@ -21,6 +21,9 @@ package com.tencent.rss.test; import static org.junit.Assert.assertEquals; import java.util.Map; +import java.util.concurrent.TimeUnit; + +import com.google.common.util.concurrent.Uninterruptibles; import org.apache.spark.SparkConf; import org.apache.spark.shuffle.RssClientConfig; import org.apache.spark.sql.SparkSession; @@ -50,6 +53,7 @@ public abstract class SparkIntegrationTestBase extends IntegrationTestBase { Map resultWithoutRss = runSparkApp(sparkConf, fileName); long durationWithoutRss = System.currentT
[incubator-uniffle] branch branch-0.2.0 created (now 75b5376)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch branch-0.2.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git at 75b5376 [Bugfix] Fix uncorrect index file (#92) This branch includes the following new commits: new 7f3c44a [Feature] [0.2] Support Spark 3.2 (#88) new 75b5376 [Bugfix] Fix uncorrect index file (#92) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
[incubator-uniffle] 02/02: [Bugfix] Fix uncorrect index file (#92)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.2.0 in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git commit 75b537661f1a29291f199974c6e7fa1e39197d72 Author: roryqi AuthorDate: Tue Mar 8 16:31:33 2022 +0800 [Bugfix] Fix uncorrect index file (#92) ### What changes were proposed in this pull request? Modify the method that calculate the offset in the index file. ### Why are the changes needed? If we don't have this patch, we run 10TB tpcds, query24a will fail. https://user-images.githubusercontent.com/8159038/157178756-d8a39b3f-0ea6-4864-ac68-ee382a88bb0f.png";> When we write many data to dataOutputStream, dataOutputStream.size() won't increase again. dataOutputStream.size() will always be Integer.MAX_VALUE. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new uts. Co-authored-by: roryqi --- .../rss/storage/handler/impl/LocalFileWriter.java | 6 ++ .../rss/storage/handler/impl/LocalFileHandlerTest.java | 17 + 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java index 10185a4..609db7e 100644 --- a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java +++ b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java @@ -30,21 +30,19 @@ public class LocalFileWriter implements Closeable { private DataOutputStream dataOutputStream; private FileOutputStream fileOutputStream; - private long initSize; private long nextOffset; public LocalFileWriter(File file) throws IOException { fileOutputStream = new FileOutputStream(file, true); // init fsDataOutputStream dataOutputStream = new DataOutputStream(fileOutputStream); -initSize = file.length(); -nextOffset = initSize; +nextOffset = file.length(); } public void writeData(byte[] data) throws 
IOException { if (data != null && data.length > 0) { dataOutputStream.write(data); - nextOffset = initSize + dataOutputStream.size(); + nextOffset = nextOffset + data.length; } } diff --git a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java index 969944d..ce8915b 100644 --- a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java +++ b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java @@ -39,6 +39,7 @@ import com.tencent.rss.storage.handler.api.ServerReadHandler; import com.tencent.rss.storage.handler.api.ShuffleWriteHandler; import com.tencent.rss.storage.util.ShuffleStorageUtils; import java.io.File; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Random; @@ -53,6 +54,7 @@ public class LocalFileHandlerTest { @Test public void writeTest() throws Exception { File tmpDir = Files.createTempDir(); +tmpDir.deleteOnExit(); File dataDir1 = new File(tmpDir, "data1"); File dataDir2 = new File(tmpDir, "data2"); String[] basePaths = new String[]{dataDir1.getAbsolutePath(), @@ -111,6 +113,21 @@ public class LocalFileHandlerTest { } } + @Test + public void writeBigDataTest() throws IOException { +File tmpDir = Files.createTempDir(); +tmpDir.deleteOnExit(); +File writeFile = new File(tmpDir, "writetest"); +LocalFileWriter writer = new LocalFileWriter(writeFile); +int size = Integer.MAX_VALUE / 100; +byte[] data = new byte[size]; +for (int i = 0; i < 200; i++) { + writer.writeData(data); +} +long totalSize = 200L * size; +assertEquals(writer.nextOffset(), totalSize); + } + private void writeTestData( ShuffleWriteHandler writeHandler,
svn commit: r46186 - /dev/incubator/livy/0.7.1-incubating-rc1/ /release/incubator/livy/0.7.1-incubating/
Author: jshao Date: Fri Feb 19 01:57:45 2021 New Revision: 46186 Log: Livy 0.7.1-incubating release Added: release/incubator/livy/0.7.1-incubating/ - copied from r46185, dev/incubator/livy/0.7.1-incubating-rc1/ Removed: dev/incubator/livy/0.7.1-incubating-rc1/
svn commit: r45798 - /dev/incubator/livy/0.7.1-incubating-rc1/
Author: jshao Date: Thu Feb 4 05:00:19 2021 New Revision: 45798 Log: Apache Livy 0.7.1-incubating-rc1 Added: dev/incubator/livy/0.7.1-incubating-rc1/ dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip (with props) dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc (with props) dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512 dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip (with props) dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc (with props) dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512 Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip == Binary file - no diff available. Propchange: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip -- svn:mime-type = application/zip Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc == Binary file - no diff available. Propchange: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc -- svn:mime-type = application/pgp-signature Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512 == --- dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512 (added) +++ dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512 Thu Feb 4 05:00:19 2021 @@ -0,0 +1,4 @@ +apache-livy-0.7.1-incubating-bin.zip: C4987855 FDCD7220 ABC0FA19 63359019 + 34B2AB6C 76BF54C3 7AF14D97 4FD0BB44 + 05D58AD3 B10C64B8 1E1C0B73 5017822E + 2030CB57 41C232B3 4E492181 E49002A4 Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip == Binary file - no diff available. 
Propchange: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip -- svn:mime-type = application/zip Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc == Binary file - no diff available. Propchange: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc -- svn:mime-type = application/pgp-signature Added: dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512 == --- dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512 (added) +++ dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512 Thu Feb 4 05:00:19 2021 @@ -0,0 +1,4 @@ +apache-livy-0.7.1-incubating-src.zip: 03E6F489 518930F5 906F793D A88A6DC0 + F9735D87 5BCE0E2F 1818AEAA B1C0150D + EA9FEB69 9690938A FA6C1648 291FC90D + 6A9AF132 D4E88C8B CFF2F327 A9CF8AB1
[incubator-livy] branch branch-0.7 updated: [BUILD] Update version for 0.7.2-incubating-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.7 in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/branch-0.7 by this push: new 972b600 [BUILD] Update version for 0.7.2-incubating-SNAPSHOT 972b600 is described below commit 972b600d72629884140aec315ea925858eb67884 Author: jerryshao AuthorDate: Thu Feb 4 11:00:54 2021 +0800 [BUILD] Update version for 0.7.2-incubating-SNAPSHOT --- api/pom.xml | 4 ++-- assembly/pom.xml | 4 ++-- client-common/pom.xml| 4 ++-- client-http/pom.xml | 4 ++-- core/pom.xml | 4 ++-- core/scala-2.11/pom.xml | 4 ++-- coverage/pom.xml | 4 ++-- examples/pom.xml | 4 ++-- integration-test/pom.xml | 4 ++-- pom.xml | 2 +- python-api/pom.xml | 4 ++-- python-api/setup.py | 2 +- repl/pom.xml | 4 ++-- repl/scala-2.11/pom.xml | 4 ++-- rsc/pom.xml | 2 +- scala-api/pom.xml| 4 ++-- scala-api/scala-2.11/pom.xml | 4 ++-- scala/pom.xml| 4 ++-- server/pom.xml | 4 ++-- test-lib/pom.xml | 4 ++-- thriftserver/client/pom.xml | 2 +- thriftserver/server/pom.xml | 2 +- thriftserver/session/pom.xml | 2 +- 23 files changed, 40 insertions(+), 40 deletions(-) diff --git a/api/pom.xml b/api/pom.xml index fbd8496..e160690 100644 --- a/api/pom.xml +++ b/api/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating +0.7.2-incubating-SNAPSHOT org.apache.livy livy-api - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar diff --git a/assembly/pom.xml b/assembly/pom.xml index 36bb48c..113a704 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating +0.7.2-incubating-SNAPSHOT ../pom.xml livy-assembly - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT pom diff --git a/client-common/pom.xml b/client-common/pom.xml index 3897c2b..540d68d 100644 --- a/client-common/pom.xml +++ b/client-common/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating +0.7.2-incubating-SNAPSHOT 
org.apache.livy livy-client-common - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar diff --git a/client-http/pom.xml b/client-http/pom.xml index a053d8d..1625a54 100644 --- a/client-http/pom.xml +++ b/client-http/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating +0.7.2-incubating-SNAPSHOT org.apache.livy livy-client-http - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar diff --git a/core/pom.xml b/core/pom.xml index 2b21dec..6c76db4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -22,12 +22,12 @@ org.apache.livy multi-scala-project-root -0.7.1-incubating +0.7.2-incubating-SNAPSHOT ../scala/pom.xml livy-core-parent - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT pom diff --git a/core/scala-2.11/pom.xml b/core/scala-2.11/pom.xml index 5100e19..e703896 100644 --- a/core/scala-2.11/pom.xml +++ b/core/scala-2.11/pom.xml @@ -19,13 +19,13 @@ 4.0.0 org.apache.livy livy-core_2.11 - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar org.apache.livy livy-core-parent -0.7.1-incubating +0.7.2-incubating-SNAPSHOT ../pom.xml diff --git a/coverage/pom.xml b/coverage/pom.xml index 9c23dca..d358671 100644 --- a/coverage/pom.xml +++ b/coverage/pom.xml @@ -23,11 +23,11 @@ org.apache.livy livy-main ../pom.xml -0.7.1-incubating +0.7.2-incubating-SNAPSHOT livy-coverage-report - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT pom diff --git a/examples/pom.xml b/examples/pom.xml index 9692224..7ddc525 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -23,13 +23,13 @@ org.apache.livy livy-main -0.7.1-incubating +0.7.2-incubating-SNAPSHOT ../pom.xml org.apache.livy livy-examples - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar diff --git a/integration-test/pom.xml b/integration-test/pom.xml index 80a9c29..a658b81 100644 --- a/integration-test/pom.xml +++ b/integration-test/pom.xml @@ -23,11 +23,11 @@ org.apache.livy livy-main ../pom.xml -0.7.1-incubating +0.7.2-incubating-SNAPSHOT livy-integration-test - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT jar diff --git 
a/pom.xml b/pom.xml index 20b1a55..9eb1967 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.apache.livy livy-main - 0.7.1-incubating + 0.7.2-incubating-SNAPSHOT pom Livy Project Parent POM Livy Project diff --git a/python-api/pom.xml b/python-api/pom.xml index 3d7b178..2679ab3 100644 --- a
[incubator-livy] tag v0.7.1-incubating-rc1 created (now 7c3d341)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to tag v0.7.1-incubating-rc1 in repository https://gitbox.apache.org/repos/asf/incubator-livy.git. at 7c3d341 (commit) No new revisions were added by this update.
[incubator-livy] branch branch-0.7 updated: [BUILD] Update version for 0.7.1-incubating
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.7 in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/branch-0.7 by this push: new 7c3d341 [BUILD] Update version for 0.7.1-incubating 7c3d341 is described below commit 7c3d341926db69fb57a4978b15d4e96f06312267 Author: jerryshao AuthorDate: Thu Feb 4 10:31:32 2021 +0800 [BUILD] Update version for 0.7.1-incubating --- api/pom.xml | 4 ++-- assembly/pom.xml | 4 ++-- client-common/pom.xml| 4 ++-- client-http/pom.xml | 4 ++-- core/pom.xml | 4 ++-- core/scala-2.11/pom.xml | 4 ++-- coverage/pom.xml | 4 ++-- examples/pom.xml | 4 ++-- integration-test/pom.xml | 4 ++-- pom.xml | 2 +- python-api/pom.xml | 4 ++-- python-api/setup.py | 2 +- repl/pom.xml | 4 ++-- repl/scala-2.11/pom.xml | 4 ++-- rsc/pom.xml | 2 +- scala-api/pom.xml| 4 ++-- scala-api/scala-2.11/pom.xml | 4 ++-- scala/pom.xml| 4 ++-- server/pom.xml | 4 ++-- test-lib/pom.xml | 4 ++-- thriftserver/client/pom.xml | 2 +- thriftserver/server/pom.xml | 2 +- thriftserver/session/pom.xml | 2 +- 23 files changed, 40 insertions(+), 40 deletions(-) diff --git a/api/pom.xml b/api/pom.xml index 66f175c..fbd8496 100644 --- a/api/pom.xml +++ b/api/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating-SNAPSHOT +0.7.1-incubating org.apache.livy livy-api - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar diff --git a/assembly/pom.xml b/assembly/pom.xml index b94f0da..36bb48c 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating-SNAPSHOT +0.7.1-incubating ../pom.xml livy-assembly - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating pom diff --git a/client-common/pom.xml b/client-common/pom.xml index dac522c..3897c2b 100644 --- a/client-common/pom.xml +++ b/client-common/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating-SNAPSHOT +0.7.1-incubating org.apache.livy 
livy-client-common - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar diff --git a/client-http/pom.xml b/client-http/pom.xml index ad31b41..a053d8d 100644 --- a/client-http/pom.xml +++ b/client-http/pom.xml @@ -20,12 +20,12 @@ org.apache.livy livy-main -0.7.1-incubating-SNAPSHOT +0.7.1-incubating org.apache.livy livy-client-http - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar diff --git a/core/pom.xml b/core/pom.xml index 5623220..2b21dec 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -22,12 +22,12 @@ org.apache.livy multi-scala-project-root -0.7.1-incubating-SNAPSHOT +0.7.1-incubating ../scala/pom.xml livy-core-parent - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating pom diff --git a/core/scala-2.11/pom.xml b/core/scala-2.11/pom.xml index 041f9c1..5100e19 100644 --- a/core/scala-2.11/pom.xml +++ b/core/scala-2.11/pom.xml @@ -19,13 +19,13 @@ 4.0.0 org.apache.livy livy-core_2.11 - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar org.apache.livy livy-core-parent -0.7.1-incubating-SNAPSHOT +0.7.1-incubating ../pom.xml diff --git a/coverage/pom.xml b/coverage/pom.xml index 6419bc4..9c23dca 100644 --- a/coverage/pom.xml +++ b/coverage/pom.xml @@ -23,11 +23,11 @@ org.apache.livy livy-main ../pom.xml -0.7.1-incubating-SNAPSHOT +0.7.1-incubating livy-coverage-report - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating pom diff --git a/examples/pom.xml b/examples/pom.xml index 1f4aa32..9692224 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -23,13 +23,13 @@ org.apache.livy livy-main -0.7.1-incubating-SNAPSHOT +0.7.1-incubating ../pom.xml org.apache.livy livy-examples - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar diff --git a/integration-test/pom.xml b/integration-test/pom.xml index 9fa230b..80a9c29 100644 --- a/integration-test/pom.xml +++ b/integration-test/pom.xml @@ -23,11 +23,11 @@ org.apache.livy livy-main ../pom.xml -0.7.1-incubating-SNAPSHOT +0.7.1-incubating livy-integration-test - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating jar diff --git a/pom.xml 
b/pom.xml index 938bdbf..20b1a55 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.apache.livy livy-main - 0.7.1-incubating-SNAPSHOT + 0.7.1-incubating pom Livy Project Parent POM Livy Project diff --git a/python-api/pom.xml b/python-api/pom.xml index 62850c0..3d7b178 100644 --- a/python-api/pom.xml
[incubator-livy] branch branch-0.7 updated: Add html escape to session name
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.7 in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/branch-0.7 by this push: new 9f1ba47 Add html escape to session name 9f1ba47 is described below commit 9f1ba47a2f0d8accc435b133b42c3a76aa9ac846 Author: Marco Gaido AuthorDate: Fri Aug 14 17:25:54 2020 -0700 Add html escape to session name ## What changes were proposed in this pull request? The PR adds HTML escaping to session names. ## How was this patch tested? Manual test. Author: Marco Gaido Closes #302 from mgaido91/escape_html. --- .../org/apache/livy/server/ui/static/js/all-sessions.js| 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js b/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js index 6e35702..d8a84a7 100644 --- a/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js +++ b/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js @@ -15,13 +15,17 @@ * limitations under the License. 
*/ +function escapeHtml(unescapedText) { + return $("").text(unescapedText).html() +} + function loadSessionsTable(sessions) { $.each(sessions, function(index, session) { $("#interactive-sessions .sessions-table-body").append( "" + tdWrap(uiLink("session/" + session.id, session.id)) + tdWrap(appIdLink(session)) + -tdWrap(session.name) + +tdWrap(escapeHtml(session.name)) + tdWrap(session.owner) + tdWrap(session.proxyUser) + tdWrap(session.kind) + @@ -38,7 +42,7 @@ function loadBatchesTable(sessions) { "" + tdWrap(session.id) + tdWrap(appIdLink(session)) + -tdWrap(session.name) + +tdWrap(escapeHtml(session.name)) + tdWrap(session.owner) + tdWrap(session.proxyUser) + tdWrap(session.state) + @@ -79,4 +83,4 @@ $(document).ready(function () { $("#all-sessions").append('No Sessions or Batches have been created yet.'); } }); -}); \ No newline at end of file +});
[incubator-livy] branch master updated: [LIVY-756] Add Spark 3.0 and Scala 2.12 support
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new 97cf2f7 [LIVY-756] Add Spark 3.0 and Scala 2.12 support 97cf2f7 is described below commit 97cf2f75929ef6c152afc468adbead269bd0758f Author: jerryshao AuthorDate: Thu Jul 2 15:44:12 2020 +0800 [LIVY-756] Add Spark 3.0 and Scala 2.12 support ## What changes were proposed in this pull request? This PR is based on tprelle's PR #289, and addresses all the remaining issues in that PR: 1. multi-scala version support in one build (Scala 2.11 and 2.12 support). 2. make SparkR work. Also reverts most of the unnecessary changes. Besides, this PR removes the builds below 2.4 (2.2, 2.3), since Spark 2.2 and 2.3 only ship with Scala 2.11, making it hard to maintain multiple versions. But users can still use 2.2 and 2.3 without changes. All credits to tprelle. ## How was this patch tested? Run UT and IT with Spark 2.4.5 and 3.0.0 locally. Author: jerryshao Closes #300 from jerryshao/LIVY-756. 
--- .gitignore | 1 + .rat-excludes | 1 + .travis.yml| 24 +++--- README.md | 4 +- assembly/assembly.xml | 7 ++ assembly/pom.xml | 23 ++ client-common/pom.xml | 2 +- .../org/apache/livy/client/common/Serializer.java | 8 +- {client-common => core/scala-2.12}/pom.xml | 52 ++--- .../org/apache/livy/LivyBaseUnitTestSuite.scala| 4 +- coverage/pom.xml | 35 + .../org/apache/livy/examples/WordCountApp.scala| 2 +- integration-test/pom.xml | 2 +- integration-test/src/test/resources/rtest.R| 9 +-- .../scala/org/apache/livy/test/InteractiveIT.scala | 6 +- .../src/test/spark2/scala/Spark2JobApiIT.scala | 26 +-- pom.xml| 88 +- repl/pom.xml | 3 + repl/scala-2.11/pom.xml| 1 + .../org/apache/livy/repl/SparkInterpreter.scala| 5 +- repl/{scala-2.11 => scala-2.12}/pom.xml| 11 +-- .../org/apache/livy/repl/SparkInterpreter.scala| 17 ++--- .../apache/livy/repl/SparkInterpreterSpec.scala| 68 + .../main/scala/org/apache/livy/repl/Session.scala | 4 +- .../org/apache/livy/repl/SQLInterpreterSpec.scala | 4 +- rsc/pom.xml| 6 +- .../org/apache/livy/rsc/driver/SparkEntries.java | 7 +- .../org/apache/livy/rsc/rpc/KryoMessageCodec.java | 7 -- {repl/scala-2.11 => scala-api/scala-2.12}/pom.xml | 17 ++--- scala-api/src/main/resources/build.marker | 0 .../org/apache/livy/scalaapi/ScalaJobHandle.scala | 8 ++ server/pom.xml | 9 ++- .../org/apache/livy/server/SessionServlet.scala| 2 +- .../server/interactive/InteractiveSession.scala| 6 +- .../org/apache/livy/utils/LivySparkUtils.scala | 4 +- .../apache/livy/server/BaseJsonServletSpec.scala | 3 +- .../apache/livy/server/SessionServletSpec.scala| 2 +- .../livy/server/batch/BatchServletSpec.scala | 2 +- .../livy/server/batch/BatchSessionSpec.scala | 6 +- .../InteractiveSessionServletSpec.scala| 3 +- .../interactive/InteractiveSessionSpec.scala | 2 +- .../livy/server/interactive/JobApiSpec.scala | 2 +- .../server/interactive/SessionHeartbeatSpec.scala | 2 +- .../server/recovery/FileSystemStateStoreSpec.scala | 2 +- 
.../livy/server/recovery/SessionStoreSpec.scala| 2 +- .../livy/server/recovery/StateStoreSpec.scala | 2 - .../server/recovery/ZooKeeperStateStoreSpec.scala | 2 +- .../apache/livy/sessions/SessionManagerSpec.scala | 2 +- .../apache/livy/utils/LivySparkUtilsSuite.scala| 5 ++ .../org/apache/livy/utils/SparkYarnAppSpec.scala | 2 +- .../org/apache/livy/test/jobs/SQLGetTweets.java| 2 +- .../livy/thriftserver/types/DataTypeUtils.scala| 5 +- .../livy/thriftserver/ThriftServerSuites.scala | 3 +- thriftserver/session/pom.xml | 13 .../thriftserver/session/ColumnBufferTest.java | 16 ++-- 55 files changed, 362 insertions(+), 189 deletions(-) diff --git a/.gitignore b/.gitignore index d46d49f..b1045ea 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ metastore_db/ derby.log dependency-reduced-pom.xml release-staging/ +venv/ # For python setup.py, which pollutes the source dirs. python-api/dist diff --git a/.rat-excludes b/.rat-excludes index ac
[incubator-livy] branch master updated: [MINOR] Modify the description of POST /sessions/{sessionId}/completion
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new ee7fdfc [MINOR] Modify the description of POST /sessions/{sessionId}/completion ee7fdfc is described below commit ee7fdfc45d90c0478dcd446bc8a19a217eebe04d Author: Shingo Furuyama AuthorDate: Thu Mar 26 14:59:21 2020 +0800 [MINOR] Modify the description of POST /sessions/{sessionId}/completion ## What changes were proposed in this pull request? Just modified a description of POST /sessions/{sessionId}/completion in the api-doc. ## How was this patch tested? Since the change is quite small, I didn't test the patch. If I have an instruction, I will follow it. Author: Shingo Furuyama Closes #285 from marblejenka/mod-doc-completion. --- docs/rest-api.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rest-api.md b/docs/rest-api.md index cca937f..d80e77d 100644 --- a/docs/rest-api.md +++ b/docs/rest-api.md @@ -312,7 +312,7 @@ Cancel the specified statement in this session. ### POST /sessions/{sessionId}/completion -Runs a statement in a session. +Returns code completion candidates for the specified code in the session. Request Body
[incubator-livy] branch master updated: [LIVY-751] Livy server should allow to customize LIVY_CLASSPATH
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new e39d8fe [LIVY-751] Livy server should allow to customize LIVY_CLASSPATH e39d8fe is described below commit e39d8fee43adbddf88acb2e04b470aa14b713785 Author: Shingo Furuyama AuthorDate: Thu Mar 26 14:07:42 2020 +0800 [LIVY-751] Livy server should allow to customize LIVY_CLASSPATH ## What changes were proposed in this pull request? The purpose and background is https://issues.apache.org/jira/browse/LIVY-751 ## How was this patch tested? I tested the following two manually. 1. To confirm there is no degradation, I run 0.7.0-incubating livy server with sources in this PR. I also run an example jobs, and it completed without error. 2. To confirm our workaround works, I build 0.7.0-incubating branch with specifying `-Dhadoop.scope=provided` and sources with this PR. After that, I added `export LIVY_CLASSPATH="$LIVY_HOME/jars/*:$(hadoop classpath)"` in conf/livy-env.sh and boot livy server. I also run an example jobs, and it completed without error. Author: Shingo Furuyama Author: Shingo Furuyama Closes #282 from marblejenka/livy-classpath. --- bin/livy-server | 2 +- conf/livy-env.sh.template | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/livy-server b/bin/livy-server index 8d27d4e..a0e2fb7 100755 --- a/bin/livy-server +++ b/bin/livy-server @@ -90,7 +90,7 @@ start_livy_server() { fi fi - LIVY_CLASSPATH="$LIBDIR/*:$LIVY_CONF_DIR" + LIVY_CLASSPATH="${LIVY_CLASSPATH:-${LIBDIR}/*:${LIVY_CONF_DIR}}" if [ -n "$SPARK_CONF_DIR" ]; then LIVY_CLASSPATH="$LIVY_CLASSPATH:$SPARK_CONF_DIR" diff --git a/conf/livy-env.sh.template b/conf/livy-env.sh.template index 7cba5c3..14f22c3 100644 --- a/conf/livy-env.sh.template +++ b/conf/livy-env.sh.template @@ -30,3 +30,4 @@ # names. (Default: name of the user starting Livy). 
# - LIVY_MAX_LOG_FILES Max number of log file to keep in the log directory. (Default: 5.) # - LIVY_NICENESS Niceness of the Livy server process when running in the background. (Default: 0.) +# - LIVY_CLASSPATH Override if the additional classpath is required.
[incubator-livy] branch master updated: [MINOR] Add description of POST /batches
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new d07d103 [MINOR] Add description of POST /batches d07d103 is described below commit d07d103f22941525d3cfa2f07f647e310ffb34a1 Author: Shingo Furuyama AuthorDate: Thu Mar 26 13:55:51 2020 +0800 [MINOR] Add description of POST /batches ## What changes were proposed in this pull request? Just added a description of POST /batches in the api-doc. ## How was this patch tested? Since the change is quite small, I didn't test the patch. If I have an instruction, I will follow it. Author: Shingo Furuyama Closes #283 from marblejenka/add-description. --- docs/rest-api.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/rest-api.md b/docs/rest-api.md index f1ff9b4..cca937f 100644 --- a/docs/rest-api.md +++ b/docs/rest-api.md @@ -389,6 +389,8 @@ Returns all the active batch sessions. ### POST /batches +Creates a new batch session. + Request Body
[incubator-livy] branch master updated (3a26856 -> 06a8d4f)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git. from 3a26856 [LIVY-745] Ensure that a single RSCClientFactory gets loaded. add 06a8d4f [LIVY-748] Add support for running Livy Integration tests against secure external clusters No new revisions were added by this update. Summary of changes: .../apache/livy/client/http/LivyConnection.java| 5 + integration-test/pom.xml | 4 +- .../test/framework/BaseIntegrationTestSuite.scala | 57 ++- .../org/apache/livy/test/framework/Cluster.scala | 44 +++- .../livy/test/framework/ExternalCluster.scala | 103 +++ .../livy/test/framework/LivyRestClient.scala | 113 + .../apache/livy/test/framework/MiniCluster.scala | 60 +++ .../resources/{rtest.R => cluster.spec.template} | 36 --- .../src/test/resources/test_python_api.py | 34 +-- .../test/scala/org/apache/livy/test/BatchIT.scala | 2 +- .../scala/org/apache/livy/test/InteractiveIT.scala | 8 +- .../test/scala/org/apache/livy/test/JobApiIT.scala | 21 +++- .../src/test/spark2/scala/Spark2JobApiIT.scala | 17 +++- pom.xml| 6 +- 14 files changed, 401 insertions(+), 109 deletions(-) create mode 100644 integration-test/src/main/scala/org/apache/livy/test/framework/ExternalCluster.scala copy integration-test/src/test/resources/{rtest.R => cluster.spec.template} (52%)
[incubator-livy] branch branch-0.7 updated: [MINOR] Fix CI breakage in python-api unit tests.
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch branch-0.7 in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/branch-0.7 by this push: new 7867d50 [MINOR] Fix CI breakage in python-api unit tests. 7867d50 is described below commit 7867d5034a27582583c5f96157871554a9172de7 Author: Wing Yew Poon AuthorDate: Tue Feb 4 16:01:52 2020 +0800 [MINOR] Fix CI breakage in python-api unit tests. ## What changes were proposed in this pull request? Freeze python mock library at 3.0.5 to avoid pulling in 4.0.0b1. ## How was this patch tested? Existing unit tests. Author: Wing Yew Poon Closes #279 from wypoon/CI_fix. (cherry picked from commit f4ab5ef5d389d5743410f5839ffc79aea8943c9c) Signed-off-by: jerryshao --- python-api/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-api/setup.py b/python-api/setup.py index 48edcc3..8ea624e 100644 --- a/python-api/setup.py +++ b/python-api/setup.py @@ -32,6 +32,7 @@ requirements = [ 'configparser>=3.5.0', 'future>=0.15.2', 'futures>=3.0.5', +'mock~=3.0.5', 'requests>=2.10.0', 'responses>=0.5.1', 'requests-kerberos>=0.11.0',
[incubator-livy] branch master updated: [MINOR] Fix CI breakage in python-api unit tests.
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new f4ab5ef [MINOR] Fix CI breakage in python-api unit tests. f4ab5ef is described below commit f4ab5ef5d389d5743410f5839ffc79aea8943c9c Author: Wing Yew Poon AuthorDate: Tue Feb 4 16:01:52 2020 +0800 [MINOR] Fix CI breakage in python-api unit tests. ## What changes were proposed in this pull request? Freeze python mock library at 3.0.5 to avoid pulling in 4.0.0b1. ## How was this patch tested? Existing unit tests. Author: Wing Yew Poon Closes #279 from wypoon/CI_fix. --- python-api/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python-api/setup.py b/python-api/setup.py index 3eeb323..709ff1a 100644 --- a/python-api/setup.py +++ b/python-api/setup.py @@ -32,6 +32,7 @@ requirements = [ 'configparser>=3.5.0', 'future>=0.15.2', 'futures>=3.0.5', +'mock~=3.0.5', 'requests>=2.10.0', 'responses>=0.5.1', 'requests-kerberos>=0.11.0',
[incubator-livy-website] branch master updated: Update website for 0.7.0 release
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy-website.git The following commit(s) were added to refs/heads/master by this push: new 8e7efe2 Update website for 0.7.0 release 8e7efe2 is described below commit 8e7efe289d9ec6a8e248a55e751562e2db23fa84 Author: jerryshao AuthorDate: Sun Feb 2 17:32:21 2020 +0800 Update website for 0.7.0 release --- site/_data/project.yml | 2 +- site/history.md| 18 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/site/_data/project.yml b/site/_data/project.yml index 292cc79..1c7aa92 100644 --- a/site/_data/project.yml +++ b/site/_data/project.yml @@ -24,7 +24,7 @@ incubator_slash_name: incubator/livy description: A REST Service for Apache Spark download: download -latest_release: 0.6.0-incubating +latest_release: 0.7.0-incubating dev_list: d...@livy.incubator.apache.org dev_list_subscribe: dev-subscr...@livy.incubator.apache.org diff --git a/site/history.md b/site/history.md index 26583ed..1515bf0 100644 --- a/site/history.md +++ b/site/history.md @@ -32,6 +32,24 @@ For a full list of releases, see Downloads are available on the [downloads page]({{ site.baseurl }}/download). +## https://github.com/apache/{{ site.data.project.incubator_name }}/releases/tag/v0.7.0-incubating">0.7.0-incubating / 2020-02-02 +{: #v0-7-0-incubating} + +New features + +* Livy 0.7.0 now requires Java 8, Scala 2.11 and Spark >= 2.2.0. With 0.7.0, JDBC/ODBC feature now becomes GA. + +* Added support for all current versions of Spark (2.2.x to 2.4.x). + +* [https://issues.apache.org/jira/browse/LIVY-575";>LIVY-575] + Hive-compatible JDBC / ODBC server GA. + +* [https://issues.apache.org/jira/browse/LIVY-678";>LIVY-678] + Add LDAP authorization support for REST, JDBC interface. + +* With various bugs fixed, details can be checked [https://issues.apache.org/jira/projects/LIVY/versions/12345179";>here]. 
+ + ## https://github.com/apache/{{ site.data.project.incubator_name }}/releases/tag/v0.6.0-incubating">0.6.0-incubating / 2019-04-01 {: #v0-6-0-incubating}
[incubator-livy-website] branch asf-site updated: Livy 0.7.0 release website
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/incubator-livy-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 1ee935a Livy 0.7.0 release website 1ee935a is described below commit 1ee935a40dacc45421efb20917e4f44f518ffa86 Author: jerryshao AuthorDate: Sun Feb 2 19:04:58 2020 +0800 Livy 0.7.0 release website --- content/community-members/index.html | 2 +- content/community/index.html | 2 +- content/download/index.html | 18 content/examples/index.html | 2 +- content/feed.xml | 4 ++-- content/get-started/index.html | 2 +- content/history/index.html | 26 +++- content/index.html | 4 ++-- content/news/2017/09/01/release-0.4.0/index.html | 2 +- content/news/2018/02/05/release-0.5.0/index.html | 2 +- content/release-process/index.html | 2 +- content/third-party-projects/index.html | 2 +- 12 files changed, 46 insertions(+), 22 deletions(-) diff --git a/content/community-members/index.html b/content/community-members/index.html index c6996e5..28840eb 100644 --- a/content/community-members/index.html +++ b/content/community-members/index.html @@ -270,7 +270,7 @@ - + diff --git a/content/community/index.html b/content/community/index.html index 8cd28a5..f5e518d 100644 --- a/content/community/index.html +++ b/content/community/index.html @@ -250,7 +250,7 @@ the JIRA in your pull request. 
- + diff --git a/content/download/index.html b/content/download/index.html index 430c882..8ff5859 100644 --- a/content/download/index.html +++ b/content/download/index.html @@ -172,16 +172,16 @@ -Apache Livy 0.6.0-incubating (zip) -https://www.apache.org/dyn/closer.lua/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip";>zip -https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip.sha512";>SHA-512 -https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip.asc";>ASC +Apache Livy 0.7.0-incubating (zip) +https://www.apache.org/dyn/closer.lua/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip";>zip +https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip.sha512";>SHA-512 +https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip.asc";>ASC -Apache Livy 0.6.0-incubating (source zip) -https://www.apache.org/dyn/closer.lua/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip";>zip -https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip.sha512";>SHA-512 -https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip.asc";>ASC +Apache Livy 0.7.0-incubating (source zip) +https://www.apache.org/dyn/closer.lua/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip";>zip +https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip.sha512";>SHA-512 +https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip.asc";>ASC @@ -220,7 +220,7 @@ succeed. - + diff --git a/content/examples/index.html b/content/examples/index.html index c50c22c..db8c336 100644 --- a/content/examples/index.html +++ b/content/examples/index.html @@ -343,7 +343,7 @@ Pi. 
This is from the https://spark.apache.org/examples.html";>Spark Exam - + diff --git a/content/feed.xml b/content/feed.xml index 19e4d93..eab24ab 100644 --- a/content/feed.xml +++ b/content/feed.xml @@ -5,8 +5,8 @@ / -Wed, 03 Apr 2019 09:51:26 -0700 -Wed, 03 Apr 2019 09:51:26 -0700 +Sun, 02 Feb 2020 17:45:09 +0800 +Sun, 02 Feb 2020 17:45:09 +0800 Jekyll v3.4.5 diff --git a/content/get-started/index.html b/content/get-started/index.html index e1185de..fc2f911 100644 --- a/content/get-started/index.html +++ b/content/get-started/index.html @@ -213,7 +213,7 @@ or you can check out the API documentation: - + diff --git a/content/histor
[incubator-livy] tag v0.7.0-incubating created (now 6645033)
This is an automated email from the ASF dual-hosted git repository. jshao pushed a change to tag v0.7.0-incubating in repository https://gitbox.apache.org/repos/asf/incubator-livy.git. at 6645033 (commit) No new revisions were added by this update.
svn commit: r37830 - /dev/incubator/livy/0.7.0-incubating-rc4/ /release/incubator/livy/0.7.0-incubating/
Author: jshao Date: Sun Feb 2 02:22:45 2020 New Revision: 37830 Log: Release Apache Livy 0.7.0-incubating Added: release/incubator/livy/0.7.0-incubating/ - copied from r37829, dev/incubator/livy/0.7.0-incubating-rc4/ Removed: dev/incubator/livy/0.7.0-incubating-rc4/
[incubator-livy] branch master updated: [LIVY-735][RSC] Fix rpc channel closed when multi clients connect to one driver
This is an automated email from the ASF dual-hosted git repository. jshao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-livy.git The following commit(s) were added to refs/heads/master by this push: new 66b5833 [LIVY-735][RSC] Fix rpc channel closed when multi clients connect to one driver 66b5833 is described below commit 66b5833e413bc10e39e3b92b585f496444c147d4 Author: runzhiwang AuthorDate: Wed Jan 8 17:15:04 2020 +0800 [LIVY-735][RSC] Fix rpc channel closed when multi clients connect to one driver ## What changes were proposed in this pull request? Currently, the driver tries to support communicating with multi-clients, by registering each client at https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java#L220. But actually, if multi-clients connect to one driver, the rpc channel will close; the reasons are as follows. 1. In every communication, the client sends two packages to the driver: header{type, id}, and payload at https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L144. 2. If client1 sends header1, payload1, and client2 sends header2, payload2 at the same time, the driver receives the packages in the order: header1, header2, payload1, payload2. 3. When the driver receives header1, the driver assigns lastHeader at https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L73. 4. Then the driver receives header2 and processes it as a payload at https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L78, which causes an exception and the rpc channel to close. In the multi-active HA mode, the design doc is at: https://docs.google.com/document/d/1bD3qYZpw14_NuCcSGUOfqQ0pqvSbCQsOLFuZp26Ohjc/edit?usp=sharing, the session is allocated among servers by consistent hashing. 
If a new livy joins, some sessions will be migrated from the old livy to the new livy. If the session client in the new livy connects to the driver before stopping the session client in the old livy, then two session clients will both connect to the driver, and the rpc channel closes. In this case, it's hard to e [...] How to fix: 1. Move the code of processing client message from `RpcDispatcher` to each `Rpc`. 2. Each `Rpc` registers itself to `channelRpc` in RpcDispatcher. 3. `RpcDispatcher` dispatches each message to `Rpc` according to `ctx.channel()`. ## How was this patch tested? Existing UT and IT Author: runzhiwang Closes #268 from runzhiwang/multi-client-one-driver. --- .../java/org/apache/livy/rsc/driver/RSCDriver.java | 1 + rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java | 185 - .../org/apache/livy/rsc/rpc/RpcDispatcher.java | 167 ++- 3 files changed, 196 insertions(+), 157 deletions(-) diff --git a/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java b/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java index 0d8eec5..a8f31f7 100644 --- a/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java +++ b/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java @@ -224,6 +224,7 @@ public class RSCDriver extends BaseProtocol { @Override public void onSuccess(Void unused) { clients.remove(client); +client.unRegisterRpc(); if (!inShutdown.get()) { setupIdleTimeout(); } diff --git a/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java b/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java index 868dc6d..5fce164 100644 --- a/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java +++ b/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java @@ -19,10 +19,11 @@ package org.apache.livy.rsc.rpc; import java.io.Closeable; import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.LinkedList; -import java.util.Map; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.*; +import 
java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; @@ -208,6 +209,7 @@ public class Rpc implements Closeable { dispatcher); Rpc rpc = new Rpc(new RSCConf(null), c, ImmediateEventExecutor.INSTANCE); rpc.dispatcher = dispatcher; +dispatcher.registerRpc(c, rpc); return rpc; } @@ -218,6 +220,10 @@ public class Rpc implements Closeable { private final EventExecutorGroup egroup; private volatile RpcDispatcher dispatcher; + private final Map, Method> handlers = new ConcurrentHashMap<>(); + private final Collection rpcCalls = new Concurr