(gravitino) branch main updated: [#4132] feat(bundled-catalog): remove all propertiesMeta from bundled catalog (#4178)

2024-07-22 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new b75d58da7 [#4132] feat(bundled-catalog): remove all propertiesMeta 
from bundled catalog (#4178)
b75d58da7 is described below

commit b75d58da76cfd62e4524645b7bff5fd5a736ad04
Author: FANNG 
AuthorDate: Tue Jul 23 14:54:34 2024 +0800

[#4132] feat(bundled-catalog): remove all propertiesMeta from bundled 
catalog (#4178)

### What changes were proposed in this pull request?
- remove all propertiesMeta from bundled catalog
- rename `bundled-catalog` to `catalog-common`

### Why are the changes needed?

Fix: #4132

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
existing tests
---
 catalogs/bundled-catalog/build.gradle.kts  | 100 ---
 .../gravitino/catalog/common/ClassProvider.java|  85 -
 .../build.gradle.kts}  |  25 +---
 .../gravitino/catalog/hive/HiveConstants.java  |  48 
 .../catalog/hive/HiveStorageConstants.java |  63 ++
 .../gravitino/catalog/hive/StorageFormat.java  |  83 +
 .../apache/gravitino/catalog/hive/TableType.java}  |  27 ++---
 .../gravitino/catalog/jdbc/JdbcConstants.java} |  21 +---
 .../lakehouse/iceberg/IcebergConstants.java|   0
 .../gravitino/catalog/mysql/MysqlConstants.java}   |  26 +---
 .../catalog/property/PropertyConverter.java|   0
 catalogs/catalog-hive/build.gradle.kts |   1 +
 .../catalog/hive/HiveCatalogOperations.java|   1 -
 .../catalog/hive/HiveCatalogPropertiesMeta.java|  18 +--
 .../catalog/hive/HiveSchemaPropertiesMetadata.java |   2 +-
 .../apache/gravitino/catalog/hive/HiveTable.java   |   9 +-
 .../catalog/hive/HiveTablePropertiesMetadata.java  | 135 +++--
 .../hive/integration/test/CatalogHiveIT.java   |  22 ++--
 catalogs/catalog-jdbc-common/build.gradle.kts  |   1 +
 .../catalog/jdbc/JdbcTablePropertiesMetadata.java  |   2 +-
 catalogs/catalog-jdbc-mysql/build.gradle.kts   |   1 +
 .../mysql/MysqlTablePropertiesMetadata.java|  10 +-
 .../catalog-lakehouse-iceberg/build.gradle.kts |   2 +-
 flink-connector/build.gradle.kts   |   2 +-
 .../connector/hive/HivePropertiesConverter.java|  14 +--
 .../hive/TestHivePropertiesConverter.java  |   5 +-
 .../connector/integration/test/FlinkCommonIT.java  |   4 +-
 .../integration/test/hive/FlinkHiveCatalogIT.java  |   6 +-
 settings.gradle.kts|   2 +-
 spark-connector/spark-common/build.gradle.kts  |   2 +-
 .../connector/hive/HivePropertiesConstants.java|  47 ---
 .../connector/hive/HivePropertiesConverter.java|   5 +-
 spark-connector/v3.3/spark/build.gradle.kts|   1 +
 spark-connector/v3.4/spark/build.gradle.kts|   1 +
 spark-connector/v3.5/spark/build.gradle.kts|   1 +
 trino-connector/build.gradle.kts   |   2 +-
 .../catalog/hive/HiveSchemaPropertyConverter.java  |   4 +-
 .../catalog/hive/HiveTablePropertyConverter.java   |  26 ++--
 .../jdbc/mysql/MySQLTablePropertyConverter.java|   7 +-
 .../hive/TestHiveCatalogPropertyConverter.java |  16 ---
 40 files changed, 329 insertions(+), 498 deletions(-)

diff --git a/catalogs/bundled-catalog/build.gradle.kts 
b/catalogs/bundled-catalog/build.gradle.kts
deleted file mode 100644
index a9cc1141b..0
--- a/catalogs/bundled-catalog/build.gradle.kts
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar
-
-plugins {
-  id("java")
-  alias(libs.plugins.shadow)
-}
-
-dependencies {
-  implementation(project(":catalogs:catalog-hive"))
-  implementation(project(":catalogs:catalog-jdbc-common"))
-  implementation(project(":catalogs:catalog-jdbc-mysql"))
-  implementation(project(":catalogs:catalo

(gravitino) branch main updated: [#4143] improvment(core): Optimize the privileges of access control (#4214)

2024-07-22 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new cf3293523 [#4143] improvment(core): Optimize the privileges of access 
control (#4214)
cf3293523 is described below

commit cf329352398d19fd1c1645522d9bfd909b9ccffc
Author: roryqi 
AuthorDate: Tue Jul 23 14:40:09 2024 +0800

[#4143] improvment(core): Optimize the privileges of access control (#4214)

### What changes were proposed in this pull request?

Optimize the privileges of access control

### Why are the changes needed?

Fix: #4143

### Does this PR introduce _any_ user-facing change?
No need.

### How was this patch tested?
Existing tests.
---
 .../apache/gravitino/authorization/Privilege.java  |  28 +--
 .../apache/gravitino/authorization/Privileges.java | 249 -
 2 files changed, 46 insertions(+), 231 deletions(-)

diff --git 
a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java 
b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
index 3527ead1b..8ec9bb6a2 100644
--- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
+++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
@@ -67,28 +67,14 @@ public interface Privilege {
 PRODUCE_TOPIC(0L, 1L << 12),
 /** The privilege to consume from a topic. */
 CONSUME_TOPIC(0L, 1L << 13),
-/** The privilege to add a user */
-ADD_USER(0L, 1L << 14),
-/** The privilege to remove a user */
-REMOVE_USER(0L, 1L << 15),
-/** The privilege to get a user */
-GET_USER(0L, 1L << 16),
-/** The privilege to add a group */
-ADD_GROUP(0L, 1L << 17),
-/** The privilege to remove a group */
-REMOVE_GROUP(0L, 1L << 18),
-/** The privilege to get a group */
-GET_GROUP(0L, 1L << 19),
+/** The privilege to create a user */
+CREATE_USER(0L, 1L << 14),
+/** The privilege to create a group */
+CREATE_GROUP(0L, 1L << 15),
 /** The privilege to create a role */
-CREATE_ROLE(0L, 1L << 20),
-/** The privilege to delete a role */
-DELETE_ROLE(0L, 1L << 21),
-/** The privilege to grant a role to the user or the group. */
-GRANT_ROLE(0L, 1L << 22),
-/** The privilege to revoke a role from the user or the group. */
-REVOKE_ROLE(0L, 1L << 23),
-/** The privilege to get a role */
-GET_ROLE(0L, 1L << 24);
+CREATE_ROLE(0L, 1L << 16),
+/** The privilege to grant or revoke a role for the user or the group. */
+MANAGE_GRANTS(0L, 1L << 17);
 
 private final long highBits;
 private final long lowBits;
diff --git 
a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java 
b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java
index 07a745760..6947ced25 100644
--- a/api/src/main/java/org/apache/gravitino/authorization/Privileges.java
+++ b/api/src/main/java/org/apache/gravitino/authorization/Privileges.java
@@ -79,32 +79,18 @@ public class Privileges {
 return ConsumeTopic.allow();
 
 // User
-  case ADD_USER:
-return AddUser.allow();
-  case REMOVE_USER:
-return RemoveUser.allow();
-  case GET_USER:
-return GetUser.allow();
+  case CREATE_USER:
+return CreateUser.allow();
 
 // Group
-  case ADD_GROUP:
-return AddGroup.allow();
-  case REMOVE_GROUP:
-return RemoveGroup.allow();
-  case GET_GROUP:
-return GetGroup.allow();
+  case CREATE_GROUP:
+return CreateGroup.allow();
 
 // Role
   case CREATE_ROLE:
 return CreateRole.allow();
-  case DELETE_ROLE:
-return DeleteRole.allow();
-  case GRANT_ROLE:
-return GrantRole.allow();
-  case REVOKE_ROLE:
-return RevokeRole.allow();
-  case GET_ROLE:
-return GetRole.allow();
+  case MANAGE_GRANTS:
+return ManageGrants.allow();
 
   default:
 throw new IllegalArgumentException("Doesn't support the privilege: " + 
name);
@@ -167,32 +153,18 @@ public class Privileges {
 return ConsumeTopic.deny();
 
 // User
-  case ADD_USER:
-return AddUser.deny();
-  case REMOVE_USER:
-return RemoveUser.deny();
-  case GET_USER:
-return GetUser.deny();
+  case CREATE_USER:
+return CreateUser.deny();
 
 // Group
-  case ADD_GROUP:
-return AddGroup.deny();
-  case REMOVE_GROUP:
-return RemoveGroup.deny();
-  case GET_GROUP:
-return GetGroup.deny();
+  case CREATE_GROUP:
+return CreateGroup.deny();
 
 // Role
   case CREATE_ROLE:
 return CreateRole.deny(

(gravitino) branch main updated: [#3914] feat(server): Add REST server interface for Tag System (#3943)

2024-07-22 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 71e6651e2 [#3914] feat(server): Add REST server interface for Tag 
System (#3943)
71e6651e2 is described below

commit 71e6651e2bc7359a9c9b694fa7275871e4e11969
Author: Jerry Shao 
AuthorDate: Mon Jul 22 20:48:52 2024 +0800

[#3914] feat(server): Add REST server interface for Tag System (#3943)

### What changes were proposed in this pull request?

This PR proposes to add REST server interface for Tag System

### Why are the changes needed?

This is a part of work for Tag system.

Fix: #3914

### Does this PR introduce _any_ user-facing change?

Yes

### How was this patch tested?

UTs added.

-

Co-authored-by: bknbkn <67318028+bkn...@users.noreply.github.com>
Co-authored-by: Dev Parikh <51128342+dev79...@users.noreply.github.com>
Co-authored-by: roryqi 
Co-authored-by: JinsYin 
Co-authored-by: rqyin 
---
 .../java/org/apache/gravitino/MetadataObjects.java |   41 +
 .../gravitino/client/ObjectMapperProvider.java |4 +-
 .../gravitino/dto/requests/TagCreateRequest.java   |   76 ++
 .../gravitino/dto/requests/TagUpdateRequest.java   |  201 
 .../gravitino/dto/requests/TagUpdatesRequest.java  |   57 +
 .../dto/requests/TagsAssociateRequest.java |   82 ++
 .../dto/responses/MetadataObjectListResponse.java  |   73 ++
 .../gravitino/dto/responses/NameListResponse.java  |   65 ++
 .../gravitino/dto/responses/TagListResponse.java   |   65 ++
 .../gravitino/dto/responses/TagResponse.java   |   62 ++
 .../gravitino/dto/tag/MetadataObjectDTO.java   |  124 +++
 .../java/org/apache/gravitino/dto/tag/TagDTO.java  |  147 +++
 .../apache/gravitino/dto/util/DTOConverters.java   |   38 +
 .../java/org/apache/gravitino/json/JsonUtils.java  |7 +-
 .../dto/requests/TestTagCreateRequest.java |   49 +
 .../dto/requests/TestTagUpdatesRequest.java|   91 ++
 .../gravitino/dto/responses/TestResponses.java |   57 +
 .../gravitino/dto/tag/TestMetadataObjectDTO.java   |  150 +++
 .../org/apache/gravitino/dto/tag/TestTagDTO.java   |  103 ++
 .../gravitino/tag/SupportsTagOperations.java   |1 -
 .../java/org/apache/gravitino/tag/TagManager.java  |7 +-
 .../apache/gravitino/server/GravitinoServer.java   |3 +-
 .../gravitino/server/web/ObjectMapperProvider.java |4 +-
 .../server/web/rest/ExceptionHandlers.java |   42 +
 .../gravitino/server/web/rest/OperationType.java   |3 +-
 .../gravitino/server/web/rest/TagOperations.java   |  451 
 .../server/web/rest/TestTagOperations.java | 1099 
 27 files changed, 3093 insertions(+), 9 deletions(-)

diff --git a/api/src/main/java/org/apache/gravitino/MetadataObjects.java 
b/api/src/main/java/org/apache/gravitino/MetadataObjects.java
index 5136164c9..6bd72137e 100644
--- a/api/src/main/java/org/apache/gravitino/MetadataObjects.java
+++ b/api/src/main/java/org/apache/gravitino/MetadataObjects.java
@@ -22,6 +22,7 @@ import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import java.util.List;
+import javax.annotation.Nullable;
 import org.apache.commons.lang3.StringUtils;
 
 /** The helper class for {@link MetadataObject}. */
@@ -94,6 +95,46 @@ public class MetadataObjects {
 return new MetadataObjectImpl(getParentFullName(names), 
getLastName(names), type);
   }
 
+  /**
+   * Get the parent metadata object of the given metadata object.
+   *
+   * @param object The metadata object
+   * @return The parent metadata object if it exists, otherwise null
+   */
+  @Nullable
+  public static MetadataObject parent(MetadataObject object) {
+if (object == null) {
+  return null;
+}
+
+// Return null if the object is the root object
+if (object.type() == MetadataObject.Type.METALAKE
+|| object.type() == MetadataObject.Type.CATALOG) {
+  return null;
+}
+
+MetadataObject.Type parentType;
+switch (object.type()) {
+  case COLUMN:
+parentType = MetadataObject.Type.TABLE;
+break;
+  case TABLE:
+  case FILESET:
+  case TOPIC:
+parentType = MetadataObject.Type.SCHEMA;
+break;
+  case SCHEMA:
+parentType = MetadataObject.Type.CATALOG;
+break;
+
+  default:
+throw new IllegalArgumentException(
+"Unexpected to reach here for metadata object type: " + 
object.type());
+}
+
+return parse(object.parent(), parentType);
+  }
+
   /**
* Parse the metadata object with the given full name and type.
*
diff --git 
a/clients/client-java/src/main/java/org/apache/gravitino/client/ObjectMapperProvider.java
 
b/c

(gravitino) branch main updated: [#4197] improvement(common): CatalogListResponse should implement the method validate (#4223)

2024-07-22 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 1e8511ae6 [#4197] improvement(common): CatalogListResponse should 
implement the method validate (#4223)
1e8511ae6 is described below

commit 1e8511ae63df75545c4804d94eda2120ed8cfffc
Author: jingjia88 <32607481+jingji...@users.noreply.github.com>
AuthorDate: Mon Jul 22 18:20:48 2024 +0800

[#4197] improvement(common): CatalogListResponse should implement the 
method validate (#4223)

### What changes were proposed in this pull request?

Add method `validate` in CatalogListResponse

### Why are the changes needed?

Fix: #4197

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Add the new ut.
---
 .../dto/responses/CatalogListResponse.java | 26 ++
 .../gravitino/dto/responses/TestResponses.java | 22 ++
 2 files changed, 48 insertions(+)

diff --git 
a/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java
 
b/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java
index dfb31417a..6d069ae46 100644
--- 
a/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java
+++ 
b/common/src/main/java/org/apache/gravitino/dto/responses/CatalogListResponse.java
@@ -19,9 +19,12 @@
 package org.apache.gravitino.dto.responses;
 
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Preconditions;
+import java.util.Arrays;
 import lombok.EqualsAndHashCode;
 import lombok.Getter;
 import lombok.ToString;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.gravitino.dto.CatalogDTO;
 
 /** Represents a response for a list of catalogs with their information. */
@@ -51,4 +54,27 @@ public class CatalogListResponse extends BaseResponse {
 super();
 this.catalogs = null;
   }
+
+  /**
+   * Validates the response data.
+   *
+   * @throws IllegalArgumentException if name, type or audit information is 
not set.
+   */
+  @Override
+  public void validate() throws IllegalArgumentException {
+super.validate();
+
+Preconditions.checkArgument(catalogs != null, "catalogs must be non-null");
+Arrays.stream(catalogs)
+.forEach(
+catalog -> {
+  Preconditions.checkArgument(
+  StringUtils.isNotBlank(catalog.name()),
+  "catalog 'name' must not be null and empty");
+  Preconditions.checkArgument(
+  catalog.type() != null, "catalog 'type' must not be null");
+  Preconditions.checkArgument(
+  catalog.auditInfo() != null, "catalog 'audit' must not be 
null");
+});
+  }
 }
diff --git 
a/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java 
b/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java
index 8d7c52808..7f9ebfeb7 100644
--- a/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java
+++ b/common/src/test/java/org/apache/gravitino/dto/responses/TestResponses.java
@@ -146,6 +146,28 @@ public class TestResponses {
 assertThrows(IllegalArgumentException.class, () -> catalog.validate());
   }
 
+  @Test
+  void testCatalogListResponse() throws IllegalArgumentException {
+AuditDTO audit =
+
AuditDTO.builder().withCreator("creator").withCreateTime(Instant.now()).build();
+CatalogDTO catalog =
+CatalogDTO.builder()
+.withName("CatalogA")
+.withComment("comment")
+.withType(Catalog.Type.RELATIONAL)
+.withProvider("test")
+.withAudit(audit)
+.build();
+CatalogListResponse response = new CatalogListResponse(new CatalogDTO[] 
{catalog});
+response.validate(); // No exception thrown
+  }
+
+  @Test
+  void testCatalogListException() throws IllegalArgumentException {
+CatalogListResponse response = new CatalogListResponse();
+assertThrows(IllegalArgumentException.class, () -> response.validate());
+  }
+
   @Test
   void testSchemaResponse() throws IllegalArgumentException {
 AuditDTO audit =



(gravitino) branch main updated: [#4140] improvement(core): Optimize the privileges of securable objects (#4141)

2024-07-19 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new f3abe9d6f [#4140] improvement(core): Optimize the privileges of 
securable objects (#4141)
f3abe9d6f is described below

commit f3abe9d6ff1b418515f6d68758003fa0af38b5e0
Author: roryqi 
AuthorDate: Fri Jul 19 18:20:38 2024 +0800

[#4140] improvement(core): Optimize the privileges of securable objects 
(#4141)

### What changes were proposed in this pull request?
Optimize the privileges of securable objects

### Why are the changes needed?

Fix: #4140

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Existing tests.
---
 .../apache/gravitino/authorization/Privilege.java  |  68 ++---
 .../apache/gravitino/authorization/Privileges.java | 278 -
 .../authorization/TestSecurableObjects.java|  14 +-
 .../gravitino/proto/TestEntityProtoSerDe.java  |   2 +-
 .../relational/service/TestRoleMetaService.java|   2 +-
 .../relational/service/TestSecurableObjects.java   |   6 +-
 .../server/web/rest/TestRoleOperations.java|  14 +-
 7 files changed, 100 insertions(+), 284 deletions(-)

diff --git 
a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java 
b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
index 5cb7b3214..3527ead1b 100644
--- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
+++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
@@ -43,66 +43,52 @@ public interface Privilege {
   enum Name {
 /** The privilege to create a catalog. */
 CREATE_CATALOG(0L, 1L),
-/** The privilege to drop a catalog. */
-DROP_CATALOG(0L, 1L << 1),
-/** The privilege to alter a catalog. */
-ALTER_CATALOG(0L, 1L << 2),
 /** The privilege to use a catalog. */
-USE_CATALOG(0L, 1L << 3),
+USE_CATALOG(0L, 1L << 2),
 /** The privilege to create a schema. */
-CREATE_SCHEMA(0L, 1L << 4),
-/** The privilege to drop a schema. */
-DROP_SCHEMA(0L, 1L << 5),
-/** The privilege to alter a schema. */
-ALTER_SCHEMA(0L, 1L << 6),
+CREATE_SCHEMA(0L, 1L << 3),
 /** the privilege to use a schema. */
-USE_SCHEMA(0L, 1L << 7),
+USE_SCHEMA(0L, 1L << 4),
 /** The privilege to create a table. */
-CREATE_TABLE(0L, 1L << 8),
-/** The privilege to drop a table. */
-DROP_TABLE(0L, 1L << 9),
-/** The privilege to write a table. */
-WRITE_TABLE(0L, 1L << 10),
-/** The privilege to read a table. */
-READ_TABLE(0L, 1L << 11),
+CREATE_TABLE(0L, 1L << 5),
+/** The privilege to execute SQL `ALTER`, `INSERT`, `UPDATE`, or `DELETE` 
for a table. */
+MODIFY_TABLE(0L, 1L << 6),
+/** The privilege to select data from a table. */
+SELECT_TABLE(0L, 1L << 7),
 /** The privilege to create a fileset. */
-CREATE_FILESET(0L, 1L << 12),
-/** The privilege to drop a fileset. */
-DROP_FILESET(0L, 1L << 13),
+CREATE_FILESET(0L, 1L << 8),
 /** The privilege to write a fileset. */
-WRITE_FILESET(0L, 1L << 14),
+WRITE_FILESET(0L, 1L << 9),
 /** The privilege to read a fileset. */
-READ_FILESET(0L, 1L << 15),
+READ_FILESET(0L, 1L << 10),
 /** The privilege to create a topic. */
-CREATE_TOPIC(0L, 1L << 16),
-/** The privilege to drop a topic. */
-DROP_TOPIC(0L, 1L << 17),
-/** The privilege to write a topic. */
-WRITE_TOPIC(0L, 1L << 18),
-/** The privilege to read a topic. */
-READ_TOPIC(0L, 1L << 19),
+CREATE_TOPIC(0L, 1L << 11),
+/** The privilege to produce to a topic. */
+PRODUCE_TOPIC(0L, 1L << 12),
+/** The privilege to consume from a topic. */
+CONSUME_TOPIC(0L, 1L << 13),
 /** The privilege to add a user */
-ADD_USER(0L, 1L << 20),
+ADD_USER(0L, 1L << 14),
 /** The privilege to remove a user */
-REMOVE_USER(0L, 1L << 21),
+REMOVE_USER(0L, 1L << 15),
 /** The privilege to get a user */
-GET_USER(0L, 1L << 22),
+GET_USER(0L, 1L << 16),
 /** The privilege to add a group */
-ADD_GROUP(0L, 1L << 23),
+ADD_GROUP(0L, 1L << 17),
 /** The privilege to remove a group */
-REMOVE_GROUP(0L, 1L << 24),
+REMOVE_GROUP(0L, 1L << 18),
 /** The privilege to get a group */
-GET_GROUP(0L, 1L << 25),
+GET_GROUP(0L, 1L << 19),
 /** The privilege to create a role */
-CREATE_ROLE(0L, 1L << 26),
+CREATE_ROLE(0L, 1L << 20),
 /** The privilege to delete a role */
-DELETE_ROLE(0

(gravitino) branch main updated: [#4195] improvement(core): Decouple `OperationDispatcher` from `NormalizeDispatcher` (#4196)

2024-07-19 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new d6721459d [#4195] improvement(core): Decouple `OperationDispatcher` 
from `NormalizeDispatcher` (#4196)
d6721459d is described below

commit d6721459d499d1237cfcb5b0527ed5252e2462ce
Author: mchades 
AuthorDate: Fri Jul 19 18:06:44 2024 +0800

[#4195] improvement(core): Decouple `OperationDispatcher` from 
`NormalizeDispatcher` (#4196)

### What changes were proposed in this pull request?

 - Decouple `OperationDispatcher` from `NormalizeDispatcher`
- move `getCatalogCapability` method from `OperationDispatcher` to
`CapabilityHelpers`

### Why are the changes needed?

Fix: #4195

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

existing tests
---
 .../java/org/apache/gravitino/GravitinoEnv.java| 11 ++---
 .../gravitino/catalog/CapabilityHelpers.java   | 23 ++
 .../catalog/FilesetNormalizeDispatcher.java| 48 ++---
 .../catalog/FilesetOperationDispatcher.java|  1 +
 .../gravitino/catalog/OperationDispatcher.java | 40 +-
 .../catalog/PartitionNormalizeDispatcher.java  | 47 ++---
 .../catalog/SchemaNormalizeDispatcher.java | 38 -
 .../catalog/SchemaOperationDispatcher.java |  1 +
 .../catalog/TableNormalizeDispatcher.java  | 49 +++---
 .../catalog/TableOperationDispatcher.java  |  1 +
 .../catalog/TopicNormalizeDispatcher.java  | 47 +++--
 .../catalog/TopicOperationDispatcher.java  |  1 +
 .../apache/gravitino/utils/NameIdentifierUtil.java | 32 ++
 .../catalog/TestFilesetNormalizeDispatcher.java|  6 ++-
 .../gravitino/catalog/TestOperationDispatcher.java | 13 +++---
 .../catalog/TestPartitionNormalizeDispatcher.java  |  2 +-
 .../catalog/TestSchemaNormalizeDispatcher.java |  2 +-
 .../catalog/TestTableNormalizeDispatcher.java  |  6 ++-
 .../catalog/TestTopicNormalizeDispatcher.java  |  6 ++-
 19 files changed, 232 insertions(+), 142 deletions(-)

diff --git a/core/src/main/java/org/apache/gravitino/GravitinoEnv.java 
b/core/src/main/java/org/apache/gravitino/GravitinoEnv.java
index b307cddbd..cf95dd7e7 100644
--- a/core/src/main/java/org/apache/gravitino/GravitinoEnv.java
+++ b/core/src/main/java/org/apache/gravitino/GravitinoEnv.java
@@ -154,30 +154,31 @@ public class GravitinoEnv {
 SchemaOperationDispatcher schemaOperationDispatcher =
 new SchemaOperationDispatcher(catalogManager, entityStore, 
idGenerator);
 SchemaNormalizeDispatcher schemaNormalizeDispatcher =
-new SchemaNormalizeDispatcher(schemaOperationDispatcher);
+new SchemaNormalizeDispatcher(schemaOperationDispatcher, 
catalogManager);
 this.schemaDispatcher = new SchemaEventDispatcher(eventBus, 
schemaNormalizeDispatcher);
 
 TableOperationDispatcher tableOperationDispatcher =
 new TableOperationDispatcher(catalogManager, entityStore, idGenerator);
 TableNormalizeDispatcher tableNormalizeDispatcher =
-new TableNormalizeDispatcher(tableOperationDispatcher);
+new TableNormalizeDispatcher(tableOperationDispatcher, catalogManager);
 this.tableDispatcher = new TableEventDispatcher(eventBus, 
tableNormalizeDispatcher);
 
 PartitionOperationDispatcher partitionOperationDispatcher =
 new PartitionOperationDispatcher(catalogManager, entityStore, 
idGenerator);
 // todo: support PartitionEventDispatcher
-this.partitionDispatcher = new 
PartitionNormalizeDispatcher(partitionOperationDispatcher);
+this.partitionDispatcher =
+new PartitionNormalizeDispatcher(partitionOperationDispatcher, 
catalogManager);
 
 FilesetOperationDispatcher filesetOperationDispatcher =
 new FilesetOperationDispatcher(catalogManager, entityStore, 
idGenerator);
 FilesetNormalizeDispatcher filesetNormalizeDispatcher =
-new FilesetNormalizeDispatcher(filesetOperationDispatcher);
+new FilesetNormalizeDispatcher(filesetOperationDispatcher, 
catalogManager);
 this.filesetDispatcher = new FilesetEventDispatcher(eventBus, 
filesetNormalizeDispatcher);
 
 TopicOperationDispatcher topicOperationDispatcher =
 new TopicOperationDispatcher(catalogManager, entityStore, idGenerator);
 TopicNormalizeDispatcher topicNormalizeDispatcher =
-new TopicNormalizeDispatcher(topicOperationDispatcher);
+new TopicNormalizeDispatcher(topicOperationDispatcher, catalogManager);
 this.topicDispatcher = new TopicEventDispatcher(eventBus, 
topicNormalizeDispatcher);
 
 // Create and initialize access control related modules
diff --git 
a/core/src/main/java/org/apache

(gravitino) branch main updated: [#3755] improvement(client-python): Support OAuth2TokenProvider for Python client (#4011)

2024-07-18 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 213bcc9f2 [#3755] improvement(client-python): Support 
OAuth2TokenProvider for Python client (#4011)
213bcc9f2 is described below

commit 213bcc9f28102a3b472a8b2d9629525e9d00d269
Author: noidname01 <55401762+noidnam...@users.noreply.github.com>
AuthorDate: Fri Jul 19 10:53:57 2024 +0800

[#3755] improvement(client-python): Support OAuth2TokenProvider for Python 
client (#4011)

### What changes were proposed in this pull request?

* Add `OAuth2TokenProvider` and `DefaultOAuth2TokenProvider` in
`client-python`
* There are some components and tests missing because it would be a big
code change if they were also done in this PR, they will be added in the
following PRs
- [ ] Error Handling: #4173
- [ ] Integration Test: #4208
* Modify test file structure, and found issue #4136; solved it by resetting
the environment variable.

### Why are the changes needed?

Fix: #3755, #4136

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Add UT and tested by `./gradlew clients:client-python:unittest`

-

Co-authored-by: TimWang 
---
 .../client-python/gravitino/auth/auth_constants.py |   2 +
 .../auth/default_oauth2_token_provider.py  | 133 +++
 .../gravitino/auth/oauth2_token_provider.py|  75 +++
 .../gravitino/auth/simple_auth_provider.py |   4 +-
 .../requests/oauth2_client_credential_request.py}  |  15 ++-
 .../dto/responses/oauth2_token_response.py |  55 
 .../client-python/gravitino/utils/http_client.py   |  36 --
 clients/client-python/requirements-dev.txt |   3 +-
 .../tests/integration/test_simple_auth_client.py   |   2 +
 .../unittests/auth/__init__.py}|   6 -
 .../tests/unittests/auth/mock_base.py  | 144 +
 .../unittests/auth/test_oauth2_token_provider.py   |  93 +
 .../{ => auth}/test_simple_auth_provider.py|   4 +
 13 files changed, 551 insertions(+), 21 deletions(-)

diff --git a/clients/client-python/gravitino/auth/auth_constants.py 
b/clients/client-python/gravitino/auth/auth_constants.py
index 2494030fc..247abcaaa 100644
--- a/clients/client-python/gravitino/auth/auth_constants.py
+++ b/clients/client-python/gravitino/auth/auth_constants.py
@@ -21,4 +21,6 @@ under the License.
 class AuthConstants:
 HTTP_HEADER_AUTHORIZATION: str = "Authorization"
 
+AUTHORIZATION_BEARER_HEADER: str = "Bearer "
+
 AUTHORIZATION_BASIC_HEADER: str = "Basic "
diff --git 
a/clients/client-python/gravitino/auth/default_oauth2_token_provider.py 
b/clients/client-python/gravitino/auth/default_oauth2_token_provider.py
new file mode 100644
index 0..3fb730395
--- /dev/null
+++ b/clients/client-python/gravitino/auth/default_oauth2_token_provider.py
@@ -0,0 +1,133 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+"""
+
+import time
+import json
+import base64
+from typing import Optional
+from gravitino.auth.oauth2_token_provider import OAuth2TokenProvider
+from gravitino.dto.responses.oauth2_token_response import OAuth2TokenResponse
+from gravitino.dto.requests.oauth2_client_credential_request import (
+OAuth2ClientCredentialRequest,
+)
+from gravitino.exceptions.base import GravitinoRuntimeException
+
+CLIENT_CREDENTIALS = "client_credentials"
+CREDENTIAL_SPLITTER = ":"
+TOKEN_SPLITTER = "."
+JWT_EXPIRE = "exp"
+
+
+class DefaultOAuth2TokenProvider(OAuth2TokenProvider):
+"""This class is the default implement of OAuth2TokenProvider."""
+
+_credential: Optional[str]
+_scope: Optional[str]
+_path: Optional[str]
+_token: Optional[str]
+
+def __init__(
+self,
+uri: str = None,
+credential: str = None,
+scope: str = None,
+  

(gravitino) branch main updated: [#3985] fix(hadooop-catalog): Create fileset catalog with empty location property success, but can't list schema of the catalog (#4177)

2024-07-18 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 9be7cc999 [#3985] fix(hadooop-catalog): Create fileset catalog with 
empty location property success, but can't list schema of the catalog (#4177)
9be7cc999 is described below

commit 9be7cc999f97fe0962a53945f95bfbd46b7e90c0
Author: liuxian <39123327+dataxplorel...@users.noreply.github.com>
AuthorDate: Thu Jul 18 15:03:50 2024 +0800

[#3985] fix(hadooop-catalog): Create fileset catalog with empty location 
property success, but can't list schema of the catalog (#4177)

### What changes were proposed in this pull request?

Check if the catalogLocation is empty when initializing

### Why are the changes needed?

Fix: #3985

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing pipeline.

Before:
https://github.com/user-attachments/assets/42003bf4-f6a2-4729-98fc-bf139a811daf

After:
https://github.com/user-attachments/assets/531dbc3b-7ad3-4949-8910-ea99bb31baa1
---
 .../catalog/hadoop/HadoopCatalogOperations.java  |  5 -
 .../catalog/hadoop/TestHadoopCatalogOperations.java  | 16 
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
index b82eaa359..6b49c1310 100644
--- 
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
+++ 
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
@@ -135,7 +135,10 @@ public class HadoopCatalogOperations implements 
CatalogOperations, SupportsSchem
 conf.forEach(hadoopConf::set);
 
 initAuthentication(conf, hadoopConf);
-this.catalogStorageLocation = 
Optional.ofNullable(catalogLocation).map(Path::new);
+this.catalogStorageLocation =
+StringUtils.isNotBlank(catalogLocation)
+? Optional.of(catalogLocation).map(Path::new)
+: Optional.empty();
   }
 
   private void initAuthentication(Map conf, Configuration 
hadoopConf) {
diff --git 
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
index 284070f0b..3c8a4d463 100644
--- 
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
+++ 
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
@@ -230,6 +230,22 @@ public class TestHadoopCatalogOperations {
 Assertions.assertEquals("Schema m1.c1.schema11 already exists", 
exception.getMessage());
   }
 
+  @Test
+  public void testCreateSchemaWithEmptyCatalogLocation() throws IOException {
+String name = "schema28";
+String comment = "comment28";
+String catalogPath = "";
+Schema schema = createSchema(name, comment, catalogPath, null);
+Assertions.assertEquals(name, schema.name());
+Assertions.assertEquals(comment, schema.comment());
+
+Throwable exception =
+Assertions.assertThrows(
+SchemaAlreadyExistsException.class,
+() -> createSchema(name, comment, catalogPath, null));
+Assertions.assertEquals("Schema m1.c1.schema28 already exists", 
exception.getMessage());
+  }
+
   @Test
   public void testCreateSchemaWithCatalogLocation() throws IOException {
 String name = "schema12";



(gravitino) branch main updated: [#4128] improvement(core): Remove privileges of metalakes (#4139)

2024-07-17 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new f8a472916 [#4128] improvement(core): Remove privileges of metalakes 
(#4139)
f8a472916 is described below

commit f8a472916f3d58640cbbd9bf8b9cbc284d960e0b
Author: roryqi 
AuthorDate: Wed Jul 17 20:48:40 2024 +0800

[#4128] improvement(core): Remove privileges of metalakes (#4139)

### What changes were proposed in this pull request?
Remove privileges of metalakes. We use the ownership instead of metalake
privileges.

### Why are the changes needed?

Fix: #4128

### Does this PR introduce _any_ user-facing change?
Modify APIs. But this feature isn't released yet.

### How was this patch tested?
Existing UTs
---
 .../java/org/apache/gravitino/MetadataObjects.java |  13 +-
 .../apache/gravitino/authorization/Privilege.java  |  28 +--
 .../apache/gravitino/authorization/Privileges.java | 229 ++---
 .../gravitino/authorization/SecurableObjects.java  |  24 ---
 .../authorization/TestSecurableObjects.java|  24 ---
 .../src/main/java/org/apache/gravitino/Entity.java |  12 --
 .../relational/service/MetadataObjectService.java  |  11 -
 .../relational/service/RoleMetaService.java|   8 -
 .../relational/service/TestSecurableObjects.java   |   9 +-
 .../gravitino/server/web/rest/RoleOperations.java  |   5 -
 .../server/web/rest/TestRoleOperations.java|  21 --
 11 files changed, 71 insertions(+), 313 deletions(-)

diff --git a/api/src/main/java/org/apache/gravitino/MetadataObjects.java 
b/api/src/main/java/org/apache/gravitino/MetadataObjects.java
index 70f795fa0..5136164c9 100644
--- a/api/src/main/java/org/apache/gravitino/MetadataObjects.java
+++ b/api/src/main/java/org/apache/gravitino/MetadataObjects.java
@@ -27,11 +27,7 @@ import org.apache.commons.lang3.StringUtils;
 /** The helper class for {@link MetadataObject}. */
 public class MetadataObjects {
 
-  /**
-   * The reserved name for the metadata object.
-   *
-   * It is used to represent the root metadata object of all metalakes.
-   */
+  /** The reserved name for the metadata object. */
   public static final String METADATA_OBJECT_RESERVED_NAME = "*";
 
   private static final Splitter DOT_SPLITTER = Splitter.on('.');
@@ -106,13 +102,6 @@ public class MetadataObjects {
* @return The parsed metadata object
*/
   public static MetadataObject parse(String fullName, MetadataObject.Type 
type) {
-if (METADATA_OBJECT_RESERVED_NAME.equals(fullName)) {
-  if (type != MetadataObject.Type.METALAKE) {
-throw new IllegalArgumentException("If metadata object isn't metalake, 
it can't be `*`");
-  }
-  return new MetadataObjectImpl(null, METADATA_OBJECT_RESERVED_NAME, type);
-}
-
 Preconditions.checkArgument(
 StringUtils.isNotBlank(fullName), "Metadata object full name cannot be 
blank");
 
diff --git 
a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java 
b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
index 36229c8fc..5cb7b3214 100644
--- a/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
+++ b/api/src/main/java/org/apache/gravitino/authorization/Privilege.java
@@ -81,34 +81,28 @@ public interface Privilege {
 WRITE_TOPIC(0L, 1L << 18),
 /** The privilege to read a topic. */
 READ_TOPIC(0L, 1L << 19),
-/** The privilege to create a metalake. */
-CREATE_METALAKE(0L, 1L << 20),
-/** The privilege to manage a metalake, including drop and alter a 
metalake. */
-MANAGE_METALAKE(0L, 1L << 21),
-/** The privilege to use a metalake, the user can load the information of 
the metalake. */
-USE_METALAKE(0L, 1L << 22),
 /** The privilege to add a user */
-ADD_USER(0L, 1L << 23),
+ADD_USER(0L, 1L << 20),
 /** The privilege to remove a user */
-REMOVE_USER(0L, 1L << 24),
+REMOVE_USER(0L, 1L << 21),
 /** The privilege to get a user */
-GET_USER(0L, 1L << 25),
+GET_USER(0L, 1L << 22),
 /** The privilege to add a group */
-ADD_GROUP(0L, 1L << 26),
+ADD_GROUP(0L, 1L << 23),
 /** The privilege to remove a group */
-REMOVE_GROUP(0L, 1L << 27),
+REMOVE_GROUP(0L, 1L << 24),
 /** The privilege to get a group */
-GET_GROUP(0L, 1L << 28),
+GET_GROUP(0L, 1L << 25),
 /** The privilege to create a role */
-CREATE_ROLE(0L, 1L << 29),
+CREATE_ROLE(0L, 1L << 26),
 /** The privilege to delete a role */
-DELETE_ROLE(0L, 1L << 30),
+DELETE_ROLE(0L, 1L << 27),
 /** The privilege to grant a role to the user or the group. */
-GRANT_RO

(gravitino) branch main updated: [#4020] feat(core): Add Tag Manage core logic to support tag operations (part-2) (#4109)

2024-07-16 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new d85a9b482 [#4020] feat(core): Add Tag Manage core logic to support tag 
operations (part-2) (#4109)
d85a9b482 is described below

commit d85a9b4825f75e075daf74fe4109831dbe1bf373
Author: Jerry Shao 
AuthorDate: Tue Jul 16 20:53:04 2024 +0800

[#4020] feat(core): Add Tag Manage core logic to support tag operations 
(part-2) (#4109)

### What changes were proposed in this pull request?

This PR add the second part tag core logic to support associate tags
with metadata object, and query the associations between tags and
metadata objects.

### Why are the changes needed?

This is a part of work to support tag system.

Fix: #4020

### Does this PR introduce _any_ user-facing change?

NO.

### How was this patch tested?

Add UTs to cover the logic.
---
 .../exceptions/TagAlreadyAssociatedException.java  |  49 +++
 .../java/org/apache/gravitino/EntityStore.java |  11 +
 .../gravitino/storage/relational/JDBCBackend.java  |  32 ++
 .../storage/relational/RelationalBackend.java  |   3 +-
 .../storage/relational/RelationalEntityStore.java  |  41 ++-
 .../storage/relational/mapper/TagMetaMapper.java   |  33 +-
 .../mapper/TagMetadataObjectRelMapper.java | 112 ++-
 .../relational/po/TagMetadataObjectRelPO.java  | 130 
 .../gravitino/storage/relational/po/TagPO.java |  21 +-
 .../gravitino/storage/relational/po/TopicPO.java   |  26 +-
 .../MetadataObjectService.java}|  14 +-
 .../relational/service/RoleMetaService.java|   6 +-
 .../storage/relational/service/TagMetaService.java | 198 +++
 .../storage/relational/utils/POConverters.java |  26 ++
 .../gravitino/tag/SupportsTagOperations.java   |  96 ++
 .../java/org/apache/gravitino/tag/TagManager.java  | 210 +++-
 .../apache/gravitino/utils/MetadataObjectUtil.java |  97 ++
 .../apache/gravitino/utils/NameIdentifierUtil.java |  55 
 .../relational/service/TestTagMetaService.java | 361 +
 .../storage/relational/utils/TestPOConverters.java |  17 +
 .../org/apache/gravitino/tag/TestTagManager.java   | 355 +++-
 .../gravitino/utils/TestMetadataObjectUtil.java| 124 +++
 .../gravitino/utils/TestNameIdentifierUtil.java|  66 
 scripts/h2/schema-h2.sql   |   2 +-
 scripts/mysql/schema-0.6.0-mysql.sql   |   2 +-
 scripts/mysql/upgrade-0.5.0-to-0.6.0-mysql.sql |   2 +-
 26 files changed, 2024 insertions(+), 65 deletions(-)

diff --git 
a/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java
 
b/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java
new file mode 100644
index 0..61cab11fb
--- /dev/null
+++ 
b/api/src/main/java/org/apache/gravitino/exceptions/TagAlreadyAssociatedException.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.gravitino.exceptions;
+
+import com.google.errorprone.annotations.FormatMethod;
+
+/** Exception thrown when a tag with specified name already associated to a 
metadata object. */
+public class TagAlreadyAssociatedException extends AlreadyExistsException {
+
+  /**
+   * Constructs a new exception with the specified detail message.
+   *
+   * @param message the detail message.
+   * @param args the arguments to the message.
+   */
+  @FormatMethod
+  public TagAlreadyAssociatedException(String message, Object... args) {
+super(message, args);
+  }
+
+  /**
+   * Constructs a new exception with the specified detail message and cause.
+   *
+   * @param cause the cause.
+   * @param message the detail message.
+   * @param args the arguments to the message.
+   */
+  @FormatMethod
+  public TagAlreadyAssociatedException(Throwable cause, String message, 
Object... args) {
+super(cause, message, args);
+  }
+}
diff --git a/core/src/

(gravitino) branch main updated: [#4107] feat(all): Add testConnection API for catalog (#4108)

2024-07-16 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 116e5ae15 [#4107] feat(all): Add testConnection API for catalog (#4108)
116e5ae15 is described below

commit 116e5ae1591feca15b8eef20b3d77fcb3a99e8a8
Author: mchades 
AuthorDate: Tue Jul 16 18:54:29 2024 +0800

[#4107] feat(all): Add testConnection API for catalog (#4108)

### What changes were proposed in this pull request?

Add testConnection API for catalog

### Why are the changes needed?

Fix: #4107

### Does this PR introduce _any_ user-facing change?

yes, add a new API

### How was this patch tested?

tests added
---
 .../org/apache/gravitino/SupportsCatalogs.java |  18 
 .../exceptions/ConnectionFailedException.java  |  49 +
 build.gradle.kts   |   4 +-
 .../catalog/hadoop/HadoopCatalogOperations.java|  21 
 .../hadoop/TestHadoopCatalogOperations.java|  14 +++
 .../hadoop/integration/test/HadoopCatalogIT.java   |   3 +-
 .../test/HadoopUserImpersonationIT.java|   3 +-
 .../catalog/hive/HiveCatalogOperations.java|  27 +
 .../catalog/hive/TestHiveCatalogOperations.java|  28 +
 .../catalog/jdbc/JdbcCatalogOperations.java|  20 
 .../jdbc/converter/JdbcExceptionConverter.java |   4 +-
 .../catalog/jdbc/TestJdbcCatalogOperations.java|  54 ++
 .../jdbc/operation/SqliteDatabaseOperations.java   |  16 ++-
 .../catalog/kafka/KafkaCatalogOperations.java  |  23 +++-
 .../catalog/kafka/TestKafkaCatalogOperations.java  |  15 +++
 .../kafka/integration/test/CatalogKafkaIT.java |  28 -
 .../iceberg/IcebergCatalogOperations.java  |  26 +
 .../iceberg/TestIcebergCatalogOperations.java  |  45 
 .../lakehouse/paimon/PaimonCatalogOperations.java  |  25 +
 .../lakehouse/paimon/TestPaimonCatalog.java|  13 +++
 .../org/apache/gravitino/client/ErrorHandlers.java |   4 +
 .../apache/gravitino/client/GravitinoClient.java   |  22 
 .../apache/gravitino/client/GravitinoMetalake.java |  43 
 .../gravitino/client/TestGravitinoClient.java  |  65 
 .../gravitino/dto/responses/ErrorConstants.java|   3 +
 .../gravitino/dto/responses/ErrorResponse.java |  26 +
 .../org/apache/gravitino/StringIdentifier.java |   3 +
 .../apache/gravitino/catalog/CatalogManager.java   | 116 +
 .../catalog/CatalogNormalizeDispatcher.java|  12 +++
 .../apache/gravitino/catalog/SupportsCatalogs.java |  18 
 .../gravitino/connector/CatalogOperations.java |  20 
 .../gravitino/listener/CatalogEventDispatcher.java |  12 +++
 .../gravitino/catalog/DummyCatalogOperations.java  |  11 ++
 .../gravitino/catalog/TestCatalogManager.java  |  22 +++-
 .../gravitino/connector/TestCatalogOperations.java |  16 +++
 docs/open-api/catalogs.yaml|  70 +
 docs/open-api/openapi.yaml |   3 +
 .../integration/test/client/CatalogIT.java |  20 +++-
 .../integration/test/client/MetalakeIT.java|   3 +-
 .../org/apache/gravitino/server/web/Utils.java |  14 +++
 .../server/web/rest/CatalogOperations.java |  39 +++
 .../server/web/rest/ExceptionHandlers.java |  31 ++
 .../org/apache/gravitino/server/web/TestUtils.java |  10 ++
 .../gravitino/server/web/rest/TestCatalog.java |  10 ++
 .../server/web/rest/TestCatalogOperations.java |  55 --
 45 files changed, 1038 insertions(+), 46 deletions(-)

diff --git a/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java 
b/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java
index c3805edec..8644430bc 100644
--- a/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java
+++ b/api/src/main/java/org/apache/gravitino/SupportsCatalogs.java
@@ -114,4 +114,22 @@ public interface SupportsCatalogs {
* @return True if the catalog was dropped, false otherwise.
*/
   boolean dropCatalog(String catalogName);
+
+  /**
+   * Test whether the catalog with specified parameters can be connected to 
before creating it.
+   *
+   * @param catalogName the name of the catalog.
+   * @param type the type of the catalog.
+   * @param provider the provider of the catalog.
+   * @param comment the comment of the catalog.
+   * @param properties the properties of the catalog.
+   * @throws Exception if the test failed.
+   */
+  void testConnection(
+  String catalogName,
+  Catalog.Type type,
+  String provider,
+  String comment,
+  Map properties)
+  throws Exception;
 }
diff --git 
a/api/src/main/java/org/apache/gravitino/exceptions/ConnectionFailedException.java
 
b/api/src/main/java/org/apache/gravitino/exceptions

(gravitino) branch main updated: [#4126] improvement(core): Remove MetalakeAdmin API (#4127)

2024-07-16 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 627e88ffb [#4126] improvement(core): Remove MetalakeAdmin API (#4127)
627e88ffb is described below

commit 627e88ffb9cbb8f5efe1b6e8af3a69f8e051d3e0
Author: roryqi 
AuthorDate: Tue Jul 16 18:21:18 2024 +0800

[#4126] improvement(core): Remove MetalakeAdmin API (#4127)

### What changes were proposed in this pull request?
Remove MetalakeAdmin API.

### Why are the changes needed?

Fix: #4126

### Does this PR introduce _any_ user-facing change?
Remove API. But this API isn't released.

### How was this patch tested?
Existing tests.
---
 .../gravitino/client/GravitinoAdminClient.java |  45 -
 .../apache/gravitino/client/TestMetalakeAdmin.java | 110 ---
 .../authorization/AccessControlManager.java|  91 ++---
 .../gravitino/authorization/AdminManager.java  | 122 
 .../authorization/TestAccessControlManager.java|  21 --
 .../gravitino/server/web/rest/GroupOperations.java |  33 +++-
 .../server/web/rest/MetalakeAdminOperations.java   |  98 --
 .../server/web/rest/PermissionOperations.java  |  76 ++--
 .../gravitino/server/web/rest/RoleOperations.java  |  37 ++--
 .../gravitino/server/web/rest/UserOperations.java  |  32 +++-
 .../web/rest/TestMetalakeAdminOperations.java  | 213 -
 11 files changed, 149 insertions(+), 729 deletions(-)

diff --git 
a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java
 
b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java
index 67d32289f..b70839733 100644
--- 
a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java
+++ 
b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoAdminClient.java
@@ -67,7 +67,6 @@ public class GravitinoAdminClient extends GravitinoClientBase 
implements Support
   private static final String API_METALAKES_USERS_PATH = 
"api/metalakes/%s/users/%s";
   private static final String API_METALAKES_GROUPS_PATH = 
"api/metalakes/%s/groups/%s";
   private static final String API_METALAKES_ROLES_PATH = 
"api/metalakes/%s/roles/%s";
-  private static final String API_ADMIN_PATH = "api/admins/%s";
   private static final String API_PERMISSION_PATH = 
"api/metalakes/%s/permissions/%s";
   private static final String BLANK_PLACE_HOLDER = "";
 
@@ -334,50 +333,6 @@ public class GravitinoAdminClient extends 
GravitinoClientBase implements Support
 return resp.getGroup();
   }
 
-  /**
-   * Adds a new metalake admin.
-   *
-   * @param user The name of the User.
-   * @return The added User instance.
-   * @throws UserAlreadyExistsException If a metalake admin with the same name 
already exists.
-   * @throws RuntimeException If adding the User encounters storage issues.
-   */
-  public User addMetalakeAdmin(String user) throws UserAlreadyExistsException {
-UserAddRequest req = new UserAddRequest(user);
-req.validate();
-
-UserResponse resp =
-restClient.post(
-String.format(API_ADMIN_PATH, BLANK_PLACE_HOLDER),
-req,
-UserResponse.class,
-Collections.emptyMap(),
-ErrorHandlers.userErrorHandler());
-resp.validate();
-
-return resp.getUser();
-  }
-
-  /**
-   * Removes a metalake admin.
-   *
-   * @param user The name of the User.
-   * @return True if the User was successfully removed, false only when 
there's no such metalake
-   * admin, otherwise it will throw an exception.
-   * @throws RuntimeException If removing the User encounters storage issues.
-   */
-  public boolean removeMetalakeAdmin(String user) {
-RemoveResponse resp =
-restClient.delete(
-String.format(API_ADMIN_PATH, user),
-RemoveResponse.class,
-Collections.emptyMap(),
-ErrorHandlers.userErrorHandler());
-resp.validate();
-
-return resp.removed();
-  }
-
   /**
* Gets a Role.
*
diff --git 
a/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java
 
b/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java
deleted file mode 100644
index 6dd10965d..0
--- 
a/clients/client-java/src/test/java/org/apache/gravitino/client/TestMetalakeAdmin.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "Lic

(gravitino) branch main updated: [#4165] improvement(Filesystem): Improve the potential storage replication issues in Hadoop GVFS (#4166)

2024-07-16 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new b2a930225 [#4165] improvement(Filesystem): Improve the potential 
storage replication issues in Hadoop GVFS (#4166)
b2a930225 is described below

commit b2a93022553a258a8f599f2e5a6e0bbdc3e5ace7
Author: xloya <982052...@qq.com>
AuthorDate: Tue Jul 16 17:03:26 2024 +0800

[#4165] improvement(Filesystem): Improve the potential storage replication 
issues in Hadoop GVFS (#4166)

### What changes were proposed in this pull request?

Currently, Hadoop GVFS does not implement the `getDefaultBlockSize(Path
f)` and `getBlockSize(Path f)` methods, which will result in the use of
the FileSystem default values, causing the storage replications and
block sizes to not meet expectations.

### Why are the changes needed?

Fix: #4165

### How was this patch tested?

Add UTs and ITs.

-

Co-authored-by: xiaojiebao 
---
 .../hadoop/GravitinoVirtualFileSystem.java | 12 +++
 .../gravitino/filesystem/hadoop/TestGvfsBase.java  | 16 +
 .../hadoop/GravitinoVirtualFileSystemIT.java   | 42 ++
 3 files changed, 70 insertions(+)

diff --git 
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
 
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
index ef51c1753..bbcf0c71e 100644
--- 
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
+++ 
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
@@ -515,6 +515,18 @@ public class GravitinoVirtualFileSystem extends FileSystem 
{
 return context.getFileSystem().mkdirs(context.getActualPath(), permission);
   }
 
+  @Override
+  public short getDefaultReplication(Path f) {
+FilesetContext context = getFilesetContext(f);
+return 
context.getFileSystem().getDefaultReplication(context.getActualPath());
+  }
+
+  @Override
+  public long getDefaultBlockSize(Path f) {
+FilesetContext context = getFilesetContext(f);
+return 
context.getFileSystem().getDefaultBlockSize(context.getActualPath());
+  }
+
   @Override
   public synchronized void close() throws IOException {
 // close all actual FileSystems
diff --git 
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
index ce87d8d02..13b365a25 100644
--- 
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
+++ 
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
@@ -591,4 +591,20 @@ public class TestGvfsBase extends GravitinoMockServerBase {
   () -> fs.extractIdentifier(new 
URI("/catalog1/schema1/fileset1/dir//")));
 }
   }
+
+  @Test
+  public void testGetDefaultReplications() throws IOException {
+try (GravitinoVirtualFileSystem fs =
+(GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) {
+  assertEquals(1, fs.getDefaultReplication(managedFilesetPath));
+}
+  }
+
+  @Test
+  public void testGetDefaultBlockSize() throws IOException {
+try (GravitinoVirtualFileSystem fs =
+(GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) {
+  assertEquals(32 * 1024 * 1024, 
fs.getDefaultBlockSize(managedFilesetPath));
+}
+  }
 }
diff --git 
a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
 
b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
index 9321080d9..feb8446be 100644
--- 
a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
+++ 
b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
@@ -422,6 +422,48 @@ public class GravitinoVirtualFileSystemIT extends 
AbstractIT {
 }
   }
 
+  @Test
+  public void testGetDefaultReplications() throws IOException {
+String filesetName = "test_get_default_replications";
+NameIdentifier filesetIdent = NameIdentifier.of(schemaName, filesetName);
+Catalog catalog = metalake.loadCatalog(catalogName);
+String storageLocation = genStorageLocation(filesetName);
+catalog
+.asFilesetCatalog()
+.createFileset(
+filesetIdent,
+"fileset comment&q

(gravitino) branch main updated: [#4077] improvement(docs): Fixed an incorrect description and some incomplete examples (#4146)

2024-07-14 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 0961af355 [#4077] improvement(docs): Fixed an incorrect description 
and some incomplete examples (#4146)
0961af355 is described below

commit 0961af35585f4419ded7c53969d5422e3f1b65b0
Author: JinsYin 
AuthorDate: Mon Jul 15 11:13:17 2024 +0800

[#4077] improvement(docs): Fixed an incorrect description and some 
incomplete examples (#4146)

### What changes were proposed in this pull request?

Fixed an incorrect description.

### Why are the changes needed?

Fix: #4145

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

No testing required

-

Co-authored-by: rqyin 
---
 docs/trino-connector/supported-catalog.md | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/docs/trino-connector/supported-catalog.md 
b/docs/trino-connector/supported-catalog.md
index 54d0bedd3..306b2dcc2 100644
--- a/docs/trino-connector/supported-catalog.md
+++ b/docs/trino-connector/supported-catalog.md
@@ -14,7 +14,6 @@ The catalogs currently supported by the Apache Gravitino 
connector are as follow
 
 ## Create catalog
 
-Trino itself does not support creating catalogs. 
 Users can create catalogs through the Gravitino connector and then load them 
into Trino. 
 The Gravitino connector provides the following stored procedures to create, 
delete, and alter catalogs.
 User can also use the system table `catalog` to describe all the catalogs.
@@ -87,9 +86,9 @@ call gravitino.system.create_catalog(
 'jdbc-mysql',
 Map(
 Array['jdbc-url', 'jdbc-user', 'jdbc-password', 'jdbc-driver'],
-Array['jdbc:mysql:192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 
'com.mysql.cj.jdbc.Driver']
+Array['jdbc:mysql://192.168.164.4:3306?useSSL=false', 'trino', 
'ds123', 'com.mysql.cj.jdbc.Driver']
 )
-)
+);
 call gravitino.system.drop_datalog('mysql');
 
 -- Call stored procedures with name.
@@ -98,10 +97,10 @@ call gravitino.system.create_catalog(
 provider => 'jdbc-mysql',
 properties => Map(
 Array['jdbc-url', 'jdbc-user', 'jdbc-password', 'jdbc-driver'],
-Array['jdbc:mysql:192.168.164.4:3306?useSSL=false', 'trino', 'ds123', 
'com.mysql.cj.jdbc.Driver']
+Array['jdbc:mysql://192.168.164.4:3306?useSSL=false', 'trino', 
'ds123', 'com.mysql.cj.jdbc.Driver']
 ),
 ignore_exist => true
-)
+);
 
 call gravitino.system.drop_datalog(
 catalog => 'mysql'
@@ -112,10 +111,10 @@ call gravitino.system.alter_catalog(
 catalog => 'mysql',
 set_properties=> Map(
 Array['jdbc-url'],
-Array['jdbc:mysql:127.0.0.1:3306?useSSL=false']
+Array['jdbc:mysql://127.0.0.1:3306?useSSL=false']
 ),
 remove_properties => Array['jdbc-driver']
-)
+);
 ```
 
 if you need more information about catalog, please refer to:



(gravitino) branch main updated: [#4157] fix(doc): Fix the doc format in `how-to-build` (#4158)

2024-07-12 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 245c4579f [#4157] fix(doc): Fix the doc format in `how-to-build` 
(#4158)
245c4579f is described below

commit 245c4579f935062dc39207f0724ca3ca0764e6d5
Author: Rui Fan <1996fan...@gmail.com>
AuthorDate: Sat Jul 13 10:25:33 2024 +0800

[#4157] fix(doc): Fix the doc format in `how-to-build` (#4158)

### What changes were proposed in this pull request?

[#4157] fix(doc): Fix the doc format in `how-to-build`

### Why are the changes needed?



https://github.com/apache/gravitino/blob/main/docs/how-to-build.md#quick-start

The code block should only show the `git clone
g...@github.com:apache/gravitino.git`

https://github.com/user-attachments/assets/ac00cf90-85a5-4ad7-8c3e-ae24ccd4abe8


Fix: #4157

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

The doc format is fine in my dev branch.



https://github.com/1996fanrui/gravitino/blob/4157/fix-doc-format/docs/how-to-build.md#quick-start

https://github.com/user-attachments/assets/d54634f7-1bd8-4df8-9ea2-9537a71c0bcc
---
 docs/how-to-build.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/how-to-build.md b/docs/how-to-build.md
index f95206403..e609be671 100644
--- a/docs/how-to-build.md
+++ b/docs/how-to-build.md
@@ -40,7 +40,7 @@ license: "This software is licensed under the Apache License 
version 2."
 
 1. Clone the Gravitino project.
 
-If you want to contribute to this open-source project, please fork the project 
on GitHub first. After forking, clone the forked project to your local 
environment, make your changes, and submit a pull request (PR).
+If you want to contribute to this open-source project, please fork the 
project on GitHub first. After forking, clone the forked project to your local 
environment, make your changes, and submit a pull request (PR).
 
 ```shell
 git clone g...@github.com:apache/gravitino.git



(gravitino) branch main updated: [#4155]fix(doc): Fixed an unreachable link (#4156)

2024-07-12 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new fad4beeda [#4155]fix(doc): Fixed an unreachable link (#4156)
fad4beeda is described below

commit fad4beedaf240b74563120edc0fddd22464e5279
Author: JinsYin 
AuthorDate: Fri Jul 12 18:05:34 2024 +0800

[#4155]fix(doc): Fixed an unreachable link (#4156)

### What changes were proposed in this pull request?

Fixed an unreachable link.

### Why are the changes needed?

Fix: #4155

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

No testing required

Co-authored-by: rqyin 
---
 docs/lakehouse-iceberg-catalog.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/lakehouse-iceberg-catalog.md 
b/docs/lakehouse-iceberg-catalog.md
index 15e463c96..1434405e0 100644
--- a/docs/lakehouse-iceberg-catalog.md
+++ b/docs/lakehouse-iceberg-catalog.md
@@ -234,7 +234,7 @@ Meanwhile, the data types other than listed above are 
mapped to Gravitino **[Ext
 
 ### Table properties
 
-You can pass [Iceberg table 
properties](https://iceberg.apache.org/docs/1.3.1/configuration/) to Gravitino 
when creating an Iceberg table.
+You can pass [Iceberg table 
properties](https://web.archive.org/web/20231210013537/https://iceberg.apache.org/docs/1.3.1/configuration/)
 to Gravitino when creating an Iceberg table.
 
 The Gravitino server doesn't allow passing the following reserved fields.
 



(gravitino) branch main updated: [#4105] improvement(core): Remove the logic of getValidRoles (#4121)

2024-07-12 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new d6135447a [#4105] improvement(core): Remove the logic of getValidRoles 
(#4121)
d6135447a is described below

commit d6135447af900e15954b21d6ccf7637d66237237
Author: roryqi 
AuthorDate: Fri Jul 12 16:27:05 2024 +0800

[#4105] improvement(core): Remove the logic of getValidRoles (#4121)

### What changes were proposed in this pull request?

Remove the logic of getValidRoles.

### Why are the changes needed?

Fix: #4105

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Modify some test cases.
---
 .../authorization/AccessControlManager.java|  2 +-
 .../gravitino/authorization/PermissionManager.java | 39 ++---
 .../gravitino/authorization/RoleManager.java   | 25 --
 .../gravitino/authorization/UserGroupManager.java  | 40 +++---
 .../TestAccessControlManagerForPermissions.java| 36 +--
 .../relational/service/TestGroupMetaService.java   |  6 
 .../relational/service/TestUserMetaService.java|  6 
 7 files changed, 46 insertions(+), 108 deletions(-)

diff --git 
a/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java
 
b/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java
index eb7dbfb04..26ec14a7e 100644
--- 
a/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java
+++ 
b/core/src/main/java/com/datastrato/gravitino/authorization/AccessControlManager.java
@@ -53,7 +53,7 @@ public class AccessControlManager {
   public AccessControlManager(EntityStore store, IdGenerator idGenerator, 
Config config) {
 this.adminManager = new AdminManager(store, idGenerator, config);
 this.roleManager = new RoleManager(store, idGenerator, config);
-this.userGroupManager = new UserGroupManager(store, idGenerator, 
roleManager);
+this.userGroupManager = new UserGroupManager(store, idGenerator);
 this.permissionManager = new PermissionManager(store, roleManager);
   }
 
diff --git 
a/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java
 
b/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java
index 3b24e8cde..95a59c18c 100644
--- 
a/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java
+++ 
b/core/src/main/java/com/datastrato/gravitino/authorization/PermissionManager.java
@@ -42,7 +42,7 @@ import org.slf4j.LoggerFactory;
 
 /**
  * PermissionManager is used for managing the logic the granting and revoking 
roles. Role is used
- * for manging permissions. PermissionManager will filter the invalid roles, 
too.
+ * for manging permissions.
  */
 class PermissionManager {
   private static final Logger LOG = 
LoggerFactory.getLogger(PermissionManager.class);
@@ -67,14 +67,17 @@ class PermissionManager {
   UserEntity.class,
   Entity.EntityType.USER,
   userEntity -> {
-List roleEntities =
-roleManager.getValidRoles(metalake, userEntity.roleNames(), 
userEntity.roleIds());
-
+List roleEntities = Lists.newArrayList();
+if (userEntity.roleNames() != null) {
+  for (String role : userEntity.roleNames()) {
+roleEntities.add(roleManager.getRole(metalake, role));
+  }
+}
 List roleNames = 
Lists.newArrayList(toRoleNames(roleEntities));
 List roleIds = Lists.newArrayList(toRoleIds(roleEntities));
 
 for (RoleEntity roleEntityToGrant : roleEntitiesToGrant) {
-  if (roleNames.contains(roleEntityToGrant.name())) {
+  if (roleIds.contains(roleEntityToGrant.id())) {
 LOG.warn(
 "Failed to grant, role {} already exists in the user {} of 
metalake {}",
 roleEntityToGrant.name(),
@@ -129,13 +132,17 @@ class PermissionManager {
   GroupEntity.class,
   Entity.EntityType.GROUP,
   groupEntity -> {
-List roleEntities =
-roleManager.getValidRoles(metalake, groupEntity.roleNames(), 
groupEntity.roleIds());
+List roleEntities = Lists.newArrayList();
+if (groupEntity.roleNames() != null) {
+  for (String role : groupEntity.roleNames()) {
+roleEntities.add(roleManager.getRole(metalake, role));
+  }
+}
 List roleNames = 
Lists.newArrayList(toRoleNames(roleEntities));
 List roleIds = Lists.newArrayList(toRoleIds(roleEntities));
 
 for (RoleEntity roleEntityToGrant : roleEntitiesToGrant) {
-  if (ro

(gravitino) branch main updated: [#4135] fix(trino-connector): Fix typo about Gravitino in trino-connector (#4144)

2024-07-12 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new b80be6ec7 [#4135] fix(trino-connector):  Fix typo about Gravitino in 
trino-connector (#4144)
b80be6ec7 is described below

commit b80be6ec790e79f6ea4c7dd7ba9ab36deb6e3a41
Author: Dev Parikh <51128342+dev79...@users.noreply.github.com>
AuthorDate: Fri Jul 12 13:36:24 2024 +0530

[#4135] fix(trino-connector):  Fix typo about Gravitino in trino-connector 
(#4144)

### Why are the changes needed?
Fix: #4135

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Existing UTs
---
 .../datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java
 
b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java
index f2311ea6a..89d330e1a 100644
--- 
a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java
+++ 
b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoQueryTestTool.java
@@ -91,7 +91,7 @@ public class TrinoQueryTestTool {
 + "TrinoTestTool --testset=tpch --tester_id=5 
--catalog=hive --auto=all\n\n"
 + "Run all the tpch testset's testers in the 'testsets/tpch' 
directory under 'mysql' "
 + "catalog with manual start the test environment:\n"
-+ "TrinoTestTool --testset=tpch -- catalog=mysql --auto=none 
--gravition_uri=http://10.3.21.12:8090 "
++ "TrinoTestTool --testset=tpch -- catalog=mysql --auto=none 
--gravitino_uri=http://10.3.21.12:8090 "
 + "--trino_uri=http://10.3.21.12:8080 
--mysql_url=jdbc:mysql:/10.3.21.12 \n";
 System.out.println(example);
 return;



(gravitino) branch main updated: [#4129] improvement(core): Support hold multiple tree lock within a thread at the same time (#4130)

2024-07-11 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 1b1ef58b0 [#4129] improvement(core): Support hold multiple tree lock 
within a thread at the same time (#4130)
1b1ef58b0 is described below

commit 1b1ef58b0fe9f9f84575319fbd953d8ea3351f61
Author: Qi Yu 
AuthorDate: Fri Jul 12 10:09:06 2024 +0800

[#4129] improvement(core): Support hold multiple tree lock within a thread 
at the same time (#4130)

### What changes were proposed in this pull request?

Add the value of the name identifier in the holdingThreadTimestamp to
support holding multiple tree locks at the same time.

### Why are the changes needed?

To support more user scenarios

Fix: #4129

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Add new test class `TestTreeLockUtils`
---
 .../com/datastrato/gravitino/lock/LockManager.java |  6 +-
 .../com/datastrato/gravitino/lock/TreeLock.java| 21 +-
 .../datastrato/gravitino/lock/TreeLockNode.java| 82 +++---
 .../gravitino/lock/TestTreeLockUtils.java  | 51 ++
 4 files changed, 131 insertions(+), 29 deletions(-)

diff --git a/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java 
b/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java
index b1dbb27fe..9fb0ef6e1 100644
--- a/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java
+++ b/core/src/main/java/com/datastrato/gravitino/lock/LockManager.java
@@ -132,12 +132,12 @@ public class LockManager {
 // Check self
 node.getHoldingThreadTimestamp()
 .forEach(
-(thread, ts) -> {
+(threadIdentifier, ts) -> {
   // If the thread is holding the lock for more than 30 seconds, 
we will log it.
   if (System.currentTimeMillis() - ts > 3) {
 LOG.warn(
-"Dead lock detected for thread {} on node {}, threads that 
holding the node: {} ",
-thread,
+"Dead lock detected for thread with identifier {} on node 
{}, threads that holding the node: {} ",
+threadIdentifier,
 node,
 node.getHoldingThreadTimestamp());
   }
diff --git a/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java 
b/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java
index 76d9ab028..02cb0c757 100644
--- a/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java
+++ b/core/src/main/java/com/datastrato/gravitino/lock/TreeLock.java
@@ -104,8 +104,17 @@ public class TreeLock {
   try {
 treeLockNode.lock(type);
 heldLocks.push(Pair.of(treeLockNode, type));
+
+treeLockNode.addHoldingThreadTimestamp(
+Thread.currentThread(), identifier, System.currentTimeMillis());
 if (LOG.isTraceEnabled()) {
-  LOG.trace("Locked node: {}, lock type: {}", treeLockNode, type);
+  LOG.trace(
+  "Node {} has been lock with '{}' lock, hold by {} with ident 
'{}' at {}",
+  this,
+  lockType,
+  Thread.currentThread(),
+  identifier,
+  System.currentTimeMillis());
 }
   } catch (Exception e) {
 LOG.error(
@@ -140,8 +149,16 @@ public class TreeLock {
   TreeLockNode current = pair.getLeft();
   LockType type = pair.getRight();
   current.unlock(type);
+
+  long holdStartTime = 
current.removeHoldingThreadTimestamp(Thread.currentThread(), identifier);
   if (LOG.isTraceEnabled()) {
-LOG.trace("Unlocked node: {}, lock type: {}", current, type);
+LOG.trace(
+"Node {} has been unlock with '{}' lock, hold by {} with ident 
'{}' for {} ms",
+this,
+lockType,
+Thread.currentThread(),
+identifier,
+System.currentTimeMillis() - holdStartTime);
   }
 }
 
diff --git a/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java 
b/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java
index a4953c541..92db979aa 100644
--- a/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java
+++ b/core/src/main/java/com/datastrato/gravitino/lock/TreeLockNode.java
@@ -19,6 +19,7 @@
 
 package com.datastrato.gravitino.lock;
 
+import com.datastrato.gravitino.NameIdentifier;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Objects;
 import com.google.common.collect.Lists;
@@ -44,13 +45,60 @@ public class TreeLockNode {
   private final String name;
   private final ReentrantReadW

(gravitino-playground) branch main updated: [MINOR] fix(git): correct gitignore file name (#53)

2024-07-11 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git


The following commit(s) were added to refs/heads/main by this push:
 new 02db830  [MINOR] fix(git): correct gitignore file name (#53)
02db830 is described below

commit 02db83012bedc777d3bd97297e8b346aeb1f4aae
Author: mchades <793098...@qq.com>
AuthorDate: Thu Jul 11 18:01:33 2024 +0800

[MINOR] fix(git): correct gitignore file name (#53)

correct gitignore file name
---
 .gitigore => .gitignore | 0
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/.gitigore b/.gitignore
similarity index 100%
rename from .gitigore
rename to .gitignore



(gravitino) branch main updated: [#3733] feat(core): Unified authorization framework (#3946)

2024-07-10 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 87b58fee3 [#3733] feat(core): Unified authorization framework (#3946)
87b58fee3 is described below

commit 87b58fee3ce630033857e9665ff0d63dd6d778c6
Author: Xun Liu 
AuthorDate: Wed Jul 10 17:20:14 2024 +0800

[#3733] feat(core): Unified authorization framework (#3946)

### What changes were proposed in this pull request?

Provide an authorization hook plugin framework, In the next step we can
develop an authorization plugin, just like Catalogs.
+ [Unified authorization design

document](https://docs.google.com/document/d/1RtKfU0uO-N7OjcrB3DOtY1ZsbhVp3GsLSJ26c_YQosQ/edit)

https://github.com/apache/gravitino/assets/3677382/b9a06b79-057a-494c-a1be-15691f478de1";>


### Why are the changes needed?

Fix: #3733

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

CI Passed.
---
 .../java/com/datastrato/gravitino/Catalog.java |   6 +
 .../gravitino/authorization/RoleChange.java| 155 +
 .../gravitino/authorization/SecurableObjects.java  |  14 +-
 build.gradle.kts   |   3 +-
 .../catalog/hive/TestHiveCatalogOperations.java|   3 +-
 .../gravitino/authorization/PermissionManager.java |   2 +-
 .../gravitino/connector/BaseCatalog.java   |  63 +
 .../connector/BaseCatalogPropertiesMetadata.java   |   7 +
 .../authorization/AuthorizationPlugin.java |  29 
 .../authorization/AuthorizationProvider.java   |  33 +
 .../connector/authorization/BaseAuthorization.java |  64 +
 .../authorization/RoleAuthorizationPlugin.java |  70 ++
 .../UserGroupAuthorizationPlugin.java  | 143 +++
 .../com/datastrato/gravitino/meta/RoleEntity.java  |   2 +-
 .../java/com/datastrato/gravitino/TestCatalog.java |   9 ++
 .../connector/authorization/TestAuthorization.java |  95 +
 .../mysql/TestMySQLAuthorization.java  |  37 +
 .../mysql/TestMySQLAuthorizationPlugin.java| 105 ++
 .../ranger/TestRangerAuthorization.java|  37 +
 .../ranger/TestRangerAuthorizationPlugin.java  | 105 ++
 ...o.connector.authorization.AuthorizationProvider |  20 +++
 integration-test/build.gradle.kts  |   2 +-
 .../test/authorization/ranger/RangerIT.java|  56 +++-
 23 files changed, 1042 insertions(+), 18 deletions(-)

diff --git a/api/src/main/java/com/datastrato/gravitino/Catalog.java 
b/api/src/main/java/com/datastrato/gravitino/Catalog.java
index 2f75cab38..d7627cf14 100644
--- a/api/src/main/java/com/datastrato/gravitino/Catalog.java
+++ b/api/src/main/java/com/datastrato/gravitino/Catalog.java
@@ -88,6 +88,12 @@ public interface Catalog extends Auditable {
*/
   String CLOUD_REGION_CODE = "cloud.region-code";
 
+  /**
+   * This variable is used as a key in properties of catalogs to use 
authorization provider in
+   * Gravitino.
+   */
+  String AUTHORIZATION_PROVIDER = "authorization-provider";
+
   /** @return The name of the catalog. */
   String name();
 
diff --git 
a/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java 
b/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java
new file mode 100644
index 0..4271bc7f0
--- /dev/null
+++ b/api/src/main/java/com/datastrato/gravitino/authorization/RoleChange.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.datastrato.gravitino.authorization;
+
+import com.datastrato.gravitino.annotation.Evolving;
+
+/** The RoleChange interface defines the public API for managing roles in an 
authorization. */
+@Evolving
+public interface RoleChange {
+  /**
+   * Create a RoleChange to add a securable object into a role.
+   *
+   * @param securableObject The securable object.
+   * @return return a RoleChang

(gravitino) branch main updated: [#4000] improvement(client-python): Support simple auth for PyGVFS (#4001)

2024-07-09 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 0f18b638a [#4000] improvement(client-python): Support simple auth for 
PyGVFS (#4001)
0f18b638a is described below

commit 0f18b638a2ce2f946bb8fc50ddfed7e88c1b73e4
Author: xloya <982052...@qq.com>
AuthorDate: Wed Jul 10 13:39:31 2024 +0800

[#4000] improvement(client-python): Support simple auth for PyGVFS (#4001)

### What changes were proposed in this pull request?

Support simple auth for gravitino client in PyGVFS. The integration test
depends on this PR: #3876 #3931 . When #3876 #3931 is merged, I will add
integration tests and docs for this PR.

### Why are the changes needed?

Fix: #4000

### How was this patch tested?

Add UTs and ITs.

-

Co-authored-by: xiaojiebao 
---
 clients/client-python/gravitino/filesystem/gvfs.py | 43 +++---
 .../gravitino/filesystem/gvfs_config.py| 29 +++
 .../tests/integration/test_gvfs_with_hdfs.py   | 24 
 .../tests/unittests/test_gvfs_with_local.py| 36 --
 docs/how-to-use-gvfs.md| 43 ++
 5 files changed, 157 insertions(+), 18 deletions(-)

diff --git a/clients/client-python/gravitino/filesystem/gvfs.py 
b/clients/client-python/gravitino/filesystem/gvfs.py
index a50c97f4c..a2b2461b3 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -32,8 +32,10 @@ from pyarrow.fs import HadoopFileSystem
 from readerwriterlock import rwlock
 from gravitino.api.catalog import Catalog
 from gravitino.api.fileset import Fileset
+from gravitino.auth.simple_auth_provider import SimpleAuthProvider
 from gravitino.client.gravitino_client import GravitinoClient
 from gravitino.exceptions.base import GravitinoRuntimeException
+from gravitino.filesystem.gvfs_config import GVFSConfig
 from gravitino.name_identifier import NameIdentifier
 
 PROTOCOL_NAME = "gvfs"
@@ -94,15 +96,44 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
 
 def __init__(
 self,
-server_uri=None,
-metalake_name=None,
-cache_size=20,
-cache_expired_time=3600,
+server_uri: str = None,
+metalake_name: str = None,
+options: Dict = None,
 **kwargs,
 ):
+"""Initialize the GravitinoVirtualFileSystem.
+:param server_uri: Gravitino server URI
+:param metalake_name: Gravitino metalake name
+:param options: Options for the GravitinoVirtualFileSystem
+:param kwargs: Extra args for super filesystem
+"""
 self._metalake = metalake_name
-self._client = GravitinoClient(
-uri=server_uri, metalake_name=metalake_name, check_version=False
+auth_type = (
+GVFSConfig.DEFAULT_AUTH_TYPE
+if options is None
+else options.get(GVFSConfig.AUTH_TYPE, 
GVFSConfig.DEFAULT_AUTH_TYPE)
+)
+if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE:
+self._client = GravitinoClient(
+uri=server_uri,
+metalake_name=metalake_name,
+auth_data_provider=SimpleAuthProvider(),
+)
+else:
+raise GravitinoRuntimeException(
+f"Authentication type {auth_type} is not supported."
+)
+cache_size = (
+GVFSConfig.DEFAULT_CACHE_SIZE
+if options is None
+else options.get(GVFSConfig.CACHE_SIZE, 
GVFSConfig.DEFAULT_CACHE_SIZE)
+)
+cache_expired_time = (
+GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+if options is None
+else options.get(
+GVFSConfig.CACHE_EXPIRED_TIME, 
GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
+)
 )
 self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time)
 self._cache_lock = rwlock.RWLockFair()
diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py 
b/clients/client-python/gravitino/filesystem/gvfs_config.py
new file mode 100644
index 0..539b9045a
--- /dev/null
+++ b/clients/client-python/gravitino/filesystem/gvfs_config.py
@@ -0,0 +1,29 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://w

(gravitino) branch main updated: [#3764] improvement(docs): Add user docs for using GVFS in Python (#3931)

2024-07-09 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 14222171d [#3764] improvement(docs): Add user docs for using GVFS in 
Python (#3931)
14222171d is described below

commit 14222171d4be88be9b4b88471140316123751248
Author: xloya <982052...@qq.com>
AuthorDate: Wed Jul 10 11:06:07 2024 +0800

[#3764] improvement(docs): Add user docs for using GVFS in Python (#3931)

### What changes were proposed in this pull request?

Provides documentation for users to use Gravitino Virtual FileSystem in
Python.

### Why are the changes needed?

Fix: #3764

### How was this patch tested?

No code changes, no testing required.

-

Co-authored-by: xiaojiebao 
---
 docs/how-to-use-gvfs.md | 251 +---
 1 file changed, 237 insertions(+), 14 deletions(-)

diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 654c90387..46e0c1b60 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -11,8 +11,10 @@ directories, with `fileset` you can manage non-tabular data 
through Gravitino. F
 details, you can read [How to manage fileset metadata using 
Gravitino](./manage-fileset-metadata-using-gravitino.md).
 
 To use `Fileset` managed by Gravitino, Gravitino provides a virtual file 
system layer called
-the Gravitino Virtual File System (GVFS) that's built on top of the Hadoop 
Compatible File System
-(HCFS) interface.
+the Gravitino Virtual File System (GVFS):
+* In Java, it's built on top of the Hadoop Compatible File System(HCFS) 
interface.
+* In Python, it's built on top of the 
[fsspec](https://filesystem-spec.readthedocs.io/en/stable/index.html)
+interface.
 
 GVFS is a virtual layer that manages the files and directories in the fileset 
through a virtual
 path, without needing to understand the specific storage details of the 
fileset. You can access
@@ -22,6 +24,12 @@ the files or folders as shown below:
 gvfs://fileset/${catalog_name}/${schema_name}/${fileset_name}/sub_dir/
 ```
 
+In python GVFS, you can also access the files or folders as shown below:
+
+```text
+fileset/${catalog_name}/${schema_name}/${fileset_name}/sub_dir/
+```
+
 Here `gvfs` is the scheme of the GVFS, `fileset` is the root directory of the 
GVFS which can't
 modified, and `${catalog_name}/${schema_name}/${fileset_name}` is the virtual 
path of the fileset.
 You can access the files and folders under this virtual path by concatenating 
a file or folder
@@ -30,14 +38,16 @@ name to the virtual path.
 The usage pattern for GVFS is the same as HDFS or S3. GVFS internally manages
 the path mapping and convert automatically.
 
-## Prerequisites
+## 1. Managing files of Fileset with Java GVFS
+
+### Prerequisites
 
 + A Hadoop environment with HDFS running. GVFS has been tested against
   Hadoop 3.1.0. It is recommended to use Hadoop 3.1.0 or later, but it should 
work with Hadoop 2.
   x. Please create an [issue](https://www.github.com/apache/gravitino/issues) 
if you find any
   compatibility issues.
 
-## Configuration
+### Configuration
 
 | Configuration item| Description  


   | Default value | Required| 
Since version |
 
|---|-|---|-|---|
@@ -94,7 +104,7 @@ You can configure these properties in two ways:
   
 ```
 
-## How to use the Apache Gravitino Virtual File System
+### Usage examples
 
 First make sure to obtain the Gravitino Virtual File System runtime jar, which 
you can get in
 two ways:
@@ -111,7 +121,7 @@ two ways:
./gradlew :clients:filesystem-hadoop3-runtime:build -x test
 ```
 
-### Use GVFS via Hadoop shell command
+ Via Hadoop shell command
 
 You can use the Hadoop shell command to perform operations on the fileset 
storage. For example:
 
@@ -131,7 +141,7 @@ kinit -kt your_kerberos.keytab your_kerbe...@xxx.com
 ./${HADOOP_HOME}/bin/hadoop dfs -ls 
gvfs://fileset/test_catalog/test_schema/test_fileset_1
 ```
 
-### Using the GVFS via Java code
+ Via Java code
 
 You can also perform operations on the files or directories managed by fileset 
through Java code.
 Make sure that your code is using the correct Hadoop environment, and that 
your environment
@@ -150,7 +160,7 @@ FileSystem fs = file

(gravitino) branch main updated: [#4012] improvement(client-python): Refactor Error Handling in client-python (#4093)

2024-07-09 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 9e80cbcf9 [#4012] improvement(client-python): Refactor Error Handling 
in client-python (#4093)
9e80cbcf9 is described below

commit 9e80cbcf9ea8de21eabec9fdb3651d5b376c7b26
Author: noidname01 <55401762+noidnam...@users.noreply.github.com>
AuthorDate: Tue Jul 9 18:01:33 2024 +0800

[#4012] improvement(client-python): Refactor Error Handling in 
client-python (#4093)

### What changes were proposed in this pull request?

* Refactor the error handling structure, each API can implement their
own error handler to raise custom exceptions
* Add unit test for error handler, but unit tests and integration tests
for each API(ex. metalake, catalog, schema) have not been added, I will
create issues for them.
- [ ] Add Metalake Error Handler and related exceptions, test cases
- [ ] Add Catalog Error Handler and related exceptions, test cases
- [ ] Add Schema Error Handler and related exceptions, test cases
- [ ] Add OAuth Error Handler and related exceptions, test cases
* Create `gravitino/exceptions/base.py` to define all the exceptions.
* Remove some unused files and exceptions

### Why are the changes needed?

Fix: #4012

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

UT added and one IT added, test with `./gradlew
clients:client-python:test`

-

Co-authored-by: TimWang 
---
 .../gravitino/catalog/fileset_catalog.py   |  18 +++-
 .../gravitino/client/gravitino_client.py   |  12 ---
 .../gravitino/client/gravitino_client_base.py  |   2 +-
 .../gravitino/client/gravitino_metalake.py |  12 ---
 .../gravitino/client/gravitino_version.py  |   2 +-
 clients/client-python/gravitino/constants/error.py |  71 
 .../gravitino/dto/responses/base_response.py   |   5 +-
 .../gravitino/dto/responses/error_response.py  |  79 ++
 .../gravitino/dto/responses/version_response.py|   4 +-
 clients/client-python/gravitino/exceptions/base.py |  83 ++
 .../exceptions/gravitino_runtime_exception.py  |  25 -
 .../__init__.py}   |   6 --
 .../error_handler.py}  |  26 +++--
 .../exceptions/handlers/fileset_error_handler.py   |  43 
 .../rest_error_handler.py} |  26 +++--
 .../gravitino/exceptions/not_found_exception.py|  24 -
 clients/client-python/gravitino/filesystem/gvfs.py |   2 +-
 clients/client-python/gravitino/name_identifier.py |   4 +-
 clients/client-python/gravitino/namespace.py   |   5 +-
 .../client-python/gravitino/rest/rest_message.py   |   6 --
 .../client-python/gravitino/utils/exceptions.py| 114 
 .../client-python/gravitino/utils/http_client.py   |  75 ++---
 clients/client-python/scripts/generate_version.py  |   2 +-
 .../tests/integration/base_hadoop_env.py   |   2 +-
 .../tests/integration/hdfs_container.py|   2 +-
 .../tests/integration/integration_test_env.py  |   2 +-
 .../tests/integration/test_fileset_catalog.py  |   6 ++
 .../tests/integration/test_gvfs_with_hdfs.py   |   2 +-
 .../tests/unittests/test_error_handler.py  | 120 +
 .../tests/unittests/test_gravitino_version.py  |   2 +-
 .../tests/unittests/test_gvfs_with_local.py|   2 +-
 31 files changed, 531 insertions(+), 253 deletions(-)

diff --git a/clients/client-python/gravitino/catalog/fileset_catalog.py 
b/clients/client-python/gravitino/catalog/fileset_catalog.py
index 82c345411..5ab2e00e6 100644
--- a/clients/client-python/gravitino/catalog/fileset_catalog.py
+++ b/clients/client-python/gravitino/catalog/fileset_catalog.py
@@ -35,6 +35,7 @@ from gravitino.name_identifier import NameIdentifier
 from gravitino.namespace import Namespace
 from gravitino.utils import HTTPClient
 from gravitino.rest.rest_utils import encode_string
+from gravitino.exceptions.handlers.fileset_error_handler import 
FILESET_ERROR_HANDLER
 
 logger = logging.getLogger(__name__)
 
@@ -88,7 +89,10 @@ class FilesetCatalog(BaseSchemaCatalog):
 
 full_namespace = self._get_fileset_full_namespace(namespace)
 
-resp = 
self.rest_client.get(self.format_fileset_request_path(full_namespace))
+resp = self.rest_client.get(
+self.format_fileset_request_path(full_namespace),
+error_handler=FILESET_ERROR_HANDLER,
+)
 entity_list_resp = EntityListResponse.from_json(resp.body, 
infer_missing=True)
 entity_list_resp.validate()
 
@@ -114,7 +118,8 @@ class FilesetC

(gravitino) branch main updated: [#4018] feat(core): Add tag management logic for Tag System (Part 1) (#4019)

2024-07-08 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 178eb37f8 [#4018] feat(core): Add tag management logic for Tag System 
(Part 1) (#4019)
178eb37f8 is described below

commit 178eb37f8b5013abdd2464dda764ddd5b0787f38
Author: Jerry Shao 
AuthorDate: Tue Jul 9 09:35:33 2024 +0800

[#4018] feat(core): Add tag management logic for Tag System (Part 1) (#4019)

### What changes were proposed in this pull request?

This PR tracks the work of adding the core logics for tag management.

### Why are the changes needed?

This is a part of work for adding tag support in Gravitino.

Fix: #4018

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

UTs added.
---
 .../com/datastrato/gravitino/meta/TagEntity.java   |  16 --
 .../gravitino/storage/relational/JDBCBackend.java  |  15 ++
 .../storage/relational/mapper/TagMetaMapper.java   | 175 
 .../mapper/TagMetadataObjectRelMapper.java |  59 
 .../gravitino/storage/relational/po/TagPO.java | 142 ++
 .../relational/service/MetalakeMetaService.java|  22 +-
 .../storage/relational/service/TagMetaService.java | 180 +
 .../session/SqlSessionFactoryHelper.java   |   4 +
 .../storage/relational/utils/POConverters.java |  57 
 .../com/datastrato/gravitino/tag/TagManager.java   | 185 -
 .../com/datastrato/gravitino/meta/TestEntity.java  |  20 --
 .../storage/relational/TestJDBCBackend.java|  37 +++
 .../relational/service/TestTagMetaService.java | 300 +
 .../storage/relational/utils/TestPOConverters.java |  91 +++
 .../datastrato/gravitino/tag/TestTagManager.java   | 248 +
 .../integration/test/util/AbstractIT.java  |   9 +-
 .../relational/service/FilesetMetaServiceIT.java   |   7 +-
 scripts/h2/schema-h2.sql   |  31 ++-
 scripts/mysql/schema-0.5.0-mysql.sql   |   2 +-
 scripts/mysql/schema-0.6.0-mysql.sql   |  31 ++-
 scripts/mysql/upgrade-0.5.0-to-0.6.0-mysql.sql |  29 ++
 21 files changed, 1609 insertions(+), 51 deletions(-)

diff --git a/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java 
b/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java
index b4acaf71f..c6e01ec7e 100644
--- a/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java
+++ b/core/src/main/java/com/datastrato/gravitino/meta/TagEntity.java
@@ -24,7 +24,6 @@ import com.datastrato.gravitino.Auditable;
 import com.datastrato.gravitino.Entity;
 import com.datastrato.gravitino.Field;
 import com.datastrato.gravitino.HasIdentifier;
-import com.datastrato.gravitino.MetadataObject;
 import com.datastrato.gravitino.Namespace;
 import com.datastrato.gravitino.tag.Tag;
 import com.google.common.collect.Maps;
@@ -47,10 +46,6 @@ public class TagEntity implements Tag, Entity, Auditable, 
HasIdentifier {
   public static final Field PROPERTIES =
   Field.optional("properties", Map.class, "The properties of the tag 
entity.");
 
-  public static final Field ASSOCIATED_OBJECTS =
-  Field.optional(
-  "objects", MetadataObject[].class, "The associated objects of the 
tag entity.");
-
   public static final Field AUDIT_INFO =
   Field.required("audit_info", Audit.class, "The audit details of the tag 
entity.");
 
@@ -59,7 +54,6 @@ public class TagEntity implements Tag, Entity, Auditable, 
HasIdentifier {
   private Namespace namespace;
   private String comment;
   private Map properties;
-  private MetadataObject[] objects = null;
   private Audit auditInfo;
 
   private TagEntity() {}
@@ -72,7 +66,6 @@ public class TagEntity implements Tag, Entity, Auditable, 
HasIdentifier {
 fields.put(COMMENT, comment);
 fields.put(PROPERTIES, properties);
 fields.put(AUDIT_INFO, auditInfo);
-fields.put(ASSOCIATED_OBJECTS, objects);
 
 return Collections.unmodifiableMap(fields);
   }
@@ -112,10 +105,6 @@ public class TagEntity implements Tag, Entity, Auditable, 
HasIdentifier {
 return Optional.empty();
   }
 
-  public MetadataObject[] objects() {
-return objects;
-  }
-
   @Override
   public Audit auditInfo() {
 return auditInfo;
@@ -181,11 +170,6 @@ public class TagEntity implements Tag, Entity, Auditable, 
HasIdentifier {
   return this;
 }
 
-public Builder withMetadataObjects(MetadataObject[] objects) {
-  tagEntity.objects = objects;
-  return this;
-}
-
 public Builder withAuditInfo(Audit auditInfo) {
   tagEntity.auditInfo = auditInfo;
   return this;
diff --git 
a/core/src/main/java/com/datastrato/gravitino/storage/relational/JDBCBackend.java
 
b/co

(gravitino) branch main updated: [#3968] improvement(core): Disable KV entity store and optimize CI (#3975)

2024-07-08 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 27ca87535 [#3968] improvement(core): Disable KV entity store and 
optimize CI (#3975)
27ca87535 is described below

commit 27ca87535e30535df2dc8f4108570671cd9e80bd
Author: Qi Yu 
AuthorDate: Mon Jul 8 19:49:18 2024 +0800

[#3968] improvement(core): Disable KV entity store and optimize CI (#3975)

### What changes were proposed in this pull request?

- Disabling support for the KV entity store and adjusting the tests
accordingly.
- Change CI about backend option `jdbcBackend`

### Why are the changes needed?

We are going to deprecate kv entity store

Fix: #3968

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

Existing test.
---
 .github/workflows/backend-integration-test.yml |   2 +-
 build.gradle.kts   |   5 +-
 catalogs/catalog-hadoop/build.gradle.kts   |  10 +
 .../hadoop/TestHadoopCatalogOperations.java|  81 ++--
 catalogs/catalog-kafka/build.gradle.kts|  10 +
 .../catalog/kafka/TestKafkaCatalogOperations.java  |  69 ++-
 .../lakehouse/paimon/TestGravitinoPaimonTable.java |  11 ++
 .../lakehouse/paimon/TestPaimonCatalog.java|  13 +-
 .../catalog/lakehouse/paimon/TestPaimonSchema.java |  13 ++
 .../gravitino/filesystem/hadoop/TestGvfsBase.java  |   8 +
 .../java/com/datastrato/gravitino/Configs.java |   2 +-
 .../datastrato/gravitino/EntityStoreFactory.java   |   9 +-
 .../gravitino/storage/TestEntityStorage.java   |  35 +---
 .../storage/kv/TestEntityKeyEncoding.java  |   2 +
 .../gravitino/storage/kv/TestKvEntityStorage.java  |   1 +
 .../storage/kv/TestKvGarbageCollector.java |   2 +
 .../storage/kv/TestKvNameMappingService.java   |   2 +
 .../gravitino/storage/kv/TestRocksDBKvBackend.java |   2 +
 .../gravitino/storage/kv/TestStorageVersion.java   |   2 +
 .../storage/kv/TestTransactionIdGenerator.java |   1 +
 .../storage/kv/TestTransactionalKvBackend.java |   1 +
 .../storage/relational/TestJDBCBackend.java|   9 +-
 .../storage/relational/session/TestSqlSession.java |   8 +-
 core/src/test/resources/h2/schema-h2.sql   | 218 -
 docs/gravitino-server-config.md|   2 +-
 gradle/libs.versions.toml  |   1 +
 .../integration/test/util/AbstractIT.java  |   2 +-
 .../relational/service/FilesetMetaServiceIT.java   |   4 +-
 28 files changed, 242 insertions(+), 283 deletions(-)

diff --git a/.github/workflows/backend-integration-test.yml 
b/.github/workflows/backend-integration-test.yml
index da534f467..437acbd02 100644
--- a/.github/workflows/backend-integration-test.yml
+++ b/.github/workflows/backend-integration-test.yml
@@ -61,7 +61,7 @@ jobs:
 architecture: [linux/amd64]
 java-version: [ 8, 11, 17 ]
 test-mode: [ embedded, deploy ]
-backend: [ jdbcBackend, kvBackend]
+backend: [ mysql, h2]
 env:
   PLATFORM: ${{ matrix.architecture }}
 steps:
diff --git a/build.gradle.kts b/build.gradle.kts
index ec5898921..a29b0405d 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -178,9 +178,8 @@ allprojects {
 
   // Change poll image pause time from 30s to 60s
   param.environment("TESTCONTAINERS_PULL_PAUSE_TIMEOUT", "60")
-  if (project.hasProperty("jdbcBackend")) {
-param.environment("jdbcBackend", "true")
-  }
+  val jdbcDatabase = project.properties["jdbcBackend"] as? String ?: "h2"
+  param.environment("jdbcBackend", jdbcDatabase)
 
   val testMode = project.properties["testMode"] as? String ?: "embedded"
   param.systemProperty("gravitino.log.path", project.buildDir.path + 
"/${project.name}-integration-test.log")
diff --git a/catalogs/catalog-hadoop/build.gradle.kts 
b/catalogs/catalog-hadoop/build.gradle.kts
index ccdf7c996..0dfa23154 100644
--- a/catalogs/catalog-hadoop/build.gradle.kts
+++ b/catalogs/catalog-hadoop/build.gradle.kts
@@ -53,6 +53,7 @@ dependencies {
 
   testImplementation(libs.bundles.log4j)
   testImplementation(libs.mockito.core)
+  testImplementation(libs.mockito.inline)
   testImplementation(libs.mysql.driver)
   testImplementation(libs.junit.jupiter.api)
   testImplementation(libs.junit.jupiter.params)
@@ -101,6 +102,15 @@ tasks {
 }
 
 tasks.test {
+  doFirst {
+val testMode = project.properties["testMode"] as? String ?: "embedded"
+if (testMode == "deploy") {
+  environment("GRAVITINO_HOME", project.rootDir.path + 
"/distribution/package")
+} el

(gravitino) branch main updated: [#4086] Remove Datastrato name and fix support email (#4087)

2024-07-07 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 46f825f8c [#4086] Remove Datastrato name and fix support email (#4087)
46f825f8c is described below

commit 46f825f8c028af75e620ef41268a40c98080c3bc
Author: Justin Mclean 
AuthorDate: Mon Jul 8 13:08:24 2024 +1000

[#4086] Remove Datastrato name and fix support email (#4087)

### What changes were proposed in this pull request?

Remove Datastrato name and fix the support email. Note DockerHub still
needs fixing but another issue covers this.

### Why are the changes needed?

As we are now an ASF project.

Fix: #4086

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

Built and tested locally.
---
 .asf.yaml | 2 +-
 .github/CONTRIBUTING  | 4 ++--
 build.gradle.kts  | 6 +++---
 clients/client-python/setup.py| 4 ++--
 dev/docker/doris/Dockerfile   | 2 +-
 dev/docker/gravitino/Dockerfile   | 2 +-
 dev/docker/hive/Dockerfile| 2 +-
 dev/docker/kerberos-hive/Dockerfile   | 2 +-
 dev/docker/ranger/Dockerfile  | 2 +-
 dev/docker/trino/Dockerfile   | 2 +-
 docs/glossary.md  | 2 +-
 docs/how-to-sign-releases.md  | 6 +++---
 docs/how-to-use-the-playground.md | 8 
 .../gravitino/integration/test/web/ui/MetalakePageTest.java   | 2 +-
 .../gravitino/integration/test/web/ui/pages/MetalakePage.java | 2 +-
 web/src/app/rootLayout/Footer.js  | 6 +++---
 16 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/.asf.yaml b/.asf.yaml
index 84a019b42..de078eeed 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -17,7 +17,7 @@
 
 github:
   description: World's most powerful open data catalog for building a 
high-performance, geo-distributed and federated metadata lake.
-  homepage: https://datastrato.ai/docs/
+  homepage: https://gravitino.apache.org
   labels:
 - metadata
 - data-catalog
diff --git a/.github/CONTRIBUTING b/.github/CONTRIBUTING
index b65ca92fe..2c8ad3161 100644
--- a/.github/CONTRIBUTING
+++ b/.github/CONTRIBUTING
@@ -72,11 +72,11 @@ We value and appreciate the diverse contributions and ideas 
from the community.
 
 For significant contributions to Gravitino, we require contributors to sign an 
Individual Contributor License Agreement (ICLA). This ensures that the project 
and its community can properly manage and maintain intellectual property rights.
 
-If you plan to make a large contribution, please contact us at 
[jus...@datastrato.com](mailto:jus...@datastrato.com) to discuss the ICLA 
process.
+If you plan to make a large contribution, please contact us at 
[d...@gravitino.apache.org](mailto:d...@gravitino.apache.org) to discuss the 
ICLA process.
 
 ## Contact
 
-If you have any questions or need further assistance, you can reach out to us 
at [jus...@datastrato.com](mailto:jus...@datastrato.com).
+If you have any questions or need further assistance, you can reach out to us 
at [d...@gravitino.apache.org](mailto:d...@gravitino.apache.org).
 
 ## License
 
diff --git a/build.gradle.kts b/build.gradle.kts
index 51d1f2175..ec5898921 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -373,7 +373,7 @@ subprojects {
 pom {
   name.set("Gravitino")
   description.set("Gravitino is a high-performance, geo-distributed 
and federated metadata lake.")
-  url.set("https://datastrato.ai";)
+  url.set("https://gravitino.apache.org";)
   licenses {
 license {
   name.set("The Apache Software License, Version 2.0")
@@ -382,9 +382,9 @@ subprojects {
   }
   developers {
 developer {
-  id.set("The maintainers of Gravitino")
+  id.set("The Gravitino community")
   name.set("support")
-  email.set("d...@datastrato.com")
+  email.set("d...@gravitino.apache.org")
 }
   }
   scm {
diff --git a/clients/client-python/setup.py b/clients/client-python/setup.py
index 48e8af031..02790f1e3 100644
--- a/clients/client-python/setup.py
+++ b/clients/client-python/setup.py
@@ -34,8 +34,8 @@ setup(
 long_description=

(gravitino) branch main updated: [#4064] Fix GitHub and resources (#4070)

2024-07-04 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 34a81df3f [#4064] Fix GitHub and resources (#4070)
34a81df3f is described below

commit 34a81df3f4c47ea50fd64ed5eb413a6187178240
Author: Justin Mclean 
AuthorDate: Fri Jul 5 16:36:19 2024 +1000

[#4064] Fix GitHub and resources (#4070)

### What changes were proposed in this pull request?

Change GitHub and resources to ASF ones. Still to fix are Docker and the
documentation URL. But this can be merged now.

### Why are the changes needed?

As we are now an ASF project.

Fix: #4064

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Built locally with no issues.
---
 CONTRIBUTING.md   |  6 +++---
 build.gradle.kts  |  6 +++---
 clients/client-python/setup.py|  2 +-
 docs/docker-image-details.md  |  2 +-
 docs/getting-started.md   | 13 ++---
 docs/hadoop-catalog.md|  6 +++---
 docs/how-to-build.md  |  4 ++--
 docs/how-to-install.md|  4 ++--
 docs/how-to-use-gvfs.md   |  4 ++--
 docs/how-to-use-python-client.md  | 15 ++-
 docs/how-to-use-the-playground.md |  2 +-
 docs/index.md |  4 ++--
 docs/manage-table-partition-using-gravitino.md|  4 ++--
 docs/publish-docker-images.md |  2 +-
 docs/trino-connector/installation.md  |  4 ++--
 .../com/datastrato/gravitino/server/web/JettyServer.java  |  2 +-
 web/src/app/rootLayout/Footer.js  |  4 ++--
 17 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c4c83b6f6..89a63d79d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -55,13 +55,13 @@ Before you get started, please read and follow these 
guidelines to ensure a smoo
 Either click the "Fork" button at the top right of the repository's page on 
GitHub OR create a fork on your local machine using `git clone`.
 
 ```bash
-git clone https://github.com/datastrato/gravitino.git
+git clone https://github.com/apache/gravitino.git
 cd gravitino
 ```
 
 ### Development Setup
 
-Once you have cloned the [GitHub 
repository](https://github.com/datastrato/gravitino), see [how to 
build](/docs/how-to-build.md) for instructions on how to build, or you can use 
the provided docker images at [Datastrato's DockerHub 
repository](https://hub.docker.com/u/datastrato).
+Once you have cloned the [GitHub 
repository](https://github.com/apache/gravitino), see [how to 
build](/docs/how-to-build.md) for instructions on how to build, or you can use 
the provided docker images at [Datastrato's DockerHub 
repository](https://hub.docker.com/u/datastrato).
 
 To stop and start a local Gravitino server via `bin/gravitino.sh start` and 
`bin/gravitino.sh stop` in a Gravitino distribution, see [how to 
build](/docs/how-to-build.md) for more instructions.
 
@@ -225,7 +225,7 @@ If you have ideas for enhancements or new features, feel 
free to create an issue
 
 ### Good First Issues
 
-If you are new to open source or can't find something to work on, check out 
the [Good First Issues 
list](https://github.com/datastrato/gravitino/contribute).
+If you are new to open source or can't find something to work on, check out 
the [Good First Issues list](https://github.com/apache/gravitino/contribute).
 
 ### Working on Issues
 
diff --git a/build.gradle.kts b/build.gradle.kts
index abba4ee2a..51d1f2175 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -384,12 +384,12 @@ subprojects {
 developer {
   id.set("The maintainers of Gravitino")
   name.set("support")
-  email.set("supp...@datastrato.com")
+  email.set("d...@datastrato.com")
 }
   }
   scm {
-url.set("https://github.com/datastrato/gravitino";)
-connection.set("scm:git:git://github.com/datastrato/gravitino.git")
+url.set("https://github.com/apache/gravitino";)
+connection.set("scm:git:git://github.com/apache/gravitino.git")
   }
 }
   }
diff --git a/clients/client-python/setup.py b/clients/client-python/setup.py
index d812e593c..48e8af031 100644
--- a/clients/client-python/setup.py
+++ b/client

(gravitino) branch main updated: [#3760] improvement(client-python): Add Docker env and PyGVFS Integration tests (#3876)

2024-07-04 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 4312b632b [#3760] improvement(client-python): Add Docker env and 
PyGVFS Integration tests (#3876)
4312b632b is described below

commit 4312b632b225b5a5a2ea6e0f0bdcbdd3092a1db8
Author: xloya <982052...@qq.com>
AuthorDate: Fri Jul 5 11:26:27 2024 +0800

[#3760] improvement(client-python): Add Docker env and PyGVFS Integration 
tests (#3876)

### What changes were proposed in this pull request?

Add Hive Docker env for client-python, and add integration tests for
PyGVFS + HDFS. Depends on #3528.

### Why are the changes needed?

Fix: #3760

### How was this patch tested?

Add some ITs.

-

Co-authored-by: xiaojiebao 
---
 .github/workflows/python-integration-test.yml  |   2 +-
 clients/client-python/build.gradle.kts |  55 +-
 clients/client-python/requirements-dev.txt |   3 +-
 .../tests/integration/base_hadoop_env.py   | 101 +++
 .../tests/integration/hdfs_container.py| 158 +
 .../tests/integration/integration_test_env.py  |  86 +++
 .../tests/integration/test_gvfs_with_hdfs.py   | 704 +
 .../tests/integration/test_simple_auth_client.py   |   4 +-
 8 files changed, 1098 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/python-integration-test.yml 
b/.github/workflows/python-integration-test.yml
index f2e5fd4ed..a7ffacfd7 100644
--- a/.github/workflows/python-integration-test.yml
+++ b/.github/workflows/python-integration-test.yml
@@ -66,7 +66,7 @@ jobs:
   for pythonVersion in "3.8" "3.9" "3.10" "3.11"
   do
 echo "Use Python version ${pythonVersion} to test the Python 
client."
-./gradlew -PjdkVersion=${{ matrix.java-version }} 
-PpythonVersion=${pythonVersion} :clients:client-python:test
+./gradlew -PjdkVersion=${{ matrix.java-version }} 
-PpythonVersion=${pythonVersion} -PskipDockerTests=false 
:clients:client-python:test
 # Clean Gravitino database to clean test data
 rm -rf ./distribution/package/data
   done
diff --git a/clients/client-python/build.gradle.kts 
b/clients/client-python/build.gradle.kts
index 68cc897e5..2cf83c376 100644
--- a/clients/client-python/build.gradle.kts
+++ b/clients/client-python/build.gradle.kts
@@ -16,12 +16,15 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+import de.undercouch.gradle.tasks.download.Download
+import de.undercouch.gradle.tasks.download.Verify
 import io.github.piyushroshan.python.VenvTask
 import java.net.HttpURLConnection
 import java.net.URL
 
 plugins {
   id("io.github.piyushroshan.python-gradle-miniforge-plugin") version "1.0.0"
+  id("de.undercouch.download") version "5.6.0"
 }
 
 pythonPlugin {
@@ -148,6 +151,10 @@ fun generatePypiProjectHomePage() {
   }
 }
 
+val hadoopVersion = "2.7.3"
+val hadoopPackName = "hadoop-${hadoopVersion}.tar.gz"
+val hadoopDirName = "hadoop-${hadoopVersion}"
+val hadoopDownloadUrl = 
"https://archive.apache.org/dist/hadoop/core/hadoop-${hadoopVersion}/${hadoopPackName}";
 tasks {
   val pipInstall by registering(VenvTask::class) {
 venvExec = "pip"
@@ -173,6 +180,26 @@ tasks {
 workingDir = projectDir.resolve("./tests/integration")
   }
 
+  val build by registering(VenvTask::class) {
+dependsOn(pylint)
+venvExec = "python"
+args = listOf("scripts/generate_version.py")
+  }
+
+  val downloadHadoopPack by registering(Download::class) {
+dependsOn(build)
+onlyIfModified(true)
+src(hadoopDownloadUrl)
+dest(layout.buildDirectory.dir("tmp"))
+  }
+
+  val verifyHadoopPack by registering(Verify::class) {
+dependsOn(downloadHadoopPack)
+src(layout.buildDirectory.file("tmp/${hadoopPackName}"))
+algorithm("MD5")
+checksum("3455bb57e4b4906bbea67b58cca78fa8")
+  }
+
   val integrationTest by registering(VenvTask::class) {
 doFirst {
   gravitinoServer("start")
@@ -181,11 +208,23 @@ tasks {
 venvExec = "coverage"
 args = listOf("run", "--branch", "-m", "unittest")
 workingDir = projectDir.resolve("./tests/integration")
-environment = mapOf(
-  "PROJECT_VERSION" to project.version,
-  "GRAVITINO_HOME" to project.rootDir.path + "/distribution/package",
-  "START_EXTERNAL_GRAVITINO" to "true"
-)
+val dockerTest = project.rootProject.extra["dockerTest"] as? Boolean 

(gravitino) branch main updated: [#4073] Update policies to be in line with ASF policy. (#4080)

2024-07-04 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 2133d95ac [#4073] Update policies to be in line with ASF policy. 
(#4080)
2133d95ac is described below

commit 2133d95ac86918c9345e9a2440fc646c0fbbe0d3
Author: Justin Mclean 
AuthorDate: Fri Jul 5 13:05:58 2024 +1000

[#4073] Update policies to be in line with ASF policy. (#4080)

### What changes were proposed in this pull request?

Update project's current policies to be in line with ASF policy.

### Why are the changes needed?

To comply with ASF policy.

Fix: #4073

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Built locally.
---
 CODE_OF_CONDUCT.md |   2 +-
 CONTRIBUTING.md|   2 +-
 GOVERNANCE.md  |  30 +--
 MAINTAINERS.md | 106 -
 SECURITY.md|  27 +-
 5 files changed, 61 insertions(+), 106 deletions(-)

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index c83f1d12e..7c9052a60 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -58,7 +58,7 @@ Examples of representing our community include using an 
official e-mail address,
 
 ## Enforcement
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be 
reported to the community leaders responsible for enforcement at 
.
+Instances of abusive, harassing, or otherwise unacceptable behavior may be 
reported to the community leaders responsible for enforcement at 
.
 All complaints will be reviewed and investigated promptly and fairly.
 All community leaders are obligated to respect the privacy and security of the 
reporter of any incident.
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 028b782be..c4c83b6f6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -281,7 +281,7 @@ All text files should use macOS/unix style line endings 
(LF) not windows style l
 
 ## Community and communication
 
-Join the [community discourse group](https://gravitino.discourse.group) to 
discuss ideas and seek help. You are also encouraged to use GitHub discussions 
and follow Datastrato on social media to stay updated on project news.
+Join the [community mailing 
list](https://lists.apache.org/list.html?d...@gravitino.apache.org) to discuss 
ideas and seek help; you are also encouraged to use GitHub discussions.
 
 ## License
 
diff --git a/GOVERNANCE.md b/GOVERNANCE.md
index 5d28e44b4..37418d608 100644
--- a/GOVERNANCE.md
+++ b/GOVERNANCE.md
@@ -19,32 +19,4 @@
 
 # Governance Policy
 
-This document provides the governance policy for the project. Maintainers 
agree to this policy and to follow all project polices by adding their name to 
the [maintainers.md file](./MAINTAINERS.md).
-
-## 1. Roles
-
-This project includes the following roles.
-
-**1.1. Maintainers**. Maintainers oversee the development, maintenance, and 
updates of the project, and play a role in consensus decision-making. The 
addition or removal of Maintainers requires approval from the existing 
Maintainers.
-
-**1.2. Contributors**. Contributors are individuals who have made 
contributions to the project.
-
-## 2. Decisions
-
-**2.1. Consensus-Based Decision Making**. Decisions in projects are reached 
through consensus. Although unanimous agreement is preferred, it's not required.
-
-## 3. How We Work
-
-**3.1. Openness**. Anyone can participate in the project, and there should be 
minimal barriers to entry.
-
-**3.2. Balance**. The development process should balance the interests of all 
stakeholders.
-
-**3.3. Harmonization**. Good-faith efforts shall be made to resolve any 
conflicts.
-
-## 4. Trademarks
-
-Any names, trademarks or logos of the project may only be used if they 
indicate the project's source.
-
-## 5. Amendments
-
-Amendments to this governance policy may be made by approval of the 
Maintainers.
+The Apache Gravitino project follows the standard [ASF governance 
model](https://www.apache.org/foundation/governance/), [ASF 
policies](https://www.apache.org/foundation/policies/), and [ASF Incubator 
policies](https://incubator.apache.org/policy/incubation.html).
\ No newline at end of file
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
index ceb250584..f9b693345 100644
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -17,58 +17,11 @@
   under the License.
 -->
 
-This document lists the maintainers and contributors of the Project.
+Note: The maintainer and contributor tables at the end of this document list 
the maintainers and contributors of the project before it became an ASF project 
and are no longer updated.
 
-# Maintainers
+# Committers
 
-Maintainers may be added once approved by the existing maintainers (see 
[Governance document](GOVERNANCE.md)). By adding your nam

(gravitino) branch main updated: [#4074] Add work in progress disclaimer. (#4076)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new e17a910c9 [#4074] Add work in progress disclaimer. (#4076)
e17a910c9 is described below

commit e17a910c94d5462e705a0870569b1252be14bd50
Author: Justin Mclean 
AuthorDate: Thu Jul 4 16:20:21 2024 +1000

[#4074] Add work in progress disclaimer. (#4076)

### What changes were proposed in this pull request?

Add work in progress disclaimer.

### Why are the changes needed?

Required by ASF incubator policy.

Fix: #4074

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

N/A
---
 DISCLAIMER_WIP.txt | 6 ++
 build.gradle.kts   | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/DISCLAIMER_WIP.txt b/DISCLAIMER_WIP.txt
new file mode 100644
index 0..c285b1214
--- /dev/null
+++ b/DISCLAIMER_WIP.txt
@@ -0,0 +1,6 @@
+Apache Gravitino is an effort undergoing incubation at The Apache Software 
Foundation (ASF), sponsored by the name of Apache TLP sponsor. Incubation is 
required of all newly accepted projects until a further review indicates that 
the infrastructure, communications, and decision-making process have stabilized 
in a manner consistent with other successful ASF projects. While incubation 
status is not necessarily a reflection of the completeness or stability of the 
code, it does indicate that [...]
+
+Some of the incubating project’s releases may not be fully compliant with ASF 
policy and while we have documented the licensing of all code in detail, we 
know that currently our release would:
+- Contains code that may not be compatible with the Apache License
+
+If you are planning to incorporate this work into your product/project, please 
be aware that you will need to conduct a thorough licensing review to determine 
the overall implications of including this work. For the current status of this 
project through the Apache Incubator, visit: 
https://incubator.apache.org/projects/Apache Podling-Name.html
\ No newline at end of file
diff --git a/build.gradle.kts b/build.gradle.kts
index 32e13d72c..abba4ee2a 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -484,6 +484,8 @@ tasks.rat {
 "web/src/lib/icons/svg/**/*.svg",
 "**/LICENSE.*",
 "**/NOTICE.*",
+"DISCLAIMER_WIP.txt",
+"DISCLAIMER.txt",
 "ROADMAP.md",
 "clients/client-python/.pytest_cache/*",
 "clients/client-python/gravitino.egg-info/*",



(gravitino-playground) branch main updated: Update README (#49)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git


The following commit(s) were added to refs/heads/main by this push:
 new 60ba5be  Update README (#49)
60ba5be is described below

commit 60ba5bea17d36c0679e395dfe53e097a88538f8d
Author: Justin Mclean 
AuthorDate: Thu Jul 4 16:11:52 2024 +1000

Update README (#49)

Add ASF disclaimer, trademark attribution and Apache Gravitino where
needed.
---
 README.md | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fec2bbb..863d035 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
 
 ## Playground introduction
 
-The playground is a complete Gravitino Docker runtime environment with `Hive`, 
`HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupter`, and a `Gravitino` server.
+The playground is a complete Apache Gravitino Docker runtime environment with 
`Hive`, `HDFS`, `Trino`, `MySQL`, `PostgreSQL`, `Jupyter`, and a `Gravitino` 
server.
 
 Depending on your network and computer, startup time may take 3-5 minutes. 
Once the playground environment has started, you can open 
[http://localhost:8090](http://localhost:8090) in a browser to access the 
Gravitino Web UI.
 
@@ -74,7 +74,7 @@ cd gravitino-playground
 ./launch-playground.sh hive|gravitino|trino|postgresql|mysql|spark|jupyter
 ```
 
-## Experiencing Gravitino with Trino SQL
+## Experiencing Apache Gravitino with Trino SQL
 
 ### Using Trino CLI in Docker Container
 
@@ -223,3 +223,9 @@ select * from catalog_hive.sales.customers
 union
 select * from catalog_iceberg.sales.customers;
 ```
+
+## ASF Incubator disclaimer
+
+Apache Gravitino is an effort undergoing incubation at The Apache Software 
Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of 
all newly accepted projects until a further review indicates that the 
infrastructure, communications, and decision making process have stabilized in 
a manner consistent with other successful ASF projects. While incubation status 
is not necessarily a reflection of the completeness or stability of the code, 
it does indicate that the proje [...]
+
+Apache®, Apache Gravitino™, Apache Hive™, Apache 
Iceberg™, and Apache Spark™ are either registered trademarks or 
trademarks of the Apache Software Foundation in the United States and/or other 
countries.



(gravitino-playground) branch main updated: Add ASF headers (#47)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git


The following commit(s) were added to refs/heads/main by this push:
 new 48f1eb0  Add ASF headers (#47)
48f1eb0 is described below

commit 48f1eb0d2ec72674833baf317ffe7bbeaa8eab7f
Author: Justin Mclean 
AuthorDate: Thu Jul 4 16:08:19 2024 +1000

Add ASF headers (#47)

Change file headers to ASF headers where needed
---
 README.md| 18 --
 docker-compose.yaml  | 18 --
 healthcheck/gravitino-healthcheck.sh | 18 --
 healthcheck/trino-healthcheck.sh | 18 --
 init/gravitino/gravitino.conf| 18 --
 init/gravitino/init.sh   | 18 --
 init/hive/init.sh| 18 --
 init/jupyter/init.sh | 18 --
 init/mysql/init.sql  | 18 --
 init/postgres/init.sql   | 18 --
 init/spark/init.sh   | 18 --
 init/spark/spark-defaults.conf   | 18 --
 init/trino/init.sh   | 18 --
 init/trino/init.sql  | 18 --
 launch-playground.sh | 18 --
 15 files changed, 240 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index e638802..fec2bbb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,20 @@
 
 
 ## Playground introduction
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 039d032..7be5912 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,6 +1,20 @@
 #
-# Copyright 2023 Datastrato Pvt Ltd.
-# This software is licensed under the Apache License version 2.
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 #
 version: '3.0'
 services:
diff --git a/healthcheck/gravitino-healthcheck.sh 
b/healthcheck/gravitino-healthcheck.sh
index f1f6952..4f9e35f 100755
--- a/healthcheck/gravitino-healthcheck.sh
+++ b/healthcheck/gravitino-healthcheck.sh
@@ -1,7 +1,21 @@
 #!/bin/bash
 #
-# Copyright 2023 Datastrato Pvt Ltd.
-# This software is licensed under the Apache License version 2.
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
 #
 set -ex
 
diff --git a/healthcheck/trino-healthcheck.sh b/healthcheck/trino-healthcheck.sh
index e4826e2..752c7bb 100755
--- a/healthcheck/trino-healthcheck.sh
+++ b/healthcheck/trino-healthcheck.sh
@@ -1,7 +1,21 @@
 #!/bin/bash
 #
-# Copyright 2023 Datastrato Pvt Ltd.
-# This software is licensed under the Apache License version 2.
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#  http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  

(gravitino) branch main updated (5b6d71af8 -> 828658162)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


from 5b6d71af8 [#4007] improvement: Use template to reduce Privileges 
duplicate codes (#4010)
 add 828658162 [#4066] improvement(build): Add dependabots and 
protected_tags (#4067)

No new revisions were added by this update.

Summary of changes:
 .asf.yaml | 4 
 1 file changed, 4 insertions(+)



(gravitino) branch main updated: [MINOR] fix(client-python): fix license header in new python file (#4051)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 0463ea458 [MINOR] fix(client-python): fix license header in new python 
file (#4051)
0463ea458 is described below

commit 0463ea458b78a5a9538962ad1a075f2cc5fcbb85
Author: Shaofeng Shi 
AuthorDate: Wed Jul 3 16:30:08 2024 +0800

[MINOR] fix(client-python): fix license header in new python file (#4051)

### What changes were proposed in this pull request?

fix license header in new python file

### Why are the changes needed?

fix license header in new python file
---
 clients/client-python/gravitino/api/__init__.py|  1 -
 clients/client-python/gravitino/api/audit.py   |  1 -
 clients/client-python/gravitino/api/auditable.py   |  1 -
 clients/client-python/gravitino/api/catalog.py |  1 -
 clients/client-python/gravitino/api/catalog_change.py  |  1 -
 clients/client-python/gravitino/api/fileset.py |  1 -
 clients/client-python/gravitino/api/fileset_change.py  |  1 -
 clients/client-python/gravitino/api/metalake.py|  1 -
 clients/client-python/gravitino/api/metalake_change.py |  1 -
 clients/client-python/gravitino/api/schema.py  |  1 -
 clients/client-python/gravitino/api/schema_change.py   |  1 -
 .../client-python/gravitino/api/supports_schemas.py|  1 -
 clients/client-python/gravitino/auth/__init__.py   |  1 -
 clients/client-python/gravitino/auth/auth_constants.py |  1 -
 .../client-python/gravitino/auth/auth_data_provider.py |  1 -
 .../gravitino/auth/simple_auth_provider.py |  1 -
 clients/client-python/gravitino/catalog/__init__.py|  1 -
 .../gravitino/catalog/base_schema_catalog.py   |  1 -
 .../client-python/gravitino/catalog/fileset_catalog.py |  1 -
 clients/client-python/gravitino/client/__init__.py |  1 -
 .../gravitino/client/gravitino_admin_client.py |  1 -
 .../client-python/gravitino/client/gravitino_client.py |  1 -
 .../gravitino/client/gravitino_client_base.py  |  1 -
 .../gravitino/client/gravitino_metalake.py |  1 -
 .../gravitino/client/gravitino_version.py  |  1 -
 clients/client-python/gravitino/constants/__init__.py  |  1 -
 clients/client-python/gravitino/constants/doc.py   |  1 -
 clients/client-python/gravitino/constants/root.py  |  1 -
 clients/client-python/gravitino/constants/timeout.py   |  1 -
 clients/client-python/gravitino/constants/version.py   |  1 -
 clients/client-python/gravitino/dto/__init__.py|  1 -
 clients/client-python/gravitino/dto/audit_dto.py   |  1 -
 clients/client-python/gravitino/dto/catalog_dto.py |  1 -
 clients/client-python/gravitino/dto/dto_converters.py  |  1 -
 clients/client-python/gravitino/dto/fileset_dto.py |  1 -
 clients/client-python/gravitino/dto/metalake_dto.py|  1 -
 .../client-python/gravitino/dto/requests/__init__.py   |  1 -
 .../gravitino/dto/requests/catalog_create_request.py   |  1 -
 .../gravitino/dto/requests/catalog_update_request.py   |  1 -
 .../gravitino/dto/requests/catalog_updates_request.py  |  1 -
 .../gravitino/dto/requests/fileset_create_request.py   |  1 -
 .../gravitino/dto/requests/fileset_update_request.py   |  1 -
 .../gravitino/dto/requests/fileset_updates_request.py  |  1 -
 .../gravitino/dto/requests/metalake_create_request.py  |  1 -
 .../gravitino/dto/requests/metalake_update_request.py  |  1 -
 .../gravitino/dto/requests/metalake_updates_request.py |  1 -
 .../gravitino/dto/requests/schema_create_request.py|  1 -
 .../gravitino/dto/requests/schema_update_request.py|  1 -
 .../gravitino/dto/requests/schema_updates_request.py   |  1 -
 .../client-python/gravitino/dto/responses/__init__.py  |  1 -
 .../gravitino/dto/responses/base_response.py   |  1 -
 .../gravitino/dto/responses/catalog_list_response.py   |  1 -
 .../gravitino/dto/responses/catalog_response.py|  1 -
 .../gravitino/dto/responses/drop_response.py   |  1 -
 .../gravitino/dto/responses/entity_list_response.py|  1 -
 .../gravitino/dto/responses/fileset_response.py|  1 -
 .../gravitino/dto/responses/metalake_list_response.py  |  1 -
 .../gravitino/dto/responses/metalake_response.py   |  1 -
 .../gravitino/dto/responses/schema_response.py |  1 -
 .../gravitino/dto/responses/version_response.py|  1 -
 clients/client-python/gravitino/dto/schema_dto.py  |  1 -
 clients/client-python/gravitino/dto/version_dto.py |  1 -
 clients/client-python/gravitino/exceptions/__init__.py |  1 -
 .../exceptions/gravitino_runtime_exception.py  |  1 -
 .../exceptions/illegal_name_identifier_exception.py|  1 -
 .../exceptions/illegal_namespace_exception.py  |  1 -
 .../gravitino/exceptions/no_such_metalake_exception.py |  1 -
 .../gravitino

(gravitino) branch main updated: [#4048] Update README with incubator disclaimer (#4049)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new f387a441c [#4048] Update README with incubator disclaimer (#4049)
f387a441c is described below

commit f387a441c8408ae41dabe0e5683fe16cde87a4ef
Author: Justin Mclean 
AuthorDate: Wed Jul 3 18:00:28 2024 +1000

[#4048] Update README with incubator disclaimer (#4049)

### What changes were proposed in this pull request?

Update README with ASF Incubator disclaimer and updated links and used
Apache Gravitino where needed.

### Why are the changes needed?

To comply with ASF policy

Fix: #4048

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

N/A - only changes a text file
---
 README.md | 34 +++---
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 44a8eba49..b7e3a9cc2 100644
--- a/README.md
+++ b/README.md
@@ -17,20 +17,20 @@
   under the License.
 -->
 
-# Gravitino
-
-[![GitHub Actions 
Build](https://github.com/datastrato/gravitino/actions/workflows/build.yml/badge.svg)](https://github.com/datastrato/gravitino/actions/workflows/build.yml)
-[![GitHub Actions Integration 
Test](https://github.com/datastrato/gravitino/actions/workflows/integration-test.yml/badge.svg)](https://github.com/datastrato/gravitino/actions/workflows/integration-test.yml)
-[![License](https://img.shields.io/github/license/datastrato/gravitino)](https://github.com/datastrato/gravitino/blob/main/LICENSE)
-[![Contributors](https://img.shields.io/github/contributors/datastrato/gravitino)](https://github.com/datastrato/gravitino/graphs/contributors)
-[![Release](https://img.shields.io/github/v/release/datastrato/gravitino)](https://github.com/datastrato/gravitino/releases)
-[![Open 
Issues](https://img.shields.io/github/issues-raw/datastrato/gravitino)](https://github.com/datastrato/gravitino/issues)
-[![Last 
Committed](https://img.shields.io/github/last-commit/datastrato/gravitino)](https://github.com/datastrato/gravitino/commits/main/)
+# Apache Gravitino (incubating)
+
+[![GitHub Actions 
Build](https://github.com/apache/gravitino/actions/workflows/build.yml/badge.svg)](https://github.com/apache/gravitino/actions/workflows/build.yml)
+[![GitHub Actions Integration 
Test](https://github.com/apache/gravitino/actions/workflows/integration-test.yml/badge.svg)](https://github.com/apache/gravitino/actions/workflows/integration-test.yml)
+[![License](https://img.shields.io/github/license/apache/gravitino)](https://github.com/apache/gravitino/blob/main/LICENSE)
+[![Contributors](https://img.shields.io/github/contributors/apache/gravitino)](https://github.com/apache/gravitino/graphs/contributors)
+[![Release](https://img.shields.io/github/v/release/apache/gravitino)](https://github.com/apache/gravitino/releases)
+[![Open 
Issues](https://img.shields.io/github/issues-raw/apache/gravitino)](https://github.com/apache/gravitino/issues)
+[![Last 
Committed](https://img.shields.io/github/last-commit/apache/gravitino)](https://github.com/apache/gravitino/commits/main/)
 [![OpenSSF Best 
Practices](https://www.bestpractices.dev/projects/8358/badge)](https://www.bestpractices.dev/projects/8358)
 
 ## Introduction
 
-Gravitino is a high-performance, geo-distributed, and federated metadata lake. 
It manages the metadata directly in different sources, types, and regions. It 
also provides users with unified metadata access for data and AI assets.
+Apache Gravitino is a high-performance, geo-distributed, and federated 
metadata lake. It manages the metadata directly in different sources, types, 
and regions. It also provides users with unified metadata access for data and 
AI assets.
 
 ![Gravitino Architecture](docs/assets/gravitino-architecture.png)
 
@@ -41,7 +41,7 @@ Gravitino aims to provide several key features:
 * Security in one place, centralizing the security for different sources.
 * Built-in data management and data access management.
 
-## Contributing to Gravitino
+## Contributing to Apache Gravitino
 
 Gravitino is open source software available under the Apache 2.0 license. For 
information on how to contribute to Gravitino please see the [Contribution 
guidelines](CONTRIBUTING.md).
 
@@ -49,7 +49,7 @@ Gravitino is open source software available under the Apache 
2.0 license. For in
 
 You can find the latest Gravitino documentation in the [doc folder](docs). 
This README file only contains basic setup instructions.
 
-## Building Gravitino
+## Building Apache Gravitino
 
 You can build Gravitino using Gradle. Currently you can build Gravitino on 
Linux and macOS, Windows isn't supported.
 
@@ -81,7 +81,7 @@ For the details of building and testing Gravitino, please see 
[How to build Grav
 
 ## Qui

(gravitino) branch main updated: [#4040] Update Rat check to ignore Datastrato headers (#4044)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 373f5 [#4040] Update Rat check to ignore Datastrato headers (#4044)
373f5 is described below

commit 373f5613d79db4b62373984ae2e3d1794d1f
Author: Justin Mclean 
AuthorDate: Wed Jul 3 17:42:05 2024 +1000

[#4040] Update Rat check to ignore Datastrato headers (#4044)

### What changes were proposed in this pull request?

No longer accept Datastrato headers.

### Why are the changes needed?

See above.

Fix: #4040

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Note that until all file headers have changed the Rat test will not
pass. I made it a separate PR for easy review.
---
 build.gradle.kts| 21 -
 .../client-python/tests/integration/test_catalog.py | 18 --
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/build.gradle.kts b/build.gradle.kts
index 5c956e7ed..32e13d72c 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -1,6 +1,20 @@
 /*
- * Copyright 2023 Datastrato Pvt Ltd.
- * This software is licensed under the Apache License version 2.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 import com.github.gradle.node.NodeExtension
 import com.github.gradle.node.NodePlugin
@@ -440,9 +454,6 @@ subprojects {
 }
 
 tasks.rat {
-  substringMatcher("DS", "Datastrato", "Copyright 2023 Datastrato Pvt Ltd.")
-  substringMatcher("DS", "Datastrato", "Copyright 2024 Datastrato Pvt Ltd.")
-  approvedLicense("Datastrato")
   approvedLicense("Apache License Version 2.0")
 
   // Set input directory to that of the root project instead of the CWD. This
diff --git a/clients/client-python/tests/integration/test_catalog.py 
b/clients/client-python/tests/integration/test_catalog.py
index 5b08edc23..1535e709c 100644
--- a/clients/client-python/tests/integration/test_catalog.py
+++ b/clients/client-python/tests/integration/test_catalog.py
@@ -1,6 +1,20 @@
 """
-Copyright 2024 Datastrato Pvt Ltd.
-This software is licensed under the Apache License version 2.
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
 """
 
 import logging



(gravitino) branch main updated: [MINOR] [#4037] Upgrade twine version to fix python client deploy task (#4038)

2024-07-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new bee0bb007 [MINOR] [#4037] Upgrade twine version to fix python client 
deploy task (#4038)
bee0bb007 is described below

commit bee0bb00763d9baa781492ce141ffcb3647a88b9
Author: xloya <982052...@qq.com>
AuthorDate: Wed Jul 3 15:08:39 2024 +0800

[MINOR] [#4037] Upgrade twine version to fix python client deploy task 
(#4038)

### What changes were proposed in this pull request?

Currently twine 5.0.0 will cause Python client deploy task failed,
upgrade twine version to fix this problem.

### Why are the changes needed?

Fix: #4037

Co-authored-by: xiaojiebao 
---
 clients/client-python/requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clients/client-python/requirements-dev.txt 
b/clients/client-python/requirements-dev.txt
index 4e1d6b4a8..77387c01c 100644
--- a/clients/client-python/requirements-dev.txt
+++ b/clients/client-python/requirements-dev.txt
@@ -19,7 +19,7 @@ requests==2.32.2
 dataclasses-json==0.6.6
 pylint==3.2.2
 black==24.4.2
-twine==5.1.0
+twine==5.1.1
 coverage==7.5.1
 pandas==2.0.3
 pyarrow==15.0.2



(gravitino) branch main updated: [#4039] Update NOTICE files to ASF norms (#4043)

2024-07-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 30b11ce4a [#4039] Update NOTICE files to ASF norms (#4043)
30b11ce4a is described below

commit 30b11ce4a23d2acb8ab60e5db936034d9b10ff29
Author: Justin Mclean 
AuthorDate: Wed Jul 3 16:18:28 2024 +1000

[#4039] Update NOTICE files to ASF norms (#4043)

### What changes were proposed in this pull request?

Update NOTICE files to comply with ASF policy.

### Why are the changes needed?

To comply with ASF policy.

Fix: #4039

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Built and run non-integration tests locally.
---
 NOTICE | 7 +--
 NOTICE.bin | 8 
 web/NOTICE | 7 +--
 web/NOTICE.bin | 7 +--
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/NOTICE b/NOTICE
index 6900d05a9..3f221e49c 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,8 @@
-Gravitino
-Copyright 2023-2024 Datastrato Pvt Ltd
+Apache Gravitino (incubating)
+Copyright 2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
 
 This product includes software developed at
 Datastrato (https://datastrato.ai).
diff --git a/NOTICE.bin b/NOTICE.bin
index 939c7f67a..5c63b8189 100644
--- a/NOTICE.bin
+++ b/NOTICE.bin
@@ -1,11 +1,11 @@
-Gravitino
-Copyright 2023-2024 Datastrato Pvt Ltd
+Apache Gravitino (incubating)
+Copyright 2024 The Apache Software Foundation
 
 This product includes software developed at
-Datastrato (https://datastrato.ai).
+The Apache Software Foundation (http://www.apache.org/).
 
 This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
+Datastrato (https://datastrato.ai).
 
 The Web UI also has a NOTICE file please see web/NOTICE
 for it's contents.
diff --git a/web/NOTICE b/web/NOTICE
index 4f96647d6..5ddaf26a8 100644
--- a/web/NOTICE
+++ b/web/NOTICE
@@ -1,5 +1,8 @@
-Gravitino
-Copyright 2023 Datastrato Pvt Ltd
+Apache Gravitino (incubating)
+Copyright 2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
 
 This product includes software developed at
 Datastrato (https://datastrato.ai).
diff --git a/web/NOTICE.bin b/web/NOTICE.bin
index 4f96647d6..5ddaf26a8 100644
--- a/web/NOTICE.bin
+++ b/web/NOTICE.bin
@@ -1,5 +1,8 @@
-Gravitino
-Copyright 2023 Datastrato Pvt Ltd
+Apache Gravitino (incubating)
+Copyright 2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
 
 This product includes software developed at
 Datastrato (https://datastrato.ai).



(gravitino) branch main updated (0e59cc854 -> 33ca5812c)

2024-07-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


from 0e59cc854 [#4031] build: Add the asf yaml for Gravitino (#4033)
 add 33ca5812c [#4031][followup] build: Revert the partial protected 
branches (#4047)

No new revisions were added by this update.

Summary of changes:
 .asf.yaml | 6 --
 1 file changed, 6 deletions(-)



(gravitino) branch main updated: [#4031] build: Add the asf yaml for Gravitino (#4033)

2024-07-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 0e59cc854 [#4031] build: Add the asf yaml for Gravitino (#4033)
0e59cc854 is described below

commit 0e59cc854a043d29b19a5622cf4f16691ab18a6d
Author: roryqi 
AuthorDate: Wed Jul 3 12:05:45 2024 +0800

[#4031] build: Add the asf yaml for Gravitino (#4033)

### What changes were proposed in this pull request?
Add the asf yaml for Gravitino

### Why are the changes needed?

Fix: #4031

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
This pull request needs to be verified after merging.

-

Co-authored-by: Jerry Shao 
---
 .asf.yaml | 61 +
 1 file changed, 61 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 0..44c9f9055
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+github:
+  description: World's most powerful open data catalog for building a 
high-performance, geo-distributed and federated metadata lake.
+  homepage: https://datastrato.ai/docs/
+  labels:
+- metadata
+- data-catalog
+- datalake
+- stratosphere
+- federated-query
+- lakehouse
+- model-catalog
+- metalake
+- skycomputing
+- ai-catalog
+- opendatacatalog
+  features:
+# Enable wiki for documentation
+wiki: false
+# Enable issues management
+issues: true
+# Enable projects for project management boards
+projects: true
+  enabled_merge_buttons:
+squash: true
+merge: false
+rebase: true
+  protected_branches:
+main:
+  required_status_checks:
+strict: true
+  required_pull_request_reviews:
+dismiss_stale_reviews: true
+required_approving_review_count: 1
+branch-*:
+  required_status_checks:
+strict: true
+  required_pull_request_reviews:
+dismiss_stale_reviews: true
+required_approving_review_count: 1
+
+notifications:
+  commits: commits@gravitino.apache.org
+  issues: commits@gravitino.apache.org
+  pullrequests: commits@gravitino.apache.org



(gravitino) branch main updated: [#4032] Fix(CI): Remove some CI actions to make CI back to normal (#4030)

2024-07-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git


The following commit(s) were added to refs/heads/main by this push:
 new 6180bc650 [#4032] Fix(CI): Remove some CI actions to make CI back to 
normal (#4030)
6180bc650 is described below

commit 6180bc650aec6847d625a8551b9a54c0a913eb28
Author: Qi Yu 
AuthorDate: Wed Jul 3 11:37:35 2024 +0800

[#4032] Fix(CI): Remove some CI actions to make CI back to normal (#4030)

### What changes were proposed in this pull request?

Remove `csexton/debugger-action@master` action in the github CI, if
someone wants this functionality, they can add it in his fork repo and
do the debug work the personal account.

### Why are the changes needed?

This action is not allowed in the Apache project.


Fixed: #4032


### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Test in the CI
---
 .github/workflows/backend-integration-test.yml |  4 
 .github/workflows/build.yml| 18 --
 .github/workflows/cron-integration-test.yml|  4 
 .github/workflows/flink-integration-test.yml   |  4 
 .github/workflows/frontend-integration-test.yml|  4 
 .github/workflows/spark-integration-test.yml   |  4 
 docs/how-to-test.md|  1 -
 .../integration/test/web/ui/MetalakePageTest.java  |  5 ++---
 8 files changed, 2 insertions(+), 42 deletions(-)

diff --git a/.github/workflows/backend-integration-test.yml 
b/.github/workflows/backend-integration-test.yml
index aa564ac8d..da534f467 100644
--- a/.github/workflows/backend-integration-test.yml
+++ b/.github/workflows/backend-integration-test.yml
@@ -85,10 +85,6 @@ jobs:
 run: |
   ./gradlew compileDistribution -x test -PjdkVersion=${{ 
matrix.java-version }}
 
-  - name: Setup debug Github Action
-if: ${{ contains(github.event.pull_request.labels.*.name, 'debug 
action') }}
-uses: csexton/debugger-action@master
-
   - name: Free up disk space
 run: |
   dev/ci/util_free_space.sh
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e6a22cf35..cbb9eaffb 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -107,21 +107,3 @@ jobs:
 build/reports
 catalogs/**/*.log
 catalogs/**/*.tar
-
-  - name: Jacoco Report to PR
-id: jacoco
-uses: madrapps/jacoco-report@v1.6.1
-with:
-  paths: ${{ github.workspace 
}}/**/build/reports/jacoco/test/jacocoTestReport.xml
-  token: ${{ secrets.GITHUB_TOKEN }}
-  min-coverage-overall: 40
-  min-coverage-changed-files: 60
-  title: 'Code Coverage Report'
-  debug-mode: false
-  update-comment: true
-  pass-emoji: ':green_circle:'
-  fail-emoji: ':red_circle:'
-  - name: Get the Coverage info
-run: |
-  echo "Total coverage ${{ steps.jacoco.outputs.coverage-overall }}"
-  echo "Changed Files coverage ${{ 
steps.jacoco.outputs.coverage-changed-files }}"
diff --git a/.github/workflows/cron-integration-test.yml 
b/.github/workflows/cron-integration-test.yml
index 4a7c498ee..195e1b6e9 100644
--- a/.github/workflows/cron-integration-test.yml
+++ b/.github/workflows/cron-integration-test.yml
@@ -76,10 +76,6 @@ jobs:
 run: |
   ./gradlew compileDistribution -x test -PjdkVersion=${{ 
matrix.java-version }}
 
-  - name: Setup debug Github Action
-if: ${{ contains(github.event.pull_request.labels.*.name, 'debug 
action') }}
-uses: csexton/debugger-action@master
-
   - name: Free up disk space
 run: |
   dev/ci/util_free_space.sh
diff --git a/.github/workflows/flink-integration-test.yml 
b/.github/workflows/flink-integration-test.yml
index 54141c842..ba7648c72 100644
--- a/.github/workflows/flink-integration-test.yml
+++ b/.github/workflows/flink-integration-test.yml
@@ -79,10 +79,6 @@ jobs:
 run: |
   ./gradlew compileDistribution -x test -PjdkVersion=${{ 
matrix.java-version }}
 
-  - name: Setup debug Github Action
-if: ${{ contains(github.event.pull_request.labels.*.name, 'debug 
action') }}
-uses: csexton/debugger-action@master
-
   - name: Free up disk space
 run: |
   dev/ci/util_free_space.sh
diff --git a/.github/workflows/frontend-integration-test.yml 
b/.github/workflows/frontend-integration-test.yml
index e8925afb7..7b0315e46 100644
--- a/.github/workflows/frontend-integration-test.yml
+++ b/.github/workflows/frontend-integration-test.yml
@@ -82,10 +82,6 @@ jobs:
 run: 

[incubator-uniffle] branch master updated: Rename DISCLAIMER to DISCLAIMER-WIP (#258)

2022-10-10 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new 13f61cd3 Rename DISCLAIMER to DISCLAIMER-WIP (#258)
13f61cd3 is described below

commit 13f61cd35b130928ddb2d1de8bf0605ed005f741
Author: roryqi 
AuthorDate: Tue Oct 11 09:37:34 2022 +0800

Rename DISCLAIMER to DISCLAIMER-WIP (#258)

Co-authored-by: roryqi 
---
 DISCLAIMER | 11 ---
 DISCLAIMER-WIP | 21 +
 pom.xml|  2 +-
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/DISCLAIMER b/DISCLAIMER
deleted file mode 100644
index 3e401182..
--- a/DISCLAIMER
+++ /dev/null
@@ -1,11 +0,0 @@
-Apache Uniffle (incubating) is an effort undergoing incubation at The Apache
-Software Foundation (ASF), sponsored by the Apache Incubator PMC.
-
-Incubation is required of all newly accepted projects until a further review
-indicates that the infrastructure, communications, and decision-making process
-have stabilized in a manner consistent with other successful ASF projects.
-
-While incubation status is not necessarily a reflection of the completeness
-or stability of the code, it does indicate that the project has yet to be
-fully endorsed by the ASF.
-
diff --git a/DISCLAIMER-WIP b/DISCLAIMER-WIP
new file mode 100644
index ..23df370b
--- /dev/null
+++ b/DISCLAIMER-WIP
@@ -0,0 +1,21 @@
+Apache Uniffle (incubating) is an effort undergoing incubation at The Apache
+Software Foundation (ASF), sponsored by the Apache Incubator PMC.
+
+Incubation is required of all newly accepted projects until a further review
+indicates that the infrastructure, communications, and decision-making process
+have stabilized in a manner consistent with other successful ASF projects.
+
+While incubation status is not necessarily a reflection of the completeness
+or stability of the code, it does indicate that the project has yet to be
+fully endorsed by the ASF.
+
+Some of the incubating project’s releases may not be fully compliant with ASF 
policy.
+For example, releases may have incomplete or un-reviewed licensing conditions.
+What follows is a list of issues the project is currently aware of (this list 
is likely to be incomplete):
+
+1. Releases may have incomplete licensing conditions
+
+If you are planning to incorporate this work into your product/project,please 
be aware that
+you will need to conduct a thorough licensing review to determine the overall 
implications of
+including this work.For the current status of this project through the Apache 
Incubator,
+visit: https://incubator.apache.org/projects/uniffle.html
diff --git a/pom.xml b/pom.xml
index b8e57475..c18d1be7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -925,7 +925,7 @@
 
   
 LICENSE
-DISCLAIMER
+DISCLAIMER-WIP
 NOTICE
 **/target/**
 src/test/resources/empty



[incubator-uniffle] branch master updated: Change url of total lines badge in README (#222)

2022-09-15 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new 8be68ab4 Change url of total lines badge in README (#222)
8be68ab4 is described below

commit 8be68ab42de921e36073024c9bd2f08ae4814b23
Author: Kaijie Chen 
AuthorDate: Thu Sep 15 18:49:06 2022 +0800

Change url of total lines badge in README (#222)
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8794e016..4ab67422 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Currently it supports [Apache 
Spark](https://spark.apache.org) and [MapReduce](h
 
 
[![Build](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml/badge.svg?branch=master&event=push)](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml)
 
[![Codecov](https://codecov.io/gh/apache/incubator-uniffle/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/incubator-uniffle)
-[![Total 
Lines](https://img.shields.io/tokei/lines/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle)
+[![](https://sloc.xyz/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle)
 [![Code 
Quality](https://img.shields.io/lgtm/grade/java/github/apache/incubator-uniffle?label=code%20quality)](https://lgtm.com/projects/g/apache/incubator-uniffle/)
 
[![License](https://img.shields.io/github/license/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle/blob/master/LICENSE)
 



[incubator-uniffle] branch master updated: Add more badges in README (#219)

2022-09-15 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new 3b210c0c Add more badges in README (#219)
3b210c0c is described below

commit 3b210c0cf2ea5e9cd23ce759a267c6c5b3eb302d
Author: Kaijie Chen 
AuthorDate: Thu Sep 15 15:51:43 2022 +0800

Add more badges in README (#219)
---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index e3521acc..8794e016 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,9 @@ Currently it supports [Apache 
Spark](https://spark.apache.org) and [MapReduce](h
 
 
[![Build](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml/badge.svg?branch=master&event=push)](https://github.com/apache/incubator-uniffle/actions/workflows/build.yml)
 
[![Codecov](https://codecov.io/gh/apache/incubator-uniffle/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/incubator-uniffle)
+[![Total 
Lines](https://img.shields.io/tokei/lines/github/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle)
+[![Code 
Quality](https://img.shields.io/lgtm/grade/java/github/apache/incubator-uniffle?label=code%20quality)](https://lgtm.com/projects/g/apache/incubator-uniffle/)
+[![License](https://img.shields.io/github/license/apache/incubator-uniffle)](https://github.com/apache/incubator-uniffle/blob/master/LICENSE)
 
 ## Architecture
 ![Rss Architecture](docs/asset/rss_architecture.png)



[incubator-uniffle] branch master updated: Add Notice and DISCLAMER file (#215)

2022-09-14 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new dcdf8ae5 Add Notice and DISCLAMER file (#215)
dcdf8ae5 is described below

commit dcdf8ae55a774adbd5126919868b4fa5376f99ab
Author: frankliee 
AuthorDate: Wed Sep 14 15:25:21 2022 +0800

Add Notice and DISCLAMER file (#215)
---
 DISCLAIMER | 2 +-
 NOTICE | 7 +++
 pom.xml| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/DISCLAIMER b/DISCLAIMER
index 805a8e84..3e401182 100644
--- a/DISCLAIMER
+++ b/DISCLAIMER
@@ -1,4 +1,4 @@
-Apache Uniffle (Incubating) is an effort undergoing incubation at The Apache
+Apache Uniffle (incubating) is an effort undergoing incubation at The Apache
 Software Foundation (ASF), sponsored by the Apache Incubator PMC.
 
 Incubation is required of all newly accepted projects until a further review
diff --git a/NOTICE b/NOTICE
new file mode 100644
index ..2cfb9fb7
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,7 @@
+Apache Uniffle (incubating)
+Copyright 2022 and onwards The Apache Software Foundation.
+
+This product includes software developed at
+The Apache Software Foundation (https://www.apache.org/).
+
+The initial codebase was donated to the ASF by Tencent, copyright 2020-2022.
diff --git a/pom.xml b/pom.xml
index 7db56f06..327d614f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -926,6 +926,7 @@
   
 LICENSE
 DISCLAIMER
+NOTICE
 **/target/**
 src/test/resources/empty
 **/dependency-reduced-pom.xml



[incubator-uniffle-website] branch master updated: Update Slack invitation link (#4)

2022-09-08 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git


The following commit(s) were added to refs/heads/master by this push:
 new e2fb0e5  Update Slack invitation link (#4)
e2fb0e5 is described below

commit e2fb0e5f1ca9c6d42e4b6b862bae2aed3bebd714
Author: Kaijie Chen 
AuthorDate: Thu Sep 8 19:28:40 2022 +0800

Update Slack invitation link (#4)
---
 docusaurus.config.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docusaurus.config.js b/docusaurus.config.js
index a1aaf70..66b86e5 100644
--- a/docusaurus.config.js
+++ b/docusaurus.config.js
@@ -117,7 +117,7 @@ const config = {
 items: [
 {
 label: 'Slack',
-href: 
'https://github.com/apache/incubator-uniffle/issues',
+href: 
'https://join.slack.com/t/the-asf/shared_invite/zt-1fm9561yr-uzTpjqg3jf5nxSJV5AE3KQ',
 },
 {
 label: 'Issue Tracker',



[incubator-uniffle-website] branch master created (now d7af0cf)

2022-08-25 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git


  at d7af0cf  first commit

This branch includes the following new commits:

 new d7af0cf  first commit

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[incubator-uniffle-website] 01/01: first commit

2022-08-25 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle-website.git

commit d7af0cfa6d7a831188b8f6f79c7626ede2e600d9
Author: Jerry Shao 
AuthorDate: Fri Aug 26 11:42:04 2022 +0800

first commit
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
new file mode 100644
index 000..ecb1a1c
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# incubator-uniffle-website



[incubator-uniffle] branch master updated: [TYPO] Fix misspelled word "integration" (#34)

2022-07-05 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new 49f1a16  [TYPO] Fix misspelled word "integration" (#34)
49f1a16 is described below

commit 49f1a16a3bcf33429307b6326d77f782ec9eb79d
Author: Kaijie Chen 
AuthorDate: Wed Jul 6 10:26:10 2022 +0800

[TYPO] Fix misspelled word "integration" (#34)
---
 integration-test/common/pom.xml   | 2 +-
 integration-test/mr/pom.xml   | 2 +-
 integration-test/spark-common/pom.xml | 2 +-
 integration-test/spark2/pom.xml   | 2 +-
 integration-test/spark3/pom.xml   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml
index deeb403..c4dc048 100644
--- a/integration-test/common/pom.xml
+++ b/integration-test/common/pom.xml
@@ -31,7 +31,7 @@
 rss-integration-common-test
 0.6.0-snapshot
 jar
-Apache Uniffle Intergration Test (Common)
+Apache Uniffle Integration Test (Common)
 
 
 
diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml
index cc9e9c1..2199759 100644
--- a/integration-test/mr/pom.xml
+++ b/integration-test/mr/pom.xml
@@ -30,7 +30,7 @@
 rss-integration-mr-test
 0.6.0-snapshot
 jar
-Apache Uniffle Intergration Test (MapReduce)
+Apache Uniffle Integration Test (MapReduce)
 
 
 
diff --git a/integration-test/spark-common/pom.xml 
b/integration-test/spark-common/pom.xml
index 3a7b56a..42890d3 100644
--- a/integration-test/spark-common/pom.xml
+++ b/integration-test/spark-common/pom.xml
@@ -31,7 +31,7 @@
   rss-integration-spark-common-test
   0.6.0-snapshot
   jar
-  Apache Uniffle Intergration Test (Spark Common)
+  Apache Uniffle Integration Test (Spark Common)
 
   
 
diff --git a/integration-test/spark2/pom.xml b/integration-test/spark2/pom.xml
index 08557d8..c384fda 100644
--- a/integration-test/spark2/pom.xml
+++ b/integration-test/spark2/pom.xml
@@ -31,7 +31,7 @@
   rss-integration-spark2-test
   0.6.0-snapshot
   jar
-  Apache Uniffle Intergration Test (Spark 2)
+  Apache Uniffle Integration Test (Spark 2)
 
   
 
diff --git a/integration-test/spark3/pom.xml b/integration-test/spark3/pom.xml
index c166979..0075522 100644
--- a/integration-test/spark3/pom.xml
+++ b/integration-test/spark3/pom.xml
@@ -31,7 +31,7 @@
 rss-integration-spark3-test
 0.6.0-snapshot
 jar
-Apache Uniffle Intergration Test (Spark 3)
+Apache Uniffle Integration Test (Spark 3)
 
 
 



[incubator-uniffle] branch master updated: Improve asf.yaml to reduce the notifications (#25)

2022-07-05 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new 0d7dfdb  Improve asf.yaml to reduce the notifications (#25)
0d7dfdb is described below

commit 0d7dfdbcc382aee4bdfa6924afd2bfe56d0a0bf5
Author: Saisai Shao 
AuthorDate: Tue Jul 5 15:17:18 2022 +0800

Improve asf.yaml to reduce the notifications (#25)
---
 .asf.yaml | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.asf.yaml b/.asf.yaml
index bff9c79..5137082 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -23,6 +23,7 @@ github:
 - mapreduce
 - shuffle
 - remote-shuffle-service
+- rss
   features:
 # Enable wiki for documentation
 wiki: true
@@ -43,6 +44,6 @@ github:
 required_approving_review_count: 1
 
   notifications:
-  commits: notificati...@uniffle.apache.org
-  issues: d...@uniffle.apache.org
-  pullrequests: notificati...@uniffle.apache.org
+commits: commits@uniffle.apache.org
+issues: d...@uniffle.apache.org
+pullrequests: iss...@uniffle.apache.org



[incubator-uniffle] branch master updated: Add asf yaml

2022-07-01 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


The following commit(s) were added to refs/heads/master by this push:
 new e5dd0ea  Add asf yaml
e5dd0ea is described below

commit e5dd0eaf1651680420f081b3fc456f1c7be3d316
Author: Jerry Shao 
AuthorDate: Fri Jul 1 15:56:56 2022 +0800

Add asf yaml
---
 .asf.yaml | 48 
 1 file changed, 48 insertions(+)

diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 000..bff9c79
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+github:
+  description: Uniffle is a high performance, general purpose Remote Shuffle 
Service.
+  homepage: https://uniffle.apache.org/
+  labels:
+- spark
+- mapreduce
+- shuffle
+- remote-shuffle-service
+  features:
+# Enable wiki for documentation
+wiki: true
+# Enable issues management
+issues: true
+# Enable projects for project management boards
+projects: true
+  enabled_merge_buttons:
+squash: true
+merge: false
+rebase: false
+  protected_branches:
+master:
+  required_status_checks:
+strict: true
+  required_pull_request_reviews:
+dismiss_stale_reviews: true
+required_approving_review_count: 1
+
+  notifications:
+  commits: notificati...@uniffle.apache.org
+  issues: d...@uniffle.apache.org
+  pullrequests: notificati...@uniffle.apache.org



[incubator-uniffle] branch branch-0.3.0 created (now 1d69058)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch branch-0.3.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 1d69058  [Bugfix] Fix uncorrect index file (#92) (#93)

This branch includes the following new commits:

 new 1d69058  [Bugfix] Fix uncorrect index file (#92) (#93)

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[incubator-uniffle] 01/01: [Bugfix] Fix uncorrect index file (#92) (#93)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.3.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 1d69058c32f8f943e1694cfe182fb19d55943a11
Author: roryqi 
AuthorDate: Tue Mar 8 17:21:55 2022 +0800

[Bugfix] Fix uncorrect index file (#92) (#93)

backport 0.3.0
### What changes were proposed in this pull request?
Modify the method that calculate the offset in the index file.

### Why are the changes needed?
If we don't have this patch, we run 10TB tpcds, query24a will fail.
https://user-images.githubusercontent.com/8159038/157178756-d8a39b3f-0ea6-4864-ac68-ee382a88bb0f.png";>
When we write many data to dataOutputStream, dataOutputStream.size() won't 
increase again. dataOutputStream.size() will
always be Integer.MAX_VALUE.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Add new uts.

Co-authored-by: roryqi 
---
 .../rss/storage/handler/impl/LocalFileWriter.java   |  6 ++
 .../rss/storage/handler/impl/LocalFileHandlerTest.java  | 17 +
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git 
a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
 
b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
index 10185a4..609db7e 100644
--- 
a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
+++ 
b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
@@ -30,21 +30,19 @@ public class LocalFileWriter implements Closeable {
 
   private DataOutputStream dataOutputStream;
   private FileOutputStream fileOutputStream;
-  private long initSize;
   private long nextOffset;
 
   public LocalFileWriter(File file) throws IOException {
 fileOutputStream = new FileOutputStream(file, true);
 // init fsDataOutputStream
 dataOutputStream = new DataOutputStream(fileOutputStream);
-initSize = file.length();
-nextOffset = initSize;
+nextOffset = file.length();
   }
 
   public void writeData(byte[] data) throws IOException {
 if (data != null && data.length > 0) {
   dataOutputStream.write(data);
-  nextOffset = initSize + dataOutputStream.size();
+  nextOffset = nextOffset + data.length;
 }
   }
 
diff --git 
a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
 
b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
index 32b7ace..846ab20 100644
--- 
a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
+++ 
b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
@@ -39,6 +39,7 @@ import com.tencent.rss.storage.handler.api.ServerReadHandler;
 import com.tencent.rss.storage.handler.api.ShuffleWriteHandler;
 import com.tencent.rss.storage.util.ShuffleStorageUtils;
 import java.io.File;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
@@ -53,6 +54,7 @@ public class LocalFileHandlerTest {
   @Test
   public void writeTest() throws Exception {
 File tmpDir = Files.createTempDir();
+tmpDir.deleteOnExit();
 File dataDir1 = new File(tmpDir, "data1");
 File dataDir2 = new File(tmpDir, "data2");
 String[] basePaths = new String[]{dataDir1.getAbsolutePath(),
@@ -111,6 +113,21 @@ public class LocalFileHandlerTest {
 }
   }
 
+  @Test
+  public void writeBigDataTest() throws IOException  {
+File tmpDir = Files.createTempDir();
+tmpDir.deleteOnExit();
+File writeFile = new File(tmpDir, "writetest");
+LocalFileWriter writer = new LocalFileWriter(writeFile);
+int  size = Integer.MAX_VALUE / 100;
+byte[] data = new byte[size];
+for (int i = 0; i < 200; i++) {
+  writer.writeData(data);
+}
+long totalSize = 200L * size;
+assertEquals(writer.nextOffset(), totalSize);
+  }
+
 
   private void writeTestData(
   ShuffleWriteHandler writeHandler,



[incubator-uniffle] branch branch-0.4.0 created (now 6a4295a)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch branch-0.4.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 6a4295a  upgrade to 0.4.1

No new revisions were added by this update.



[incubator-uniffle] 04/04: [Bugfix] [0.5] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.5.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 55cb16fb6b9f494f657068721ca81c74043a4bb9
Author: roryqi 
AuthorDate: Thu Jun 23 10:52:59 2022 +0800

[Bugfix] [0.5] Fix MR don't have remote storage information when we use 
dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196)

backport 0.5

### What changes were proposed in this pull request?
We should acquire the storageType from extraConf.
### Why are the changes needed?
If we don't have this patch, MR don't work when we use dynamic conf and 
MEMORY_LOCALE_HDFS storageType.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 .../main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
index 7511104..976b03c 100644
--- 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
+++ 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
@@ -180,7 +180,7 @@ public class RssMRAppMaster extends MRAppMaster {
 RssMRUtils.applyDynamicClientConf(extraConf, clusterClientConf);
   }
 
-  String storageType = conf.get(RssMRConfig.RSS_STORAGE_TYPE);
+  String storageType = RssMRUtils.getString(extraConf, conf, 
RssMRConfig.RSS_STORAGE_TYPE);
   RemoteStorageInfo defaultRemoteStorage =
   new RemoteStorageInfo(conf.get(RssMRConfig.RSS_REMOTE_STORAGE_PATH, 
""));
   RemoteStorageInfo remoteStorage = ClientUtils.fetchRemoteStorage(



[incubator-uniffle] 01/04: [Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.5.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 59856687f8e17b20f206815cbcf31bbbaacf4292
Author: roryqi 
AuthorDate: Wed Jun 22 14:50:40 2022 +0800

[Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188)

backport 0.5.0

### What changes were proposed in this pull request?
We need to judge heartbeatExecutorService whether is null when we will stop 
it.

### Why are the changes needed?
#177 pr introduce this problem, when we run Spark applications on our 
cluster, the executor will throw NPE when method `stop` is called.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
index f1f2a36..2970489 100644
--- 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
+++ 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
@@ -370,7 +370,9 @@ public class RssShuffleManager implements ShuffleManager {
 
   @Override
   public void stop() {
-heartBeatScheduledExecutorService.shutdownNow();
+if (heartBeatScheduledExecutorService != null) {
+  heartBeatScheduledExecutorService.shutdownNow();
+}
 threadPoolExecutor.shutdownNow();
 shuffleWriteClient.close();
   }



[incubator-uniffle] 12/17: [Improvement] Move detailed client configuration to individual doc (#201)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 2c1c554bb9a47a25e56164d1af2efa1acff66cd8
Author: frankliee 
AuthorDate: Tue Jun 28 11:02:00 2022 +0800

[Improvement] Move detailed client configuration to individual doc (#201)

 ### What changes were proposed in this pull request?
1.  Put detailed configuration to doc subdirectory.
2. Add doc for client quorum setting.

### Why are the changes needed?
Update doc

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Just doc.
---
 README.md |  22 +--
 docs/client_guide.md  | 148 ++
 docs/coordinator_guide.md |   8 +++
 docs/index.md |   8 +++
 docs/pageA.md |   7 ---
 docs/server_guide.md  |   7 +++
 6 files changed, 173 insertions(+), 27 deletions(-)

diff --git a/README.md b/README.md
index 51a1ed0..eba4fd3 100644
--- a/README.md
+++ b/README.md
@@ -233,27 +233,9 @@ The important configuration is listed as following.
 |rss.server.flush.cold.storage.threshold.size|64M| The threshold of data size 
for LOACALFILE and HDFS if MEMORY_LOCALFILE_HDFS is used|
 
 
-### Spark Client
+### Shuffle Client
 
-|Property Name|Default|Description|
-|---|---|---|
-|spark.rss.writer.buffer.size|3m|Buffer size for single partition data|
-|spark.rss.writer.buffer.spill.size|128m|Buffer size for total partition data|
-|spark.rss.coordinator.quorum|-|Coordinator quorum|
-|spark.rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, 
MEMORY_LOCALFILE_HDFS|
-|spark.rss.client.send.size.limit|16m|The max data size sent to shuffle server|
-|spark.rss.client.read.buffer.size|32m|The max data size read from storage|
-|spark.rss.client.send.threadPool.size|10|The thread size for send shuffle 
data to shuffle server|
-
-
-### MapReduce Client
-
-|Property Name|Default|Description|
-|---|---|---|
-|mapreduce.rss.coordinator.quorum|-|Coordinator quorum|
-|mapreduce.rss.storage.type|-|Supports MEMORY_LOCALFILE, MEMORY_HDFS, 
MEMORY_LOCALFILE_HDFS|
-|mapreduce.rss.client.max.buffer.size|3k|The max buffer size in map side|
-|mapreduce.rss.client.read.buffer.size|32m|The max data size read from storage|
+For more details of advanced configuration, please see [Firestorm Shuffle 
Client 
Guide](https://github.com/Tencent/Firestorm/blob/master/docs/client_guide.md).
 
 ## LICENSE
 
diff --git a/docs/client_guide.md b/docs/client_guide.md
new file mode 100644
index 000..95b960b
--- /dev/null
+++ b/docs/client_guide.md
@@ -0,0 +1,148 @@
+---
+layout: page
+displayTitle: Firestorm Shuffle Client Guide
+title: Firestorm Shuffle Client Guide
+description: Firestorm Shuffle Client Guide
+---
+# Firestorm Shuffle Client Guide
+
+Firestorm is designed as a unified shuffle engine for multiple computing 
frameworks, including Apache Spark and Apache Hadoop.
+Firestorm has provided pluggable client plugins to enable remote shuffle in 
Spark and MapReduce.
+
+## Deploy
+This document will introduce how to deploy Firestorm client plugins with Spark 
and MapReduce.
+
+### Deploy Spark Client Plugin
+
+1. Add client jar to Spark classpath, eg, SPARK_HOME/jars/
+
+   The jar for Spark2 is located in 
/jars/client/spark2/rss-client-X-shaded.jar
+
+   The jar for Spark3 is located in 
/jars/client/spark3/rss-client-X-shaded.jar
+
+2. Update Spark conf to enable Firestorm, eg,
+
+   ```
+   spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
+   spark.rss.coordinator.quorum :1,:1
+   # Note: For Spark2, spark.sql.adaptive.enabled should be false because 
Spark2 doesn't support AQE.
+   ```
+
+### Support Spark Dynamic Allocation
+
+To support spark dynamic allocation with Firestorm, spark code should be 
updated.
+There are 2 patches for spark-2.4.6 and spark-3.1.2 in spark-patches folder 
for reference.
+
+After apply the patch and rebuild spark, add following configuration in spark 
conf to enable dynamic allocation:
+  ```
+  spark.shuffle.service.enabled false
+  spark.dynamicAllocation.enabled true
+  ```
+
+### Deploy MapReduce Client Plugin
+
+1. Add client jar to the classpath of each NodeManager, e.g., 
/share/hadoop/mapreduce/
+
+The jar for MapReduce is located in 
/jars/client/mr/rss-client-mr-X-shaded.jar
+
+2. Update MapReduce conf to enable Firestorm, eg,
+
+   ```
+   
-Dmapreduce.rss.coordinator.quorum=:1,:1
+   
-Dyarn.app.mapreduce.am.command-opts=org.apache.hadoop.mapreduce.v2.app.RssMRAppMaster
+   
-Dmapreduce.job.map.output.collector.class=org.apache.hadoop.mapred.RssMapOutputCollector
+   
-Dmapreduce.job.reduce.shuffle.consumer.plugin.class=org.apache.hadoop.mapreduce.task.reduce.RssShuffle
+   ```
+Note that the RssMRAppMaster will automatically disable slow start (i.e., 
`mapreduce.job.reduce.slowstart.complete

[incubator-uniffle] 02/04: [Doc] Update readme with features like multiple remote storage support etc (#192)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.5.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit af92c1ca1339d3353ba3f80d5d97ee0658977397
Author: Colin 
AuthorDate: Wed Jun 22 17:16:53 2022 +0800

[Doc] Update readme with features like multiple remote storage support etc 
(#192)

### What changes were proposed in this pull request?
Update Readme for latest features, eg, multiple remote storage support, 
dynamic client conf etc.

### Why are the changes needed?
Doc should be updated


### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No need
---
 README.md | 46 ++
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e134f0f..50903ce 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Coordinator will collect status of shuffle server and do the 
assignment for the
 
 Shuffle server will receive the shuffle data, merge them and write to storage.
 
-Depend on different situation, Firestorm supports Memory & Local, Memory & 
Remote Storage(eg, HDFS), Local only, Remote Storage only.
+Depend on different situation, Firestorm supports Memory & Local, Memory & 
Remote Storage(eg, HDFS), Memory & Local & Remote Storage(recommendation for 
production environment).
 
 ## Shuffle Process with Firestorm
 
@@ -74,9 +74,25 @@ rss-xxx.tgz will be generated for deployment
  rss.coordinator.server.heartbeat.timeout 3
  rss.coordinator.app.expired 6
  rss.coordinator.shuffle.nodes.max 5
- rss.coordinator.exclude.nodes.file.path RSS_HOME/conf/exclude_nodes
-   ```
-4. start Coordinator
+ # enable dynamicClientConf, and coordinator will be responsible for most 
of client conf
+ rss.coordinator.dynamicClientConf.enabled true
+ # config the path of client conf
+ rss.coordinator.dynamicClientConf.path /conf/dynamic_client.conf
+ # config the path of excluded shuffle server
+ rss.coordinator.exclude.nodes.file.path /conf/exclude_nodes
+   ```
+4. update /conf/dynamic_client.conf, rss client will get default 
conf from coordinator eg,
+   ```
+# MEMORY_LOCALFILE_HDFS is the recommendation for production environment
+rss.storage.type MEMORY_LOCALFILE_HDFS
+# multiple remote storages are supported, and client will get assignment 
from coordinator
+rss.coordinator.remote.storage.path 
hdfs://cluster1/path,hdfs://cluster2/path
+rss.writer.require.memory.retryMax 1200
+rss.client.retry.max 100
+rss.writer.send.check.timeout 60
+rss.client.read.buffer.size 14m
+   ```
+5. start Coordinator
```
 bash RSS_HOME/bin/start-coordnator.sh
```
@@ -90,14 +106,17 @@ rss-xxx.tgz will be generated for deployment
  HADOOP_HOME=
  XMX_SIZE="80g"
```
-3. update RSS_HOME/conf/server.conf, the following demo is for memory + local 
storage only, eg,
+3. update RSS_HOME/conf/server.conf, eg,
```
  rss.rpc.server.port 1
  rss.jetty.http.port 19998
  rss.rpc.executor.size 2000
- rss.storage.type MEMORY_LOCALFILE
+ # it should be configured the same as in coordinator
+ rss.storage.type MEMORY_LOCALFILE_HDFS
  rss.coordinator.quorum :1,:1
+ # local storage path for shuffle server
  rss.storage.basePath /data1/rssdata,/data2/rssdata
+ # it's better to config thread num according to local disk num
  rss.server.flush.thread.alive 5
  rss.server.flush.threadPool.size 10
  rss.server.buffer.capacity 40g
@@ -108,6 +127,10 @@ rss-xxx.tgz will be generated for deployment
  rss.server.preAllocation.expired 12
  rss.server.commit.timeout 60
  rss.server.app.expired.withoutHeartbeat 12
+ # note: the default value of rss.server.flush.cold.storage.threshold.size 
is 64m
+ # there will be no data written to DFS if set it as 100g even 
rss.storage.type=MEMORY_LOCALFILE_HDFS
+ # please set proper value if DFS is used, eg, 64m, 128m.
+ rss.server.flush.cold.storage.threshold.size 100g
```
 4. start Shuffle Server
```
@@ -121,12 +144,11 @@ rss-xxx.tgz will be generated for deployment
 
The jar for Spark3 is located in 
/jars/client/spark3/rss-client-X-shaded.jar
 
-2. Update Spark conf to enable Firestorm, the following demo is for local 
storage only, eg,
+2. Update Spark conf to enable Firestorm, eg,
 
```
spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
spark.rss.coordinator.quorum :1,:1
-   spark.rss.storage.type MEMORY_LOCALFILE
```
 
 ### Support Spark dynamic allocation
@@ -140,17 +162,16 @@ After apply the patch and rebuild spark, add following 
configuration in spark co
   spark.dynamicAllocation.enabled true
   ```
 
-## Deploy MapReduce Client
+### Deploy MapReduce Client
 
 1. 

[incubator-uniffle] branch branch-0.5.0 created (now 55cb16f)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch branch-0.5.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 55cb16f  [Bugfix] [0.5] Fix MR don't have remote storage information 
when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196)

This branch includes the following new commits:

 new 5985668  [Bugfix] [0.5] Fix spark2 executor stop NPE problem (#188)
 new af92c1c  [Doc] Update readme with features like multiple remote 
storage support etc (#192)
 new e049863  upgrade to 0.5.0 (#189)
 new 55cb16f  [Bugfix] [0.5] Fix MR don't have remote storage information 
when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195) (#196)

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[incubator-uniffle] 03/04: upgrade to 0.5.0 (#189)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.5.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit e049863dec022d86a3aa95706c2bb93896a94c4f
Author: roryqi 
AuthorDate: Wed Jun 22 17:17:55 2022 +0800

upgrade to 0.5.0 (#189)

### What changes were proposed in this pull request?
upgrade version number

### Why are the changes needed?
upgrade to 0.5.0

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
no
---
 client-mr/pom.xml | 4 ++--
 client-spark/common/pom.xml   | 4 ++--
 client-spark/spark2/pom.xml   | 4 ++--
 client-spark/spark3/pom.xml   | 4 ++--
 client/pom.xml| 4 ++--
 common/pom.xml| 2 +-
 coordinator/pom.xml   | 2 +-
 integration-test/common/pom.xml   | 4 ++--
 integration-test/mr/pom.xml   | 4 ++--
 integration-test/spark-common/pom.xml | 4 ++--
 integration-test/spark2/pom.xml   | 4 ++--
 integration-test/spark3/pom.xml   | 4 ++--
 internal-client/pom.xml   | 4 ++--
 pom.xml   | 2 +-
 proto/pom.xml | 2 +-
 server/pom.xml| 2 +-
 storage/pom.xml   | 2 +-
 17 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/client-mr/pom.xml b/client-mr/pom.xml
index c15ffba..1dc433e 100644
--- a/client-mr/pom.xml
+++ b/client-mr/pom.xml
@@ -23,13 +23,13 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.5.0
 ../pom.xml
 
 
 com.tencent.rss
 rss-client-mr
-0.5.0-snapshot
+0.5.0
 jar
 
 
diff --git a/client-spark/common/pom.xml b/client-spark/common/pom.xml
index 61c4b1f..fdf3b84 100644
--- a/client-spark/common/pom.xml
+++ b/client-spark/common/pom.xml
@@ -25,12 +25,12 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
 
 
 rss-client-spark-common
-0.5.0-snapshot
+0.5.0
 jar
 
 
diff --git a/client-spark/spark2/pom.xml b/client-spark/spark2/pom.xml
index 41a4432..bef2028 100644
--- a/client-spark/spark2/pom.xml
+++ b/client-spark/spark2/pom.xml
@@ -24,13 +24,13 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
   
 
   com.tencent.rss
   rss-client-spark2
-  0.5.0-snapshot
+  0.5.0
   jar
 
   
diff --git a/client-spark/spark3/pom.xml b/client-spark/spark3/pom.xml
index 5674613..acc4fd7 100644
--- a/client-spark/spark3/pom.xml
+++ b/client-spark/spark3/pom.xml
@@ -24,13 +24,13 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
 
 
 com.tencent.rss
 rss-client-spark3
-0.5.0-snapshot
+0.5.0
 jar
 
 
diff --git a/client/pom.xml b/client/pom.xml
index e6134ce..a6ebf91 100644
--- a/client/pom.xml
+++ b/client/pom.xml
@@ -24,12 +24,12 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.5.0
   
 
   com.tencent.rss
   rss-client
-  0.5.0-snapshot
+  0.5.0
   jar
 
   
diff --git a/common/pom.xml b/common/pom.xml
index b4b65f8..6bf0143 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -22,7 +22,7 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.5.0
 ../pom.xml
   
 
diff --git a/coordinator/pom.xml b/coordinator/pom.xml
index e860a50..ceefda3 100644
--- a/coordinator/pom.xml
+++ b/coordinator/pom.xml
@@ -24,7 +24,7 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.5.0
 ../pom.xml
   
 
diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml
index 2a759a4..773f383 100644
--- a/integration-test/common/pom.xml
+++ b/integration-test/common/pom.xml
@@ -24,13 +24,13 @@
 
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
 
 
 com.tencent.rss
 rss-integration-common-test
-0.5.0-snapshot
+0.5.0
 jar
 
 
diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml
index 489ffd5..4879eea 100644
--- a/integration-test/mr/pom.xml
+++ b/integration-test/mr/pom.xml
@@ -22,14 +22,14 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
 
 4.0.0
 
 com.tencent.rss
 rss-integration-mr-test
-0.5.0-snapshot
+0.5.0
 jar
 
 
diff --git a/integration-test/spark-common/pom.xml 
b/integration-test/spark-common/pom.xml
index 284ca2b..f82e915 100644
--- a/integration-test/spark-common/pom.xml
+++ b/integration-test/spark-common/pom.xml
@@ -23,14 +23,14 @@
   
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.5.0
 ../../pom.xml
   
   4.0.0
 
   com.tencent.rss
   rss-integration-spark-common-test
-  0.5.0-snapshot
+  0.5.0
   jar
 
   
diff --git

[incubator-uniffle] 15/17: [Minor] Make clearResourceThread and processEventThread daemon (#207)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit ba47aa017f67e681af7c311c4ef8578eef740d4b
Author: Zhen Wang <643348...@qq.com>
AuthorDate: Thu Jun 30 14:56:54 2022 +0800

[Minor] Make clearResourceThread and processEventThread daemon (#207)

### What changes were proposed in this pull request?
Make clearResourceThread daemon and processEventThread daemon.

### Why are the changes needed?
`clearResourceThread` and `processEventThread` never exit, so we can make 
them daemons.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No
---
 .../java/com/tencent/rss/server/ShuffleFlushManager.java | 12 
 .../main/java/com/tencent/rss/server/ShuffleTaskManager.java |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git 
a/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java 
b/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java
index e246b02..be941ac 100644
--- a/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java
+++ b/server/src/main/java/com/tencent/rss/server/ShuffleFlushManager.java
@@ -29,6 +29,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Queues;
 import com.google.common.collect.RangeMap;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.google.common.util.concurrent.Uninterruptibles;
 import org.apache.hadoop.conf.Configuration;
 import org.roaringbitmap.longlong.Roaring64NavigableMap;
@@ -60,7 +61,6 @@ public class ShuffleFlushManager {
   private Map>> 
handlers = Maps.newConcurrentMap();
   // appId -> shuffleId -> committed shuffle blockIds
   private Map> committedBlockIds = 
Maps.newConcurrentMap();
-  private Runnable processEventThread;
   private final int retryMax;
 
   private final StorageManager storageManager;
@@ -84,11 +84,12 @@ public class ShuffleFlushManager {
 BlockingQueue waitQueue = 
Queues.newLinkedBlockingQueue(waitQueueSize);
 int poolSize = 
shuffleServerConf.getInteger(ShuffleServerConf.SERVER_FLUSH_THREAD_POOL_SIZE);
 long keepAliveTime = 
shuffleServerConf.getLong(ShuffleServerConf.SERVER_FLUSH_THREAD_ALIVE);
-threadPoolExecutor = new ThreadPoolExecutor(poolSize, poolSize, 
keepAliveTime, TimeUnit.SECONDS, waitQueue);
+threadPoolExecutor = new ThreadPoolExecutor(poolSize, poolSize, 
keepAliveTime, TimeUnit.SECONDS, waitQueue,
+new 
ThreadFactoryBuilder().setDaemon(true).setNameFormat("FlushEventThreadPool").build());
 storageBasePaths = 
shuffleServerConf.getString(ShuffleServerConf.RSS_STORAGE_BASE_PATH).split(",");
 pendingEventTimeoutSec = 
shuffleServerConf.getLong(ShuffleServerConf.PENDING_EVENT_TIMEOUT_SEC);
 // the thread for flush data
-processEventThread = () -> {
+Runnable processEventRunnable = () -> {
   while (true) {
 try {
   ShuffleDataFlushEvent event = flushQueue.take();
@@ -103,7 +104,10 @@ public class ShuffleFlushManager {
 }
   }
 };
-new Thread(processEventThread).start();
+Thread processEventThread = new Thread(processEventRunnable);
+processEventThread.setName("ProcessEventThread");
+processEventThread.setDaemon(true);
+processEventThread.start();
 // todo: extract a class named Service, and support stop method
 Thread thread = new Thread("PendingEventProcessThread") {
   @Override
diff --git 
a/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java 
b/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java
index e847779..fc37a19 100644
--- a/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java
+++ b/server/src/main/java/com/tencent/rss/server/ShuffleTaskManager.java
@@ -123,6 +123,7 @@ public class ShuffleTaskManager {
 };
 Thread thread = new Thread(clearResourceThread);
 thread.setName("clearResourceThread");
+thread.setDaemon(true);
 thread.start();
   }
 



[incubator-uniffle] 10/17: [MINOR] Close clusterManager resources (#202)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 8b5f363fa296312042130b73c8dd8f5a15b5e0ae
Author: Junfan Zhang 
AuthorDate: Mon Jun 27 17:34:13 2022 +0800

[MINOR] Close clusterManager resources (#202)

### What changes were proposed in this pull request?
1. Change the method of shutdown to close
2. Close resources of clustermanager in test cases

### Why are the changes needed?
Close resources to reduce the resource occupying in test cases.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Test cases
---
 .../java/com/tencent/rss/coordinator/ClusterManager.java|  5 ++---
 .../java/com/tencent/rss/coordinator/CoordinatorServer.java |  2 +-
 .../com/tencent/rss/coordinator/SimpleClusterManager.java   | 10 --
 .../rss/coordinator/BasicAssignmentStrategyTest.java|  5 -
 .../coordinator/PartitionBalanceAssignmentStrategyTest.java |  4 +++-
 .../tencent/rss/coordinator/SimpleClusterManagerTest.java   | 13 +++--
 .../test/java/com/tencent/rss/test/CoordinatorGrpcTest.java |  1 +
 7 files changed, 30 insertions(+), 10 deletions(-)

diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java 
b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java
index 4249a03..9f5915e 100644
--- a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java
+++ b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManager.java
@@ -18,10 +18,11 @@
 
 package com.tencent.rss.coordinator;
 
+import java.io.Closeable;
 import java.util.List;
 import java.util.Set;
 
-public interface ClusterManager {
+public interface ClusterManager extends Closeable {
 
   /**
* Add a server to the cluster.
@@ -49,6 +50,4 @@ public interface ClusterManager {
   List list();
 
   int getShuffleNodesMax();
-
-  void shutdown();
 }
diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java 
b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
index 7ba7e1c..3b79221 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
@@ -94,7 +94,7 @@ public class CoordinatorServer {
   jettyServer.stop();
 }
 if (clusterManager != null) {
-  clusterManager.shutdown();
+  clusterManager.close();
 }
 if (accessManager != null) {
   accessManager.close();
diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
index 10af74d..fcfd1dc 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
@@ -21,6 +21,7 @@ package com.tencent.rss.coordinator;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -186,8 +187,13 @@ public class SimpleClusterManager implements 
ClusterManager {
   }
 
   @Override
-  public void shutdown() {
-scheduledExecutorService.shutdown();
+  public void close() throws IOException {
+if (scheduledExecutorService != null) {
+  scheduledExecutorService.shutdown();
+}
+if (checkNodesExecutorService != null) {
+  checkNodesExecutorService.shutdown();
+}
   }
 
   @Override
diff --git 
a/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java
 
b/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java
index 97afabf..7a95d76 100644
--- 
a/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java
+++ 
b/coordinator/src/test/java/com/tencent/rss/coordinator/BasicAssignmentStrategyTest.java
@@ -24,6 +24,8 @@ import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import com.google.common.collect.Sets;
 import com.tencent.rss.common.PartitionRange;
+
+import java.io.IOException;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -49,8 +51,9 @@ public class BasicAssignmentStrategyTest {
   }
 
   @AfterEach
-  public void tearDown() {
+  public void tearDown() throws IOException {
 clusterManager.clear();
+clusterManager.close();
   }
 
   @Test
diff --git 
a/coordinator/src/test/java/com/tencent/rss/coordinator/PartitionBalanceAssignmentStrategyTest.java
 
b/coordinator/src/test/java/com/tencent/rss/coordinator/PartitionBalanceAssignmentStrategyTest.java
index 018aa62..9ca4146 100644
--- 
a/coordinator/src/test/java/com/tencent/rss

[incubator-uniffle] 16/17: Support using remote fs path to specify the excludeNodesFilePath (#200)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 5ec04b89348ca9c28c9ddce571ffa528969d2f8a
Author: Junfan Zhang 
AuthorDate: Thu Jun 30 19:12:36 2022 +0800

Support using remote fs path to specify the excludeNodesFilePath (#200)

What changes were proposed in this pull request?
Support using remote fs path to specify the excludeNodesFilePath

Why are the changes needed?
When two coordinators are serving online, we hope they can read a 
consistent exclude nodes file instead of manually syncing the local file.

Does this PR introduce any user-facing change?
Yes. It's an incompatible change.

When the default fs in core-site.xml is HDFS, and the excludeFilePath 
is specified as "/user/x" in the coordinator server, then after applying this 
patch the filesystem will be initialized to remote HDFS because the path 
lacks a scheme.

How was this patch tested?
Unit tests.
---
 .../rss/coordinator/ClusterManagerFactory.java | 10 +++-
 .../tencent/rss/coordinator/CoordinatorServer.java |  2 +-
 .../rss/coordinator/SimpleClusterManager.java  | 68 +-
 .../coordinator/BasicAssignmentStrategyTest.java   |  6 +-
 .../PartitionBalanceAssignmentStrategyTest.java|  6 +-
 .../rss/coordinator/SimpleClusterManagerTest.java  | 13 +++--
 6 files changed, 63 insertions(+), 42 deletions(-)

diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java
 
b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java
index 2ec2b12..b2723f9 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/ClusterManagerFactory.java
@@ -18,15 +18,19 @@
 
 package com.tencent.rss.coordinator;
 
+import org.apache.hadoop.conf.Configuration;
+
 public class ClusterManagerFactory {
 
   CoordinatorConf conf;
+  Configuration hadoopConf;
 
-  public ClusterManagerFactory(CoordinatorConf conf) {
+  public ClusterManagerFactory(CoordinatorConf conf, Configuration hadoopConf) 
{
 this.conf = conf;
+this.hadoopConf = hadoopConf;
   }
 
-  public ClusterManager getClusterManager() {
-return new SimpleClusterManager(conf);
+  public ClusterManager getClusterManager() throws Exception {
+return new SimpleClusterManager(conf, hadoopConf);
   }
 }
diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java 
b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
index 3b79221..2dbe06f 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/CoordinatorServer.java
@@ -111,7 +111,7 @@ public class CoordinatorServer {
 registerMetrics();
 this.applicationManager = new ApplicationManager(coordinatorConf);
 
-ClusterManagerFactory clusterManagerFactory = new 
ClusterManagerFactory(coordinatorConf);
+ClusterManagerFactory clusterManagerFactory = new 
ClusterManagerFactory(coordinatorConf, new Configuration());
 this.clusterManager = clusterManagerFactory.getClusterManager();
 this.clientConfManager = new ClientConfManager(coordinatorConf, new 
Configuration(), applicationManager);
 AssignmentStrategyFactory assignmentStrategyFactory =
diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
index fcfd1dc..972ea5f 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
@@ -19,9 +19,10 @@
 package com.tencent.rss.coordinator;
 
 import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -36,6 +37,10 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -52,8 +57,9 @@ public class SimpleClusterManager implements ClusterManager {
   private int shuffleNodesMax;
   private ScheduledExecutorService scheduledExecutorService;
   private ScheduledExecutorService checkNodesExecutorService;
+  private FileSystem hadoopFileSystem;
 
-  public Simpl

[incubator-uniffle] 13/17: [Improvement] Add RSS_IP environment variable support for K8S (#204)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 6937631876052425b8d808d26caf78c79b24536a
Author: roryqi 
AuthorDate: Wed Jun 29 10:06:31 2022 +0800

[Improvement] Add RSS_IP environment variable support for K8S (#204)

### What changes were proposed in this pull request?
Method `getHostIp` can acquire IP by environment variable.

### Why are the changes needed?
For K8S, there are too many IPs, and it's hard to decide which one we 
should use. So we use the environment variable to tell RSS which one to use.

### Does this PR introduce _any_ user-facing change?
NO

### How was this patch tested?
UT
---
 .../java/com/tencent/rss/common/util/RssUtils.java | 10 +
 .../com/tencent/rss/common/util/RssUtilsTest.java  | 26 ++
 2 files changed, 36 insertions(+)

diff --git a/common/src/main/java/com/tencent/rss/common/util/RssUtils.java 
b/common/src/main/java/com/tencent/rss/common/util/RssUtils.java
index 1b7200e..7ecae6b 100644
--- a/common/src/main/java/com/tencent/rss/common/util/RssUtils.java
+++ b/common/src/main/java/com/tencent/rss/common/util/RssUtils.java
@@ -41,6 +41,7 @@ import java.util.Map;
 import java.util.Properties;
 
 import com.google.common.collect.Lists;
+import com.google.common.net.InetAddresses;
 import org.roaringbitmap.longlong.Roaring64NavigableMap;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -102,6 +103,15 @@ public class RssUtils {
   // loop back, etc.). If the network interface in the machine is more than 
one, we
   // will choose the first IP.
   public static String getHostIp() throws Exception {
+// For K8S, there are too many IPs, it's hard to decide which we should 
use.
+// So we use the environment variable to tell RSS to use which one.
+String ip = System.getenv("RSS_IP");
+if (ip != null) {
+  if (!InetAddresses.isInetAddress(ip)) {
+throw new RuntimeException("Environment RSS_IP: " + ip + " is wrong 
format");
+  }
+  return ip;
+}
 Enumeration nif = 
NetworkInterface.getNetworkInterfaces();
 String siteLocalAddress = null;
 while (nif.hasMoreElements()) {
diff --git a/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java 
b/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java
index 95fd55f..220cb5c 100644
--- a/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java
+++ b/common/src/test/java/com/tencent/rss/common/util/RssUtilsTest.java
@@ -18,6 +18,7 @@
 
 package com.tencent.rss.common.util;
 
+import java.lang.reflect.Field;
 import java.net.InetAddress;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
@@ -62,6 +63,18 @@ public class RssUtilsTest {
   if (!address.equals("127.0.0.1")) {
 assertEquals(address, realIp);
   }
+  setEnv("RSS_IP", "8.8.8.8");
+  assertEquals("8.8.8.8", RssUtils.getHostIp());
+  setEnv("RSS_IP", "");
+  boolean isException = false;
+  try {
+RssUtils.getHostIp();
+  } catch (Exception e) {
+isException = true;
+  }
+  setEnv("RSS_IP", realIp);
+  RssUtils.getHostIp();
+  assertTrue(isException);
 } catch (Exception e) {
   fail(e.getMessage());
 }
@@ -185,6 +198,19 @@ public class RssUtilsTest {
 }
   }
 
+  public static void setEnv(String key, String value) {
+try {
+  Map env = System.getenv();
+  Class cl = env.getClass();
+  Field field = cl.getDeclaredField("m");
+  field.setAccessible(true);
+  Map writableEnv = (Map) field.get(env);
+  writableEnv.put(key, value);
+} catch (Exception e) {
+  throw new IllegalStateException("Failed to set environment variable", e);
+}
+  }
+
   public static class RssUtilTestDummySuccess implements RssUtilTestDummy {
 private final String s;
 



[incubator-uniffle] 07/17: [Minor] Remove serverNode from tags structure when heartbeat timeout (#193)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit d92208ddb1edca13fcb6cb31a8980b2052f29d7b
Author: Junfan Zhang 
AuthorDate: Thu Jun 23 15:30:19 2022 +0800

[Minor] Remove serverNode from tags structure when heartbeat timeout (#193)

### What changes were proposed in this pull request?
Remove serverNode from tags structure when heartbeat timeout

### Why are the changes needed?
Remove serverNode from tags structure when heartbeat timeout

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
UT
---
 .../com/tencent/rss/coordinator/ServerNode.java|  7 ++
 .../rss/coordinator/SimpleClusterManager.java  |  9 ++--
 .../rss/coordinator/SimpleClusterManagerTest.java  | 27 ++
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java 
b/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java
index ef09298..816f080 100644
--- a/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java
+++ b/coordinator/src/main/java/com/tencent/rss/coordinator/ServerNode.java
@@ -115,6 +115,13 @@ public class ServerNode implements Comparable {
 + ", healthy[" + isHealthy + "]";
   }
 
+  /**
+   * Only for test case
+   */
+  void setTimestamp(long timestamp) {
+this.timestamp = timestamp;
+  }
+
   @Override
   public int compareTo(ServerNode other) {
 if (availableMemory > other.getAvailableMemory()) {
diff --git 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
index d3fe789..10af74d 100644
--- 
a/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
+++ 
b/coordinator/src/main/java/com/tencent/rss/coordinator/SimpleClusterManager.java
@@ -72,7 +72,7 @@ public class SimpleClusterManager implements ClusterManager {
 }
   }
 
-  private void nodesCheck() {
+  void nodesCheck() {
 try {
   long timestamp = System.currentTimeMillis();
   Set deleteIds = Sets.newHashSet();
@@ -83,7 +83,12 @@ public class SimpleClusterManager implements ClusterManager {
 }
   }
   for (String serverId : deleteIds) {
-servers.remove(serverId);
+ServerNode sn = servers.remove(serverId);
+if (sn != null) {
+  for (Set nodesWithTag : tagToNodes.values()) {
+nodesWithTag.remove(sn);
+  }
+}
   }
 
   CoordinatorMetrics.gaugeTotalServerNum.set(servers.size());
diff --git 
a/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java
 
b/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java
index a5040bf..bed9081 100644
--- 
a/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java
+++ 
b/coordinator/src/test/java/com/tencent/rss/coordinator/SimpleClusterManagerTest.java
@@ -27,6 +27,7 @@ import java.util.Set;
 
 import com.google.common.collect.Sets;
 import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
@@ -142,6 +143,32 @@ public class SimpleClusterManagerTest {
 assertEquals(0, serverNodes.size());
   }
 
+  @Test
+  public void testGetCorrectServerNodesWhenOneNodeRemoved() {
+CoordinatorConf ssc = new CoordinatorConf();
+ssc.setLong(CoordinatorConf.COORDINATOR_HEARTBEAT_TIMEOUT, 30 * 1000L);
+SimpleClusterManager clusterManager = new SimpleClusterManager(ssc);
+ServerNode sn1 = new ServerNode("sn1", "ip", 0, 100L, 50L, 20,
+10, testTags, true);
+ServerNode sn2 = new ServerNode("sn2", "ip", 0, 100L, 50L, 21,
+10, testTags, true);
+ServerNode sn3 = new ServerNode("sn3", "ip", 0, 100L, 50L, 20,
+11, testTags, true);
+clusterManager.add(sn1);
+clusterManager.add(sn2);
+clusterManager.add(sn3);
+List serverNodes = clusterManager.getServerList(testTags);
+assertEquals(3, serverNodes.size());
+
+sn3.setTimestamp(System.currentTimeMillis() - 60 * 1000L);
+clusterManager.nodesCheck();
+
+Map> tagToNodes = clusterManager.getTagToNodes();
+List serverList = clusterManager.getServerList(testTags);
+Assertions.assertEquals(2, 
tagToNodes.get(testTags.iterator().next()).size());
+Assertions.assertEquals(2, serverList.size());
+  }
+
   @Test
   public void updateExcludeNodesTest() throws Exception {
 String excludeNodesFolder = (new 
File(ClassLoader.getSystemResource("empty").getFile())).getParent();



[incubator-uniffle] 17/17: [Improvement] Modify configuration template (#209)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 166f3f8c7c5f14eb75daca843f992e908bd3c938
Author: roryqi 
AuthorDate: Fri Jul 1 11:49:34 2022 +0800

[Improvement] Modify configuration template (#209)

### What changes were proposed in this pull request?
I modify the file `conf/server.conf` and `conf/coordinator.conf`. Some 
configurations are not recommended. I modify them

### Why are the changes needed?
Give users a better configuration template

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No need.
---
 conf/coordinator.conf |  2 +-
 conf/server.conf  | 16 
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/conf/coordinator.conf b/conf/coordinator.conf
index 294f14e..c66e302 100644
--- a/conf/coordinator.conf
+++ b/conf/coordinator.conf
@@ -21,4 +21,4 @@ rss.jetty.http.port 19998
 rss.coordinator.server.heartbeat.timeout 3
 rss.coordinator.app.expired 6
 rss.coordinator.shuffle.nodes.max 13
-rss.coordinator.exclude.nodes.file.path /xxx
+rss.coordinator.exclude.nodes.file.path file:///xxx
diff --git a/conf/server.conf b/conf/server.conf
index 3c347e1..6ab6571 100644
--- a/conf/server.conf
+++ b/conf/server.conf
@@ -19,18 +19,10 @@
 rss.rpc.server.port 1
 rss.jetty.http.port 19998
 rss.storage.basePath /xxx,/xxx
-rss.storage.type LOCALFILE_AND_HDFS
+rss.storage.type MEMORY_LOCALFILE_HDFS
 rss.coordinator.quorum xxx:1,xxx:1
 rss.server.buffer.capacity 40gb
-rss.server.buffer.spill.threshold 22gb
-rss.server.partition.buffer.size 150mb
 rss.server.read.buffer.capacity 20gb
-rss.server.flush.thread.alive 50
-rss.server.flush.threadPool.size 100
-
-# multistorage config
-rss.server.multistorage.enable true
-rss.server.uploader.enable true
-rss.server.uploader.base.path hdfs://xxx
-rss.server.uploader.thread.number 32
-rss.server.disk.capacity 1011550697553
+rss.server.flush.thread.alive 5
+rss.server.flush.threadPool.size 10
+rss.server.disk.capacity 1t



[incubator-uniffle] 04/17: [Doc] Update readme with features like multiple remote storage support etc (#191)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 11a8594e868db3aaf55af9baa1903e8cbd17413e
Author: Colin 
AuthorDate: Wed Jun 22 16:38:27 2022 +0800

[Doc] Update readme with features like multiple remote storage support etc 
(#191)

What changes were proposed in this pull request?
Update Readme for latest features, eg, multiple remote storage support, 
dynamic client conf etc.

Why are the changes needed?
Doc should be updated

Does this PR introduce any user-facing change?
No

How was this patch tested?
No need
---
 README.md | 46 ++
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e134f0f..50903ce 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Coordinator will collect status of shuffle server and do the 
assignment for the
 
 Shuffle server will receive the shuffle data, merge them and write to storage.
 
-Depend on different situation, Firestorm supports Memory & Local, Memory & 
Remote Storage(eg, HDFS), Local only, Remote Storage only.
+Depend on different situation, Firestorm supports Memory & Local, Memory & 
Remote Storage(eg, HDFS), Memory & Local & Remote Storage(recommendation for 
production environment).
 
 ## Shuffle Process with Firestorm
 
@@ -74,9 +74,25 @@ rss-xxx.tgz will be generated for deployment
  rss.coordinator.server.heartbeat.timeout 3
  rss.coordinator.app.expired 6
  rss.coordinator.shuffle.nodes.max 5
- rss.coordinator.exclude.nodes.file.path RSS_HOME/conf/exclude_nodes
-   ```
-4. start Coordinator
+ # enable dynamicClientConf, and coordinator will be responsible for most 
of client conf
+ rss.coordinator.dynamicClientConf.enabled true
+ # config the path of client conf
+ rss.coordinator.dynamicClientConf.path /conf/dynamic_client.conf
+ # config the path of excluded shuffle server
+ rss.coordinator.exclude.nodes.file.path /conf/exclude_nodes
+   ```
+4. update /conf/dynamic_client.conf, rss client will get default 
conf from coordinator eg,
+   ```
+# MEMORY_LOCALFILE_HDFS is recommandation for production environment
+rss.storage.type MEMORY_LOCALFILE_HDFS
+# multiple remote storages are supported, and client will get assignment 
from coordinator
+rss.coordinator.remote.storage.path 
hdfs://cluster1/path,hdfs://cluster2/path
+rss.writer.require.memory.retryMax 1200
+rss.client.retry.max 100
+rss.writer.send.check.timeout 60
+rss.client.read.buffer.size 14m
+   ```
+5. start Coordinator
```
 bash RSS_HOME/bin/start-coordnator.sh
```
@@ -90,14 +106,17 @@ rss-xxx.tgz will be generated for deployment
  HADOOP_HOME=
  XMX_SIZE="80g"
```
-3. update RSS_HOME/conf/server.conf, the following demo is for memory + local 
storage only, eg,
+3. update RSS_HOME/conf/server.conf, eg,
```
  rss.rpc.server.port 1
  rss.jetty.http.port 19998
  rss.rpc.executor.size 2000
- rss.storage.type MEMORY_LOCALFILE
+ # it should be configed the same as in coordinator
+ rss.storage.type MEMORY_LOCALFILE_HDFS
  rss.coordinator.quorum :1,:1
+ # local storage path for shuffle server
  rss.storage.basePath /data1/rssdata,/data2/rssdata
+ # it's better to config thread num according to local disk num
  rss.server.flush.thread.alive 5
  rss.server.flush.threadPool.size 10
  rss.server.buffer.capacity 40g
@@ -108,6 +127,10 @@ rss-xxx.tgz will be generated for deployment
  rss.server.preAllocation.expired 12
  rss.server.commit.timeout 60
  rss.server.app.expired.withoutHeartbeat 12
+ # note: the default value of rss.server.flush.cold.storage.threshold.size 
is 64m
+ # there will be no data written to DFS if set it as 100g even 
rss.storage.type=MEMORY_LOCALFILE_HDFS
+ # please set proper value if DFS is used, eg, 64m, 128m.
+ rss.server.flush.cold.storage.threshold.size 100g
```
 4. start Shuffle Server
```
@@ -121,12 +144,11 @@ rss-xxx.tgz will be generated for deployment
 
The jar for Spark3 is located in 
/jars/client/spark3/rss-client-X-shaded.jar
 
-2. Update Spark conf to enable Firestorm, the following demo is for local 
storage only, eg,
+2. Update Spark conf to enable Firestorm, eg,
 
```
spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
spark.rss.coordinator.quorum :1,:1
-   spark.rss.storage.type MEMORY_LOCALFILE
```
 
 ### Support Spark dynamic allocation
@@ -140,17 +162,16 @@ After apply the patch and rebuild spark, add following 
configuration in spark co
   spark.dynamicAllocation.enabled true
   ```
 
-## Deploy MapReduce Client
+### Deploy MapReduce Client
 
 1. Add client jar to the classpath of

[incubator-uniffle] 01/17: [Improvement] Avoid using the default forkjoin pool by parallelStream directly (#180)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 46b62b2406a547dca6f6b933ee187047e3618202
Author: Junfan Zhang 
AuthorDate: Tue Jun 21 14:15:59 2022 +0800

[Improvement] Avoid using the default forkjoin pool by parallelStream 
directly (#180)

### What changes were proposed in this pull request?
As we know that parallelStream will use the default forkjoin pool in entire 
jvm. To avoid it, use the custom pool and allow to specify the pool size.

### Why are the changes needed?
use separate forkjoin pool to send shuffle data

### Does this PR introduce _any_ user-facing change?
Yes, introduce the configuration to control the size of forkjoinpool.
mapreduce.rss.client.data.transfer.pool.size for MapReduce
spark.rss.client.data.transfer.pool.size for Spark

### How was this patch tested?
GA passed.
---
 .../org/apache/hadoop/mapreduce/RssMRConfig.java   |  4 
 .../org/apache/hadoop/mapreduce/RssMRUtils.java|  5 -
 .../org/apache/spark/shuffle/RssSparkConfig.java   |  4 
 .../apache/spark/shuffle/RssShuffleManager.java|  5 -
 .../apache/spark/shuffle/RssShuffleManager.java| 14 ++---
 .../rss/client/factory/ShuffleClientFactory.java   |  4 ++--
 .../rss/client/impl/ShuffleWriteClientImpl.java| 24 ++
 .../tencent/rss/client/util/RssClientConfig.java   |  2 ++
 .../client/impl/ShuffleWriteClientImplTest.java|  2 +-
 .../test/java/com/tencent/rss/test/QuorumTest.java |  2 +-
 .../tencent/rss/test/ShuffleServerGrpcTest.java|  2 +-
 .../tencent/rss/test/ShuffleWithRssClientTest.java |  2 +-
 12 files changed, 50 insertions(+), 20 deletions(-)

diff --git 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java
index a191e2f..3447f09 100644
--- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java
+++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRConfig.java
@@ -52,6 +52,10 @@ public class RssMRConfig {
   RssClientConfig.RSS_DATA_REPLICA_READ_DEFAULT_VALUE;
   public static final String RSS_DATA_REPLICA_SKIP_ENABLED =
   MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_REPLICA_SKIP_ENABLED;
+  public static final String RSS_DATA_TRANSFER_POOL_SIZE =
+  MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_TRANSFER_POOL_SIZE;
+  public static final int RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE =
+  RssClientConfig.RSS_DATA_TRANFER_POOL_SIZE_DEFAULT_VALUE;
   public static final String RSS_CLIENT_SEND_THREAD_NUM =
   MR_RSS_CONFIG_PREFIX + RssClientConfig.RSS_CLIENT_SEND_THREAD_NUM;
   public static final int RSS_CLIENT_DEFAULT_SEND_THREAD_NUM =
diff --git 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java
index 1d8b4d6..16613e1 100644
--- a/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java
+++ b/client-mr/src/main/java/org/apache/hadoop/mapreduce/RssMRUtils.java
@@ -90,10 +90,13 @@ public class RssMRUtils {
 RssMRConfig.RSS_DATA_REPLICA_DEFAULT_VALUE);
 boolean replicaSkipEnabled = 
jobConf.getBoolean(RssMRConfig.RSS_DATA_REPLICA_SKIP_ENABLED,
 RssMRConfig.RSS_DATA_REPLICA_SKIP_ENABLED_DEFAULT_VALUE);
+int dataTransferPoolSize = 
jobConf.getInt(RssMRConfig.RSS_DATA_TRANSFER_POOL_SIZE,
+RssMRConfig.RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE);
 ShuffleWriteClient client = ShuffleClientFactory
 .getInstance()
 .createShuffleWriteClient(clientType, retryMax, retryIntervalMax,
-heartBeatThreadNum, replica, replicaWrite, replicaRead, 
replicaSkipEnabled);
+heartBeatThreadNum, replica, replicaWrite, replicaRead, 
replicaSkipEnabled,
+dataTransferPoolSize);
 return client;
   }
 
diff --git 
a/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java
 
b/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java
index 9720ff0..8d5dda9 100644
--- 
a/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java
+++ 
b/client-spark/common/src/main/java/org/apache/spark/shuffle/RssSparkConfig.java
@@ -106,6 +106,10 @@ public class RssSparkConfig {
   public static final int RSS_DATA_REPLICA_READ_DEFAULT_VALUE = 
RssClientConfig.RSS_DATA_REPLICA_READ_DEFAULT_VALUE;
   public static final String RSS_DATA_REPLICA_SKIP_ENABLED =
   SPARK_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_REPLICA_SKIP_ENABLED;
+  public static final String RSS_DATA_TRANSFER_POOL_SIZE =
+  SPARK_RSS_CONFIG_PREFIX + RssClientConfig.RSS_DATA_TRANSFER_POOL_SIZE;
+  public static final int RSS_DATA_TRANSFER_POOL_SIZE_DEFAULT_VALUE =
+  RssClientConfig.RSS_DATA_TRANFER_POOL_SIZE_DEFAULT_VALUE

[incubator-uniffle] 02/17: [Bugfix] Fix spark2 executor stop NPE problem (#187)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 7fa8b52e5739a0c2ded7f2eca84b086713765418
Author: roryqi 
AuthorDate: Wed Jun 22 14:30:15 2022 +0800

[Bugfix] Fix spark2 executor stop NPE problem (#187)

backport 0.5.0

### What changes were proposed in this pull request?
We need to check whether heartbeatExecutorService is null before we stop 
it.

### Why are the changes needed?
#177 pr introduce this problem, when we run Spark applications on our 
cluster, the executor will throw NPE when method `stop` is called.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
index 5d11c39..8a2c385 100644
--- 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
+++ 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
@@ -373,7 +373,9 @@ public class RssShuffleManager implements ShuffleManager {
 
   @Override
   public void stop() {
-heartBeatScheduledExecutorService.shutdownNow();
+if (heartBeatScheduledExecutorService != null) {
+  heartBeatScheduledExecutorService.shutdownNow();
+}
 threadPoolExecutor.shutdownNow();
 shuffleWriteClient.close();
   }



[incubator-uniffle] 14/17: [Improvement] Close coordinatorClients when DelegationRssShuffleManager stops (#205)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 15a6ea65ede6a2bc07824855801573a5d0cad512
Author: Zhen Wang <643348...@qq.com>
AuthorDate: Thu Jun 30 11:34:40 2022 +0800

[Improvement] Close coordinatorClients when DelegationRssShuffleManager 
stops (#205)

### What changes were proposed in this pull request?
Close coordinatorClients when DelegationRssShuffleManager stops.

### Why are the changes needed?
The coordinatorClients in DelegationRssShuffleManager are never closed.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
No
---
 .../main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java  | 1 +
 .../main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java  | 1 +
 2 files changed, 2 insertions(+)

diff --git 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
index e0a30e7..03320c0 100644
--- 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
+++ 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
@@ -173,6 +173,7 @@ public class DelegationRssShuffleManager implements 
ShuffleManager {
   @Override
   public void stop() {
 delegate.stop();
+coordinatorClients.forEach(CoordinatorClient::close);
   }
 
   @Override
diff --git 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
index 4ed6cce..32d58d2 100644
--- 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
+++ 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/DelegationRssShuffleManager.java
@@ -248,6 +248,7 @@ public class DelegationRssShuffleManager implements 
ShuffleManager {
   @Override
   public void stop() {
 delegate.stop();
+coordinatorClients.forEach(CoordinatorClient::close);
   }
 
   @Override



[incubator-uniffle] branch master created (now 166f3f8)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 166f3f8  [Improvement] Modify configuration template (#209)

This branch includes the following new commits:

 new 46b62b2  [Improvement] Avoid using the default forkjoin pool by 
parallelStream directly (#180)
 new 7fa8b52  [Bugfix] Fix spark2 executor stop NPE problem (#187)
 new 924dac7  [Bugfix] Fix spark2 executor stop NPE problem (#186)
 new 11a8594  [Doc] Update readme with features like multiple remote 
storage support etc (#191)
 new 8d8e6bf  upgrade to 0.6.0-snapshot (#190)
 new cf731f2  [Bugfix] Fix MR don't have remote storage information when we 
use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195)
 new d92208d  [Minor] Remove serverNode from tags structure when heartbeart 
timeout (#193)
 new 6bdf49e  [Improvement] Check ADAPTIVE_EXECUTION_ENABLED in 
RssShuffleManager (#197)
 new a253b1f  [Improvement] Add dynamic allocation patch for Spark 3.2 
(#199)
 new 8b5f363  [MINOR] Close clusterManager resources (#202)
 new 392c881  Support build_distribution.sh to specify different mvn build 
options for Spark2 and Spark3 (#203)
 new 2c1c554  [Improvement] Move detailed client configuration to 
individual doc (#201)
 new 6937631  [Improvement] Add RSS_IP environment variable support for K8S 
(#204)
 new 15a6ea6  [Improvement] Close coordinatorClients when 
DelegationRssShuffleManager stops (#205)
 new ba47aa0  [Minor] Make clearResourceThread and processEventThread 
daemon (#207)
 new 5ec04b8  Support using remote fs path to specify the 
excludeNodesFilePath (#200)
 new 166f3f8  [Improvement] Modify configuration template (#209)

The 17 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[incubator-uniffle] 11/17: Support build_distribution.sh to specify different mvn build options for Spark2 and Spark3 (#203)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 392c88129f2706043ebb87cc89e9e2cde5733647
Author: cxzl25 
AuthorDate: Tue Jun 28 10:09:01 2022 +0800

Support build_distribution.sh to specify different mvn build options for 
Spark2 and Spark3 (#203)

What changes were proposed in this pull request?
Add --spark2-mvn, --spark3-mvn parameters in build_distribution.sh to 
support compiling different profiles, we can pass in different maven 
parameters, such as profile, spark version.
Add --help parameters in build_distribution.sh, fix typo.
gitignore ignores the tar package generated by build.
README added how to use build_distribution.sh.
Why are the changes needed?
If we use such a command to build, Spark2 will also use the Spark3 version 
to compile, so we'd better distinguish the build options of different versions.

./build_distribution.sh -Pspark3.2
Does this PR introduce any user-facing change?
No

How was this patch tested?
local test
---
 .gitignore|  1 +
 README.md | 16 
 build_distribution.sh | 53 +++
 pom.xml   |  4 ++--
 4 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5c39d59..b6164b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,5 @@ reports/
 metastore_db/
 derby.log
 dependency-reduced-pom.xml
+rss-*.tgz
 
diff --git a/README.md b/README.md
index 9ad8299..51a1ed0 100644
--- a/README.md
+++ b/README.md
@@ -50,10 +50,26 @@ To build it, run:
 
 mvn -DskipTests clean package
 
+Build against profile Spark2(2.4.6)
+
+mvn -DskipTests clean package -Pspark2
+
+Build against profile Spark3(3.1.2)
+
+mvn -DskipTests clean package -Pspark3
+
+Build against Spark 3.2.x
+
+mvn -DskipTests clean package -Pspark3.2
+
 To package the Firestorm, run:
 
 ./build_distribution.sh
 
+Package against Spark 3.2.x, run:
+
+./build_distribution.sh --spark3-profile 'spark3.2'
+
 rss-xxx.tgz will be generated for deployment
 
 ## Deploy
diff --git a/build_distribution.sh b/build_distribution.sh
index baf50e4..214a2ed 100755
--- a/build_distribution.sh
+++ b/build_distribution.sh
@@ -32,12 +32,57 @@ RSS_HOME="$(
 
 function exit_with_usage() {
   set +x
-  echo "$0 - tool for making binary distributions of Rmote Shuffle Service"
+  echo "./build_distribution.sh - Tool for making binary distributions of 
Remote Shuffle Service"
   echo ""
-  echo "usage:"
+  echo "Usage:"
+  echo 
"+--+"
+  echo "| ./build_distribution.sh [--spark2-profile ] 
[--spark2-mvn ] |"
+  echo "| [--spark3-profile ] 
[--spark3-mvn ] |"
+  echo "| 
|"
+  echo 
"+--+"
   exit 1
 }
 
+SPARK2_PROFILE_ID="spark2"
+SPARK2_MVN_OPTS=""
+SPARK3_PROFILE_ID="spark3"
+SPARK3_MVN_OPTS=""
+while (( "$#" )); do
+  case $1 in
+--spark2-profile)
+  SPARK2_PROFILE_ID="$2"
+  shift
+  ;;
+--spark2-mvn)
+  SPARK2_MVN_OPTS=$2
+  shift
+  ;;
+--spark3-profile)
+  SPARK3_PROFILE_ID="$2"
+  shift
+  ;;
+--spark3-mvn)
+  SPARK3_MVN_OPTS=$2
+  shift
+  ;;
+--help)
+  exit_with_usage
+  ;;
+--*)
+  echo "Error: $1 is not supported"
+  exit_with_usage
+  ;;
+-*)
+  break
+  ;;
+*)
+  echo "Error: $1 is not supported"
+  exit_with_usage
+  ;;
+  esac
+  shift
+done
+
 cd $RSS_HOME
 
 if [ -z "$JAVA_HOME" ]; then
@@ -99,7 +144,7 @@ cp "${RSS_HOME}"/coordinator/target/jars/* 
${COORDINATOR_JAR_DIR}
 CLIENT_JAR_DIR="${DISTDIR}/jars/client"
 mkdir -p $CLIENT_JAR_DIR
 
-BUILD_COMMAND_SPARK2=("$MVN" clean package -Pspark2 -pl client-spark/spark2 
-DskipTests -am $@)
+BUILD_COMMAND_SPARK2=("$MVN" clean package -P$SPARK2_PROFILE_ID -pl 
client-spark/spark2 -DskipTests -am $@ $SPARK2_MVN_OPTS)
 
 # Actually build the jar
 echo -e "\nBuilding with..."
@@ -114,7 +159,7 @@ 
SPARK_CLIENT2_JAR="${RSS_HOME}/client-spark/spark2/target/shaded/rss-client-spar
 echo "copy $SPARK_CLIENT2_JAR to ${SPARK_CLIENT2_JAR_DIR}"
 cp $SPARK_CLIENT2_JAR ${SPARK_CLIENT2_JAR_DIR}
 
-BUILD_COMMAND_SPARK3=("$MVN" clean package -Pspark3 -pl client-spark/spark3 
-DskipTests -am $@)
+BUILD_COMMAND

[incubator-uniffle] 09/17: [Improvement] Add dynamic allocation patch for Spark 3.2 (#199)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit a253b1fed2e947e397b45b1db8f56d856eabc9fc
Author: roryqi 
AuthorDate: Mon Jun 27 10:07:13 2022 +0800

[Improvement] Add dynamic allocation patch for Spark 3.2 (#199)

### What changes were proposed in this pull request?
Add the dynamic allocation patch for Spark 3.2, solve issue #106

### Why are the changes needed?
If we don't have this patch, users can't use dynamic allocation in Spark 
3.2.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 README.md  |  2 +-
 .../spark-3.2.1_dynamic_allocation_support.patch   | 92 ++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0fb65e5..9ad8299 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,7 @@ rss-xxx.tgz will be generated for deployment
 ### Support Spark dynamic allocation
 
 To support spark dynamic allocation with Firestorm, spark code should be 
updated.
-There are 2 patches for spark-2.4.6 and spark-3.1.2 in spark-patches folder 
for reference.
+There are 3 patches for spark (2.4.6/3.1.2/3.2.1) in spark-patches folder for 
reference.
 
 After apply the patch and rebuild spark, add following configuration in spark 
conf to enable dynamic allocation:
   ```
diff --git a/spark-patches/spark-3.2.1_dynamic_allocation_support.patch 
b/spark-patches/spark-3.2.1_dynamic_allocation_support.patch
new file mode 100644
index 000..1e195df
--- /dev/null
+++ b/spark-patches/spark-3.2.1_dynamic_allocation_support.patch
@@ -0,0 +1,92 @@
+diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala 
b/core/src/main/scala/org/apache/spark/Dependency.scala
+index 1b4e7ba5106..95818ff72ca 100644
+--- a/core/src/main/scala/org/apache/spark/Dependency.scala
 b/core/src/main/scala/org/apache/spark/Dependency.scala
+@@ -174,8 +174,10 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: 
ClassTag](
+   !rdd.isBarrier()
+   }
+ 
+-  _rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this))
+-  _rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId)
++  if (!_rdd.context.getConf.isRssEnable()) {
++_rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this))
++_rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId)
++  }
+ }
+ 
+ 
+diff --git 
a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala 
b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+index c4b619300b5..821a01985d9 100644
+--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
 b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+@@ -207,7 +207,9 @@ private[spark] class ExecutorAllocationManager(
+   // If dynamic allocation shuffle tracking or worker decommissioning 
along with
+   // storage shuffle decommissioning is enabled we have *experimental* 
support for
+   // decommissioning without a shuffle service.
+-  if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) ||
++  if (conf.isRssEnable()) {
++logInfo("Dynamic allocation will use remote shuffle service")
++  } else if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) ||
+   (decommissionEnabled &&
+ conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED))) {
+ logWarning("Dynamic allocation without a shuffle service is an 
experimental feature.")
+diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala 
b/core/src/main/scala/org/apache/spark/SparkConf.scala
+index 5f37a1abb19..af4bee1e1bb 100644
+--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
 b/core/src/main/scala/org/apache/spark/SparkConf.scala
+@@ -580,6 +580,10 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable 
with Logging with Seria
+ Utils.redact(this, getAll).sorted.map { case (k, v) => k + "=" + v 
}.mkString("\n")
+   }
+ 
++  /**
++   * Return true if remote shuffle service is enabled.
++   */
++  def isRssEnable(): Boolean = get("spark.shuffle.manager", 
"sort").contains("RssShuffleManager")
+ }
+ 
+ private[spark] object SparkConf extends Logging {
+diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala 
b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+index a82d261d545..72e54940ca2 100644
+--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
 b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+@@ -2231,7 +2231,8 @@ private[spark] class DAGScheduler(
+ // if the cluster manager explicitly tells us that the entire worker was 
lost, then
+ // we 

[incubator-uniffle] 08/17: [Improvement] Check ADAPTIVE_EXECUTION_ENABLED in RssShuffleManager (#197)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 6bdf49e1a68131545a8385123da558be287a196f
Author: xunxunmimi5577 <52647492+xunxunmimi5...@users.noreply.github.com>
AuthorDate: Fri Jun 24 02:12:40 2022 +0800

[Improvement] Check ADAPTIVE_EXECUTION_ENABLED in RssShuffleManager (#197)

### What changes were proposed in this pull request?
 1. Add checking of spark.sql.adaptive.enabled=false in RssShuffleManager's 
constructor for spark2.
 2. Add a description of this parameter in the Deploy Spark Client section 
of the readme.

### Why are the changes needed?
 When using firestorm + spark2 + spark.sql.adaptive.enabled=true, the result is 
wrong, but we didn't get any hints.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 README.md  | 1 +
 .../src/main/java/org/apache/spark/shuffle/RssShuffleManager.java  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 50903ce..0fb65e5 100644
--- a/README.md
+++ b/README.md
@@ -149,6 +149,7 @@ rss-xxx.tgz will be generated for deployment
```
spark.shuffle.manager org.apache.spark.shuffle.RssShuffleManager
spark.rss.coordinator.quorum :1,:1
+   # Note: For Spark2, spark.sql.adaptive.enabled should be false because 
Spark2 doesn't support AQE.
```
 
 ### Support Spark dynamic allocation
diff --git 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
index 8a2c385..28f1a8d 100644
--- 
a/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
+++ 
b/client-spark/spark2/src/main/java/org/apache/spark/shuffle/RssShuffleManager.java
@@ -136,6 +136,9 @@ public class RssShuffleManager implements ShuffleManager {
   };
 
   public RssShuffleManager(SparkConf sparkConf, boolean isDriver) {
+if (sparkConf.getBoolean("spark.sql.adaptive.enabled", false)) {
+  throw new IllegalArgumentException("Spark2 doesn't support AQE, 
spark.sql.adaptive.enabled should be false.");
+}
 this.sparkConf = sparkConf;
 
 // set & check replica config



[incubator-uniffle] 03/17: [Bugfix] Fix spark2 executor stop NPE problem (#186)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 924dac7f093d0b3f581e521fc71bc30ea0963907
Author: roryqi 
AuthorDate: Wed Jun 22 14:34:06 2022 +0800

[Bugfix] Fix spark2 executor stop NPE problem (#186)

### What changes were proposed in this pull request?
We need to check whether heartbeatExecutorService is null before we stop 
it.

### Why are the changes needed?
#177 pr introduce this problem, when we run Spark applications on our 
cluster, the executor will throw NPE when method `stop` is called.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test



[incubator-uniffle] 05/17: upgrade to 0.6.0-snapshot (#190)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 8d8e6bf81ebf0bbb669642a46d13581927f9cec9
Author: roryqi 
AuthorDate: Wed Jun 22 17:36:33 2022 +0800

upgrade to 0.6.0-snapshot (#190)

### What changes were proposed in this pull request?
upgrade version number

### Why are the changes needed?
upgrade to 0.6.0-snapshot

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
no
---
 client-mr/pom.xml | 4 ++--
 client-spark/common/pom.xml   | 4 ++--
 client-spark/spark2/pom.xml   | 4 ++--
 client-spark/spark3/pom.xml   | 4 ++--
 client/pom.xml| 4 ++--
 common/pom.xml| 2 +-
 coordinator/pom.xml   | 2 +-
 integration-test/common/pom.xml   | 4 ++--
 integration-test/mr/pom.xml   | 4 ++--
 integration-test/spark-common/pom.xml | 4 ++--
 integration-test/spark2/pom.xml   | 4 ++--
 integration-test/spark3/pom.xml   | 4 ++--
 internal-client/pom.xml   | 4 ++--
 pom.xml   | 2 +-
 proto/pom.xml | 2 +-
 server/pom.xml| 2 +-
 storage/pom.xml   | 2 +-
 17 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/client-mr/pom.xml b/client-mr/pom.xml
index c15ffba..650a771 100644
--- a/client-mr/pom.xml
+++ b/client-mr/pom.xml
@@ -23,13 +23,13 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.6.0-snapshot
 ../pom.xml
 
 
 com.tencent.rss
 rss-client-mr
-0.5.0-snapshot
+0.6.0-snapshot
 jar
 
 
diff --git a/client-spark/common/pom.xml b/client-spark/common/pom.xml
index 61c4b1f..e79a671 100644
--- a/client-spark/common/pom.xml
+++ b/client-spark/common/pom.xml
@@ -25,12 +25,12 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.6.0-snapshot
 ../../pom.xml
 
 
 rss-client-spark-common
-0.5.0-snapshot
+0.6.0-snapshot
 jar
 
 
diff --git a/client-spark/spark2/pom.xml b/client-spark/spark2/pom.xml
index 41a4432..54434d5 100644
--- a/client-spark/spark2/pom.xml
+++ b/client-spark/spark2/pom.xml
@@ -24,13 +24,13 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.6.0-snapshot
 ../../pom.xml
   
 
   com.tencent.rss
   rss-client-spark2
-  0.5.0-snapshot
+  0.6.0-snapshot
   jar
 
   
diff --git a/client-spark/spark3/pom.xml b/client-spark/spark3/pom.xml
index 5674613..8cd091e 100644
--- a/client-spark/spark3/pom.xml
+++ b/client-spark/spark3/pom.xml
@@ -24,13 +24,13 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.6.0-snapshot
 ../../pom.xml
 
 
 com.tencent.rss
 rss-client-spark3
-0.5.0-snapshot
+0.6.0-snapshot
 jar
 
 
diff --git a/client/pom.xml b/client/pom.xml
index e6134ce..1b4e3d7 100644
--- a/client/pom.xml
+++ b/client/pom.xml
@@ -24,12 +24,12 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.6.0-snapshot
   
 
   com.tencent.rss
   rss-client
-  0.5.0-snapshot
+  0.6.0-snapshot
   jar
 
   
diff --git a/common/pom.xml b/common/pom.xml
index b4b65f8..9d6b2df 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -22,7 +22,7 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.6.0-snapshot
 ../pom.xml
   
 
diff --git a/coordinator/pom.xml b/coordinator/pom.xml
index e860a50..28b5b5c 100644
--- a/coordinator/pom.xml
+++ b/coordinator/pom.xml
@@ -24,7 +24,7 @@
   
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.6.0-snapshot
 ../pom.xml
   
 
diff --git a/integration-test/common/pom.xml b/integration-test/common/pom.xml
index 2a759a4..179ecb8 100644
--- a/integration-test/common/pom.xml
+++ b/integration-test/common/pom.xml
@@ -24,13 +24,13 @@
 
 com.tencent.rss
 rss-main
-0.5.0-snapshot
+0.6.0-snapshot
 ../../pom.xml
 
 
 com.tencent.rss
 rss-integration-common-test
-0.5.0-snapshot
+0.6.0-snapshot
 jar
 
 
diff --git a/integration-test/mr/pom.xml b/integration-test/mr/pom.xml
index 489ffd5..6ae8a17 100644
--- a/integration-test/mr/pom.xml
+++ b/integration-test/mr/pom.xml
@@ -22,14 +22,14 @@
 
 rss-main
 com.tencent.rss
-0.5.0-snapshot
+0.6.0-snapshot
 ../../pom.xml
 
 4.0.0
 
 com.tencent.rss
 rss-integration-mr-test
-0.5.0-snapshot
+0.6.0-snapshot
 jar
 
 
diff --git a/integration-test/spark-common/pom.xml 
b/integration-test/spark-common/pom.xml
index 284ca2b..8f642a5 100644
--- a/integration-test/spark-common/pom.xml
+++ b/integration-test/spark-common/pom.xml
@@ -23,14 +23,14 @@
   
 rss-main
 com.tencent.rss
-0.5.0

[incubator-uniffle] 06/17: [Bugfix] Fix MR don't have remote storage information when we use dynamic conf and MEMORY_LOCALE_HDFS storageType (#195)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit cf731f24ef3f10bb24c57475131c04355c9d7e64
Author: roryqi 
AuthorDate: Thu Jun 23 09:49:16 2022 +0800

[Bugfix] Fix MR don't have remote storage information when we use dynamic 
conf and MEMORY_LOCALE_HDFS storageType (#195)

### What changes were proposed in this pull request?
We should acquire the storageType from extraConf.
### Why are the changes needed?
If we don't have this patch, MR don't work when we use dynamic conf and 
MEMORY_LOCALE_HDFS storageType.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Manual test
---
 .../main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
index 7511104..976b03c 100644
--- 
a/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
+++ 
b/client-mr/src/main/java/org/apache/hadoop/mapreduce/v2/app/RssMRAppMaster.java
@@ -180,7 +180,7 @@ public class RssMRAppMaster extends MRAppMaster {
 RssMRUtils.applyDynamicClientConf(extraConf, clusterClientConf);
   }
 
-  String storageType = conf.get(RssMRConfig.RSS_STORAGE_TYPE);
+  String storageType = RssMRUtils.getString(extraConf, conf, 
RssMRConfig.RSS_STORAGE_TYPE);
   RemoteStorageInfo defaultRemoteStorage =
   new RemoteStorageInfo(conf.get(RssMRConfig.RSS_REMOTE_STORAGE_PATH, 
""));
   RemoteStorageInfo remoteStorage = ClientUtils.fetchRemoteStorage(



[incubator-uniffle] branch branch-0.1.0 created (now 36343ec)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch branch-0.1.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 36343ec  Upgrade the version to 0.1.0

No new revisions were added by this update.



[incubator-uniffle] 01/02: [Feature] [0.2] Support Spark 3.2 (#88)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 7f3c44a9a051310e991034162ef53e2835490e71
Author: roryqi 
AuthorDate: Tue Mar 1 20:33:34 2022 +0800

[Feature] [0.2] Support Spark 3.2 (#88)

### What changes were proposed in this pull request?
Support Spark 3.2

### Why are the changes needed?
We need to support more Spark versions

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
GA passed (include profiles spark2,spark3,spark3.0,spark3.1,spark3.2)

Co-authored-by: roryqi 
---
 README.md  |   2 +-
 .../spark/shuffle/writer/WriteBufferManager.java   |   3 +-
 .../spark/shuffle/writer/RssShuffleWriter.java |   5 +
 .../tencent/rss/test/SparkIntegrationTestBase.java |   4 +
 integration-test/spark3/pom.xml|   2 +
 pom.xml| 106 -
 6 files changed, 119 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a785f47..ac3e92a 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ The shuffle data is stored with index file and data file. 
Data file has all bloc
 ![Rss Shuffle_Write](docs/asset/rss_data_format.png)
 
 ## Supported Spark Version
-Current support Spark 2.3.x, Spark 2.4.x, Spark3.0.x, Spark 3.1.x
+Current support Spark 2.3.x, Spark 2.4.x, Spark3.0.x, Spark 3.1.x, Spark 3.2.x
 
 Note: To support dynamic allocation, the patch(which is included in 
client-spark/patch folder) should be applied to Spark
 
diff --git 
a/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java
 
b/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java
index 1b26f0b..91cc6a7 100644
--- 
a/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java
+++ 
b/client-spark/common/src/main/java/org/apache/spark/shuffle/writer/WriteBufferManager.java
@@ -28,6 +28,7 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Maps;
 import org.apache.spark.executor.ShuffleWriteMetrics;
 import org.apache.spark.memory.MemoryConsumer;
+import org.apache.spark.memory.MemoryMode;
 import org.apache.spark.memory.TaskMemoryManager;
 import org.apache.spark.serializer.SerializationStream;
 import org.apache.spark.serializer.Serializer;
@@ -86,7 +87,7 @@ public class WriteBufferManager extends MemoryConsumer {
   Map> partitionToServers,
   TaskMemoryManager taskMemoryManager,
   ShuffleWriteMetrics shuffleWriteMetrics) {
-super(taskMemoryManager);
+super(taskMemoryManager, taskMemoryManager.pageSizeBytes(), 
MemoryMode.ON_HEAP);
 this.bufferSize = bufferManagerOptions.getBufferSize();
 this.spillSize = bufferManagerOptions.getBufferSpillThreshold();
 this.instance = serializer.newInstance();
diff --git 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java
 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java
index 2a4beb6..a7e4480 100644
--- 
a/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java
+++ 
b/client-spark/spark3/src/main/java/org/apache/spark/shuffle/writer/RssShuffleWriter.java
@@ -171,6 +171,11 @@ public class RssShuffleWriter extends 
ShuffleWriter {
 + bufferManager.getManagerCostInfo());
   }
 
+  // only push-based shuffle use this interface, but rss won't be used when 
push-based shuffle is enabled.
+  public long[] getPartitionLengths() {
+return new long[0];
+  }
+
   private void processShuffleBlockInfos(List 
shuffleBlockInfoList, Set blockIds) {
 if (shuffleBlockInfoList != null && !shuffleBlockInfoList.isEmpty()) {
   shuffleBlockInfoList.forEach(sbi -> {
diff --git 
a/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java
 
b/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java
index 06789d2..1e15ba6 100644
--- 
a/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java
+++ 
b/integration-test/spark-common/src/test/java/com/tencent/rss/test/SparkIntegrationTestBase.java
@@ -21,6 +21,9 @@ package com.tencent.rss.test;
 import static org.junit.Assert.assertEquals;
 
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.util.concurrent.Uninterruptibles;
 import org.apache.spark.SparkConf;
 import org.apache.spark.shuffle.RssClientConfig;
 import org.apache.spark.sql.SparkSession;
@@ -50,6 +53,7 @@ public abstract class SparkIntegrationTestBase extends 
IntegrationTestBase {
 Map resultWithoutRss = runSparkApp(sparkConf, fileName);
 long durationWithoutRss = System.currentT

[incubator-uniffle] branch branch-0.2.0 created (now 75b5376)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch branch-0.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git


  at 75b5376  [Bugfix] Fix uncorrect index file (#92)

This branch includes the following new commits:

 new 7f3c44a  [Feature] [0.2] Support Spark 3.2 (#88)
 new 75b5376  [Bugfix] Fix uncorrect index file (#92)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.




[incubator-uniffle] 02/02: [Bugfix] Fix uncorrect index file (#92)

2022-06-30 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-uniffle.git

commit 75b537661f1a29291f199974c6e7fa1e39197d72
Author: roryqi 
AuthorDate: Tue Mar 8 16:31:33 2022 +0800

[Bugfix] Fix uncorrect index file (#92)

### What changes were proposed in this pull request?
Modify the method that calculates the offset in the index file.

### Why are the changes needed?
If we don't have this patch, we run 10TB tpcds, query24a will fail.
https://user-images.githubusercontent.com/8159038/157178756-d8a39b3f-0ea6-4864-ac68-ee382a88bb0f.png";>
When we write a large amount of data to dataOutputStream, dataOutputStream.size() won't 
increase any further: dataOutputStream.size() will
always be Integer.MAX_VALUE.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Add new uts.

Co-authored-by: roryqi 
---
 .../rss/storage/handler/impl/LocalFileWriter.java   |  6 ++
 .../rss/storage/handler/impl/LocalFileHandlerTest.java  | 17 +
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git 
a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
 
b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
index 10185a4..609db7e 100644
--- 
a/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
+++ 
b/storage/src/main/java/com/tencent/rss/storage/handler/impl/LocalFileWriter.java
@@ -30,21 +30,19 @@ public class LocalFileWriter implements Closeable {
 
   private DataOutputStream dataOutputStream;
   private FileOutputStream fileOutputStream;
-  private long initSize;
   private long nextOffset;
 
   public LocalFileWriter(File file) throws IOException {
 fileOutputStream = new FileOutputStream(file, true);
 // init fsDataOutputStream
 dataOutputStream = new DataOutputStream(fileOutputStream);
-initSize = file.length();
-nextOffset = initSize;
+nextOffset = file.length();
   }
 
   public void writeData(byte[] data) throws IOException {
 if (data != null && data.length > 0) {
   dataOutputStream.write(data);
-  nextOffset = initSize + dataOutputStream.size();
+  nextOffset = nextOffset + data.length;
 }
   }
 
diff --git 
a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
 
b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
index 969944d..ce8915b 100644
--- 
a/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
+++ 
b/storage/src/test/java/com/tencent/rss/storage/handler/impl/LocalFileHandlerTest.java
@@ -39,6 +39,7 @@ import com.tencent.rss.storage.handler.api.ServerReadHandler;
 import com.tencent.rss.storage.handler.api.ShuffleWriteHandler;
 import com.tencent.rss.storage.util.ShuffleStorageUtils;
 import java.io.File;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
@@ -53,6 +54,7 @@ public class LocalFileHandlerTest {
   @Test
   public void writeTest() throws Exception {
 File tmpDir = Files.createTempDir();
+tmpDir.deleteOnExit();
 File dataDir1 = new File(tmpDir, "data1");
 File dataDir2 = new File(tmpDir, "data2");
 String[] basePaths = new String[]{dataDir1.getAbsolutePath(),
@@ -111,6 +113,21 @@ public class LocalFileHandlerTest {
 }
   }
 
+  @Test
+  public void writeBigDataTest() throws IOException  {
+File tmpDir = Files.createTempDir();
+tmpDir.deleteOnExit();
+File writeFile = new File(tmpDir, "writetest");
+LocalFileWriter writer = new LocalFileWriter(writeFile);
+int  size = Integer.MAX_VALUE / 100;
+byte[] data = new byte[size];
+for (int i = 0; i < 200; i++) {
+  writer.writeData(data);
+}
+long totalSize = 200L * size;
+assertEquals(writer.nextOffset(), totalSize);
+  }
+
 
   private void writeTestData(
   ShuffleWriteHandler writeHandler,



svn commit: r46186 - /dev/incubator/livy/0.7.1-incubating-rc1/ /release/incubator/livy/0.7.1-incubating/

2021-02-18 Thread jshao
Author: jshao
Date: Fri Feb 19 01:57:45 2021
New Revision: 46186

Log:
Livy 0.7.1-incubating release

Added:
release/incubator/livy/0.7.1-incubating/
  - copied from r46185, dev/incubator/livy/0.7.1-incubating-rc1/
Removed:
dev/incubator/livy/0.7.1-incubating-rc1/



svn commit: r45798 - /dev/incubator/livy/0.7.1-incubating-rc1/

2021-02-03 Thread jshao
Author: jshao
Date: Thu Feb  4 05:00:19 2021
New Revision: 45798

Log:
Apache Livy 0.7.1-incubating-rc1

Added:
dev/incubator/livy/0.7.1-incubating-rc1/

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip   
(with props)

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc
   (with props)

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip   
(with props)

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc
   (with props)

dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip
==
Binary file - no diff available.

Propchange: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip
--
svn:mime-type = application/zip

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc
==
Binary file - no diff available.

Propchange: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.asc
--
svn:mime-type = application/pgp-signature

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512
==
--- 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512
 (added)
+++ 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-bin.zip.sha512
 Thu Feb  4 05:00:19 2021
@@ -0,0 +1,4 @@
+apache-livy-0.7.1-incubating-bin.zip: C4987855 FDCD7220 ABC0FA19 63359019
+  34B2AB6C 76BF54C3 7AF14D97 4FD0BB44
+  05D58AD3 B10C64B8 1E1C0B73 5017822E
+  2030CB57 41C232B3 4E492181 E49002A4

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip
==
Binary file - no diff available.

Propchange: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip
--
svn:mime-type = application/zip

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc
==
Binary file - no diff available.

Propchange: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.asc
--
svn:mime-type = application/pgp-signature

Added: 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512
==
--- 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512
 (added)
+++ 
dev/incubator/livy/0.7.1-incubating-rc1/apache-livy-0.7.1-incubating-src.zip.sha512
 Thu Feb  4 05:00:19 2021
@@ -0,0 +1,4 @@
+apache-livy-0.7.1-incubating-src.zip: 03E6F489 518930F5 906F793D A88A6DC0
+  F9735D87 5BCE0E2F 1818AEAA B1C0150D
+  EA9FEB69 9690938A FA6C1648 291FC90D
+  6A9AF132 D4E88C8B CFF2F327 A9CF8AB1




[incubator-livy] branch branch-0.7 updated: [BUILD] Update version for 0.7.2-incubating-SNAPSHOT

2021-02-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.7
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/branch-0.7 by this push:
 new 972b600  [BUILD] Update version for 0.7.2-incubating-SNAPSHOT
972b600 is described below

commit 972b600d72629884140aec315ea925858eb67884
Author: jerryshao 
AuthorDate: Thu Feb 4 11:00:54 2021 +0800

[BUILD] Update version for 0.7.2-incubating-SNAPSHOT
---
 api/pom.xml  | 4 ++--
 assembly/pom.xml | 4 ++--
 client-common/pom.xml| 4 ++--
 client-http/pom.xml  | 4 ++--
 core/pom.xml | 4 ++--
 core/scala-2.11/pom.xml  | 4 ++--
 coverage/pom.xml | 4 ++--
 examples/pom.xml | 4 ++--
 integration-test/pom.xml | 4 ++--
 pom.xml  | 2 +-
 python-api/pom.xml   | 4 ++--
 python-api/setup.py  | 2 +-
 repl/pom.xml | 4 ++--
 repl/scala-2.11/pom.xml  | 4 ++--
 rsc/pom.xml  | 2 +-
 scala-api/pom.xml| 4 ++--
 scala-api/scala-2.11/pom.xml | 4 ++--
 scala/pom.xml| 4 ++--
 server/pom.xml   | 4 ++--
 test-lib/pom.xml | 4 ++--
 thriftserver/client/pom.xml  | 2 +-
 thriftserver/server/pom.xml  | 2 +-
 thriftserver/session/pom.xml | 2 +-
 23 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/api/pom.xml b/api/pom.xml
index fbd8496..e160690 100644
--- a/api/pom.xml
+++ b/api/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
   
 
   org.apache.livy
   livy-api
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 36bb48c..113a704 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
 ../pom.xml
   
 
   livy-assembly
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   pom
 
   
diff --git a/client-common/pom.xml b/client-common/pom.xml
index 3897c2b..540d68d 100644
--- a/client-common/pom.xml
+++ b/client-common/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
   
 
   org.apache.livy
   livy-client-common
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
diff --git a/client-http/pom.xml b/client-http/pom.xml
index a053d8d..1625a54 100644
--- a/client-http/pom.xml
+++ b/client-http/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
   
 
   org.apache.livy
   livy-client-http
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
diff --git a/core/pom.xml b/core/pom.xml
index 2b21dec..6c76db4 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -22,12 +22,12 @@
   
 org.apache.livy
 multi-scala-project-root
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
 ../scala/pom.xml
   
 
   livy-core-parent
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   pom
 
   
diff --git a/core/scala-2.11/pom.xml b/core/scala-2.11/pom.xml
index 5100e19..e703896 100644
--- a/core/scala-2.11/pom.xml
+++ b/core/scala-2.11/pom.xml
@@ -19,13 +19,13 @@
   4.0.0
   org.apache.livy
   livy-core_2.11
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
 org.apache.livy
 livy-core-parent
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
 ../pom.xml
   
 
diff --git a/coverage/pom.xml b/coverage/pom.xml
index 9c23dca..d358671 100644
--- a/coverage/pom.xml
+++ b/coverage/pom.xml
@@ -23,11 +23,11 @@
 org.apache.livy
 livy-main
 ../pom.xml
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
   
 
   livy-coverage-report
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   pom
 
   
diff --git a/examples/pom.xml b/examples/pom.xml
index 9692224..7ddc525 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -23,13 +23,13 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
 ../pom.xml
   
 
   org.apache.livy
   livy-examples
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
diff --git a/integration-test/pom.xml b/integration-test/pom.xml
index 80a9c29..a658b81 100644
--- a/integration-test/pom.xml
+++ b/integration-test/pom.xml
@@ -23,11 +23,11 @@
 org.apache.livy
 livy-main
 ../pom.xml
-0.7.1-incubating
+0.7.2-incubating-SNAPSHOT
   
 
   livy-integration-test
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   jar
 
   
diff --git a/pom.xml b/pom.xml
index 20b1a55..9eb1967 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
 
   org.apache.livy
   livy-main
-  0.7.1-incubating
+  0.7.2-incubating-SNAPSHOT
   pom
   Livy Project Parent POM
   Livy Project
diff --git a/python-api/pom.xml b/python-api/pom.xml
index 3d7b178..2679ab3 100644
--- a

[incubator-livy] tag v0.7.1-incubating-rc1 created (now 7c3d341)

2021-02-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to tag v0.7.1-incubating-rc1
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git.


  at 7c3d341  (commit)
No new revisions were added by this update.



[incubator-livy] branch branch-0.7 updated: [BUILD] Update version for 0.7.1-incubating

2021-02-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.7
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/branch-0.7 by this push:
 new 7c3d341  [BUILD] Update version for 0.7.1-incubating
7c3d341 is described below

commit 7c3d341926db69fb57a4978b15d4e96f06312267
Author: jerryshao 
AuthorDate: Thu Feb 4 10:31:32 2021 +0800

[BUILD] Update version for 0.7.1-incubating
---
 api/pom.xml  | 4 ++--
 assembly/pom.xml | 4 ++--
 client-common/pom.xml| 4 ++--
 client-http/pom.xml  | 4 ++--
 core/pom.xml | 4 ++--
 core/scala-2.11/pom.xml  | 4 ++--
 coverage/pom.xml | 4 ++--
 examples/pom.xml | 4 ++--
 integration-test/pom.xml | 4 ++--
 pom.xml  | 2 +-
 python-api/pom.xml   | 4 ++--
 python-api/setup.py  | 2 +-
 repl/pom.xml | 4 ++--
 repl/scala-2.11/pom.xml  | 4 ++--
 rsc/pom.xml  | 2 +-
 scala-api/pom.xml| 4 ++--
 scala-api/scala-2.11/pom.xml | 4 ++--
 scala/pom.xml| 4 ++--
 server/pom.xml   | 4 ++--
 test-lib/pom.xml | 4 ++--
 thriftserver/client/pom.xml  | 2 +-
 thriftserver/server/pom.xml  | 2 +-
 thriftserver/session/pom.xml | 2 +-
 23 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/api/pom.xml b/api/pom.xml
index 66f175c..fbd8496 100644
--- a/api/pom.xml
+++ b/api/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
   
 
   org.apache.livy
   livy-api
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
diff --git a/assembly/pom.xml b/assembly/pom.xml
index b94f0da..36bb48c 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
 ../pom.xml
   
 
   livy-assembly
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   pom
 
   
diff --git a/client-common/pom.xml b/client-common/pom.xml
index dac522c..3897c2b 100644
--- a/client-common/pom.xml
+++ b/client-common/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
   
 
   org.apache.livy
   livy-client-common
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
diff --git a/client-http/pom.xml b/client-http/pom.xml
index ad31b41..a053d8d 100644
--- a/client-http/pom.xml
+++ b/client-http/pom.xml
@@ -20,12 +20,12 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
   
 
   org.apache.livy
   livy-client-http
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
diff --git a/core/pom.xml b/core/pom.xml
index 5623220..2b21dec 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -22,12 +22,12 @@
   
 org.apache.livy
 multi-scala-project-root
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
 ../scala/pom.xml
   
 
   livy-core-parent
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   pom
 
   
diff --git a/core/scala-2.11/pom.xml b/core/scala-2.11/pom.xml
index 041f9c1..5100e19 100644
--- a/core/scala-2.11/pom.xml
+++ b/core/scala-2.11/pom.xml
@@ -19,13 +19,13 @@
   4.0.0
   org.apache.livy
   livy-core_2.11
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
 org.apache.livy
 livy-core-parent
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
 ../pom.xml
   
 
diff --git a/coverage/pom.xml b/coverage/pom.xml
index 6419bc4..9c23dca 100644
--- a/coverage/pom.xml
+++ b/coverage/pom.xml
@@ -23,11 +23,11 @@
 org.apache.livy
 livy-main
 ../pom.xml
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
   
 
   livy-coverage-report
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   pom
 
   
diff --git a/examples/pom.xml b/examples/pom.xml
index 1f4aa32..9692224 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -23,13 +23,13 @@
   
 org.apache.livy
 livy-main
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
 ../pom.xml
   
 
   org.apache.livy
   livy-examples
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
diff --git a/integration-test/pom.xml b/integration-test/pom.xml
index 9fa230b..80a9c29 100644
--- a/integration-test/pom.xml
+++ b/integration-test/pom.xml
@@ -23,11 +23,11 @@
 org.apache.livy
 livy-main
 ../pom.xml
-0.7.1-incubating-SNAPSHOT
+0.7.1-incubating
   
 
   livy-integration-test
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   jar
 
   
diff --git a/pom.xml b/pom.xml
index 938bdbf..20b1a55 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
 
   org.apache.livy
   livy-main
-  0.7.1-incubating-SNAPSHOT
+  0.7.1-incubating
   pom
   Livy Project Parent POM
   Livy Project
diff --git a/python-api/pom.xml b/python-api/pom.xml
index 62850c0..3d7b178 100644
--- a/python-api/pom.xml

[incubator-livy] branch branch-0.7 updated: Add html escape to session name

2021-02-03 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.7
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/branch-0.7 by this push:
 new 9f1ba47  Add html escape to session name
9f1ba47 is described below

commit 9f1ba47a2f0d8accc435b133b42c3a76aa9ac846
Author: Marco Gaido 
AuthorDate: Fri Aug 14 17:25:54 2020 -0700

Add html escape to session name

## What changes were proposed in this pull request?

The PR adds HTML escaping to session names.

## How was this patch tested?

Manual test.

Author: Marco Gaido 

Closes #302 from mgaido91/escape_html.
---
 .../org/apache/livy/server/ui/static/js/all-sessions.js| 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git 
a/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js 
b/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js
index 6e35702..d8a84a7 100644
--- 
a/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js
+++ 
b/server/src/main/resources/org/apache/livy/server/ui/static/js/all-sessions.js
@@ -15,13 +15,17 @@
  * limitations under the License.
  */
 
+function escapeHtml(unescapedText) {
+  return $("").text(unescapedText).html()
+}
+
 function loadSessionsTable(sessions) {
   $.each(sessions, function(index, session) {
 $("#interactive-sessions .sessions-table-body").append(
   "" +
 tdWrap(uiLink("session/" + session.id, session.id)) +
 tdWrap(appIdLink(session)) +
-tdWrap(session.name) +
+tdWrap(escapeHtml(session.name)) +
 tdWrap(session.owner) +
 tdWrap(session.proxyUser) +
 tdWrap(session.kind) +
@@ -38,7 +42,7 @@ function loadBatchesTable(sessions) {
   "" +
 tdWrap(session.id) +
 tdWrap(appIdLink(session)) +
-tdWrap(session.name) +
+tdWrap(escapeHtml(session.name)) +
 tdWrap(session.owner) +
 tdWrap(session.proxyUser) +
 tdWrap(session.state) +
@@ -79,4 +83,4 @@ $(document).ready(function () {
   $("#all-sessions").append('No Sessions or Batches have been created 
yet.');
 }
   });
-});
\ No newline at end of file
+});



[incubator-livy] branch master updated: [LIVY-756] Add Spark 3.0 and Scala 2.12 support

2020-07-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new 97cf2f7  [LIVY-756] Add Spark 3.0 and Scala 2.12 support
97cf2f7 is described below

commit 97cf2f75929ef6c152afc468adbead269bd0758f
Author: jerryshao 
AuthorDate: Thu Jul 2 15:44:12 2020 +0800

[LIVY-756] Add Spark 3.0 and Scala 2.12 support

## What changes were proposed in this pull request?

This PR is based tprelle 's PR #289 , and address all the left issues in 
that PR:

1. multi-scala version support in one build (Scala 2.11 and 2.12 support).
2. make SparkR work.

Also reverts most of the unnecessary changes. Besides this PR remove the 
build below 2.4 (2.2, 2.3), since Spark 2.2 and 2.3 only ships with Scala 2.11, 
hard to maintain multiple version. But user could still use 2.2 and 2.3 without 
changes.

All credits to tprelle.

## How was this patch tested?

Run UT and IT with Spark 2.4.5 and 3.0.0 locally.

Author: jerryshao 

Closes #300 from jerryshao/LIVY-756.
---
 .gitignore |  1 +
 .rat-excludes  |  1 +
 .travis.yml| 24 +++---
 README.md  |  4 +-
 assembly/assembly.xml  |  7 ++
 assembly/pom.xml   | 23 ++
 client-common/pom.xml  |  2 +-
 .../org/apache/livy/client/common/Serializer.java  |  8 +-
 {client-common => core/scala-2.12}/pom.xml | 52 ++---
 .../org/apache/livy/LivyBaseUnitTestSuite.scala|  4 +-
 coverage/pom.xml   | 35 +
 .../org/apache/livy/examples/WordCountApp.scala|  2 +-
 integration-test/pom.xml   |  2 +-
 integration-test/src/test/resources/rtest.R|  9 +--
 .../scala/org/apache/livy/test/InteractiveIT.scala |  6 +-
 .../src/test/spark2/scala/Spark2JobApiIT.scala | 26 +--
 pom.xml| 88 +-
 repl/pom.xml   |  3 +
 repl/scala-2.11/pom.xml|  1 +
 .../org/apache/livy/repl/SparkInterpreter.scala|  5 +-
 repl/{scala-2.11 => scala-2.12}/pom.xml| 11 +--
 .../org/apache/livy/repl/SparkInterpreter.scala| 17 ++---
 .../apache/livy/repl/SparkInterpreterSpec.scala| 68 +
 .../main/scala/org/apache/livy/repl/Session.scala  |  4 +-
 .../org/apache/livy/repl/SQLInterpreterSpec.scala  |  4 +-
 rsc/pom.xml|  6 +-
 .../org/apache/livy/rsc/driver/SparkEntries.java   |  7 +-
 .../org/apache/livy/rsc/rpc/KryoMessageCodec.java  |  7 --
 {repl/scala-2.11 => scala-api/scala-2.12}/pom.xml  | 17 ++---
 scala-api/src/main/resources/build.marker  |  0
 .../org/apache/livy/scalaapi/ScalaJobHandle.scala  |  8 ++
 server/pom.xml |  9 ++-
 .../org/apache/livy/server/SessionServlet.scala|  2 +-
 .../server/interactive/InteractiveSession.scala|  6 +-
 .../org/apache/livy/utils/LivySparkUtils.scala |  4 +-
 .../apache/livy/server/BaseJsonServletSpec.scala   |  3 +-
 .../apache/livy/server/SessionServletSpec.scala|  2 +-
 .../livy/server/batch/BatchServletSpec.scala   |  2 +-
 .../livy/server/batch/BatchSessionSpec.scala   |  6 +-
 .../InteractiveSessionServletSpec.scala|  3 +-
 .../interactive/InteractiveSessionSpec.scala   |  2 +-
 .../livy/server/interactive/JobApiSpec.scala   |  2 +-
 .../server/interactive/SessionHeartbeatSpec.scala  |  2 +-
 .../server/recovery/FileSystemStateStoreSpec.scala |  2 +-
 .../livy/server/recovery/SessionStoreSpec.scala|  2 +-
 .../livy/server/recovery/StateStoreSpec.scala  |  2 -
 .../server/recovery/ZooKeeperStateStoreSpec.scala  |  2 +-
 .../apache/livy/sessions/SessionManagerSpec.scala  |  2 +-
 .../apache/livy/utils/LivySparkUtilsSuite.scala|  5 ++
 .../org/apache/livy/utils/SparkYarnAppSpec.scala   |  2 +-
 .../org/apache/livy/test/jobs/SQLGetTweets.java|  2 +-
 .../livy/thriftserver/types/DataTypeUtils.scala|  5 +-
 .../livy/thriftserver/ThriftServerSuites.scala |  3 +-
 thriftserver/session/pom.xml   | 13 
 .../thriftserver/session/ColumnBufferTest.java | 16 ++--
 55 files changed, 362 insertions(+), 189 deletions(-)

diff --git a/.gitignore b/.gitignore
index d46d49f..b1045ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,7 @@ metastore_db/
 derby.log
 dependency-reduced-pom.xml
 release-staging/
+venv/
 
 # For python setup.py, which pollutes the source dirs.
 python-api/dist
diff --git a/.rat-excludes b/.rat-excludes
index ac

[incubator-livy] branch master updated: [MINOR] Modify the description of POST /sessions/{sessionId}/completion

2020-03-26 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new ee7fdfc  [MINOR] Modify the description of POST 
/sessions/{sessionId}/completion
ee7fdfc is described below

commit ee7fdfc45d90c0478dcd446bc8a19a217eebe04d
Author: Shingo Furuyama 
AuthorDate: Thu Mar 26 14:59:21 2020 +0800

[MINOR] Modify the description of POST /sessions/{sessionId}/completion

## What changes were proposed in this pull request?

Just modified a description of POST /sessions/{sessionId}/completion in the 
api-doc.

## How was this patch tested?

Since the change is quite small, I didn't test the patch. If I have an 
instruction, I will follow it.

Author: Shingo Furuyama 

Closes #285 from marblejenka/mod-doc-completion.
---
 docs/rest-api.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/rest-api.md b/docs/rest-api.md
index cca937f..d80e77d 100644
--- a/docs/rest-api.md
+++ b/docs/rest-api.md
@@ -312,7 +312,7 @@ Cancel the specified statement in this session.
 
 ### POST /sessions/{sessionId}/completion
 
-Runs a statement in a session.
+Returns code completion candidates for the specified code in the session.
 
  Request Body
 



[incubator-livy] branch master updated: [LIVY-751] Livy server should allow to customize LIVY_CLASSPATH

2020-03-25 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new e39d8fe  [LIVY-751] Livy server should allow to customize 
LIVY_CLASSPATH
e39d8fe is described below

commit e39d8fee43adbddf88acb2e04b470aa14b713785
Author: Shingo Furuyama 
AuthorDate: Thu Mar 26 14:07:42 2020 +0800

[LIVY-751] Livy server should allow to customize LIVY_CLASSPATH

## What changes were proposed in this pull request?

The purpose and background is https://issues.apache.org/jira/browse/LIVY-751

## How was this patch tested?

I tested the following two manually.

1. To confirm there is no degradation, I run 0.7.0-incubating livy server 
with sources in this PR. I also run an example jobs, and it completed without 
error.
2.  To confirm our workaround works, I build 0.7.0-incubating branch with 
specifying `-Dhadoop.scope=provided` and sources with this PR. After that, I 
added `export LIVY_CLASSPATH="$LIVY_HOME/jars/*:$(hadoop classpath)"` in 
conf/livy-env.sh and boot livy server.  I also run an example jobs, and it 
completed without error.

Author: Shingo Furuyama 
Author: Shingo Furuyama 

Closes #282 from marblejenka/livy-classpath.
---
 bin/livy-server   | 2 +-
 conf/livy-env.sh.template | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/livy-server b/bin/livy-server
index 8d27d4e..a0e2fb7 100755
--- a/bin/livy-server
+++ b/bin/livy-server
@@ -90,7 +90,7 @@ start_livy_server() {
 fi
   fi
 
-  LIVY_CLASSPATH="$LIBDIR/*:$LIVY_CONF_DIR"
+  LIVY_CLASSPATH="${LIVY_CLASSPATH:-${LIBDIR}/*:${LIVY_CONF_DIR}}"
 
   if [ -n "$SPARK_CONF_DIR" ]; then
 LIVY_CLASSPATH="$LIVY_CLASSPATH:$SPARK_CONF_DIR"
diff --git a/conf/livy-env.sh.template b/conf/livy-env.sh.template
index 7cba5c3..14f22c3 100644
--- a/conf/livy-env.sh.template
+++ b/conf/livy-env.sh.template
@@ -30,3 +30,4 @@
 # names. (Default: name of the user starting Livy).
 # - LIVY_MAX_LOG_FILES Max number of log file to keep in the log directory. 
(Default: 5.)
 # - LIVY_NICENESS   Niceness of the Livy server process when running in the 
background. (Default: 0.)
+# - LIVY_CLASSPATH  Override if the additional classpath is required.



[incubator-livy] branch master updated: [MINOR] Add description of POST /batches

2020-03-25 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new d07d103  [MINOR] Add description of POST /batches
d07d103 is described below

commit d07d103f22941525d3cfa2f07f647e310ffb34a1
Author: Shingo Furuyama 
AuthorDate: Thu Mar 26 13:55:51 2020 +0800

[MINOR] Add description of POST /batches

## What changes were proposed in this pull request?

Just added a description of POST /batches in the api-doc.

## How was this patch tested?

Since the change is quite small, I didn't test the patch. If I have an 
instruction, I will follow it.

Author: Shingo Furuyama 

Closes #283 from marblejenka/add-description.
---
 docs/rest-api.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/rest-api.md b/docs/rest-api.md
index f1ff9b4..cca937f 100644
--- a/docs/rest-api.md
+++ b/docs/rest-api.md
@@ -389,6 +389,8 @@ Returns all the active batch sessions.
 
 ### POST /batches
 
+Creates a new batch session.   
+
  Request Body
 
 



[incubator-livy] branch master updated (3a26856 -> 06a8d4f)

2020-03-01 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git.


from 3a26856  [LIVY-745] Ensure that a single RSCClientFactory gets loaded.
 add 06a8d4f  [LIVY-748] Add support for running Livy Integration tests 
against secure external clusters

No new revisions were added by this update.

Summary of changes:
 .../apache/livy/client/http/LivyConnection.java|   5 +
 integration-test/pom.xml   |   4 +-
 .../test/framework/BaseIntegrationTestSuite.scala  |  57 ++-
 .../org/apache/livy/test/framework/Cluster.scala   |  44 +++-
 .../livy/test/framework/ExternalCluster.scala  | 103 +++
 .../livy/test/framework/LivyRestClient.scala   | 113 +
 .../apache/livy/test/framework/MiniCluster.scala   |  60 +++
 .../resources/{rtest.R => cluster.spec.template}   |  36 ---
 .../src/test/resources/test_python_api.py  |  34 +--
 .../test/scala/org/apache/livy/test/BatchIT.scala  |   2 +-
 .../scala/org/apache/livy/test/InteractiveIT.scala |   8 +-
 .../test/scala/org/apache/livy/test/JobApiIT.scala |  21 +++-
 .../src/test/spark2/scala/Spark2JobApiIT.scala |  17 +++-
 pom.xml|   6 +-
 14 files changed, 401 insertions(+), 109 deletions(-)
 create mode 100644 
integration-test/src/main/scala/org/apache/livy/test/framework/ExternalCluster.scala
 copy integration-test/src/test/resources/{rtest.R => cluster.spec.template} 
(52%)



[incubator-livy] branch branch-0.7 updated: [MINOR] Fix CI breakage in python-api unit tests.

2020-02-04 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch branch-0.7
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/branch-0.7 by this push:
 new 7867d50  [MINOR] Fix CI breakage in python-api unit tests.
7867d50 is described below

commit 7867d5034a27582583c5f96157871554a9172de7
Author: Wing Yew Poon 
AuthorDate: Tue Feb 4 16:01:52 2020 +0800

[MINOR] Fix CI breakage in python-api unit tests.

## What changes were proposed in this pull request?

Freeze python mock library at 3.0.5 to avoid pulling in 4.0.0b1.

## How was this patch tested?

Existing unit tests.

Author: Wing Yew Poon 

Closes #279 from wypoon/CI_fix.

(cherry picked from commit f4ab5ef5d389d5743410f5839ffc79aea8943c9c)
Signed-off-by: jerryshao 
---
 python-api/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python-api/setup.py b/python-api/setup.py
index 48edcc3..8ea624e 100644
--- a/python-api/setup.py
+++ b/python-api/setup.py
@@ -32,6 +32,7 @@ requirements = [
 'configparser>=3.5.0',
 'future>=0.15.2',
 'futures>=3.0.5',
+'mock~=3.0.5',
 'requests>=2.10.0',
 'responses>=0.5.1',
 'requests-kerberos>=0.11.0',



[incubator-livy] branch master updated: [MINOR] Fix CI breakage in python-api unit tests.

2020-02-04 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new f4ab5ef  [MINOR] Fix CI breakage in python-api unit tests.
f4ab5ef is described below

commit f4ab5ef5d389d5743410f5839ffc79aea8943c9c
Author: Wing Yew Poon 
AuthorDate: Tue Feb 4 16:01:52 2020 +0800

[MINOR] Fix CI breakage in python-api unit tests.

## What changes were proposed in this pull request?

Freeze python mock library at 3.0.5 to avoid pulling in 4.0.0b1.

## How was this patch tested?

Existing unit tests.

Author: Wing Yew Poon 

Closes #279 from wypoon/CI_fix.
---
 python-api/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python-api/setup.py b/python-api/setup.py
index 3eeb323..709ff1a 100644
--- a/python-api/setup.py
+++ b/python-api/setup.py
@@ -32,6 +32,7 @@ requirements = [
 'configparser>=3.5.0',
 'future>=0.15.2',
 'futures>=3.0.5',
+'mock~=3.0.5',
 'requests>=2.10.0',
 'responses>=0.5.1',
 'requests-kerberos>=0.11.0',



[incubator-livy-website] branch master updated: Update website for 0.7.0 release

2020-02-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy-website.git


The following commit(s) were added to refs/heads/master by this push:
 new 8e7efe2  Update website for 0.7.0 release
8e7efe2 is described below

commit 8e7efe289d9ec6a8e248a55e751562e2db23fa84
Author: jerryshao 
AuthorDate: Sun Feb 2 17:32:21 2020 +0800

Update website for 0.7.0 release
---
 site/_data/project.yml |  2 +-
 site/history.md| 18 ++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/site/_data/project.yml b/site/_data/project.yml
index 292cc79..1c7aa92 100644
--- a/site/_data/project.yml
+++ b/site/_data/project.yml
@@ -24,7 +24,7 @@ incubator_slash_name: incubator/livy
 description: A REST Service for Apache Spark
 
 download: download
-latest_release: 0.6.0-incubating
+latest_release: 0.7.0-incubating
 
 dev_list: d...@livy.incubator.apache.org
 dev_list_subscribe: dev-subscr...@livy.incubator.apache.org
diff --git a/site/history.md b/site/history.md
index 26583ed..1515bf0 100644
--- a/site/history.md
+++ b/site/history.md
@@ -32,6 +32,24 @@ For a full list of releases, see
 Downloads are available on the
 [downloads page]({{ site.baseurl }}/download).
 
+## <a href="https://github.com/apache/{{ site.data.project.incubator_name 
}}/releases/tag/v0.7.0-incubating">0.7.0-incubating</a> / 2020-02-02
+{: #v0-7-0-incubating}
+
+New features
+
+* Livy 0.7.0 now requires Java 8, Scala 2.11 and Spark >= 2.2.0. With 0.7.0, 
JDBC/ODBC feature now becomes GA.
+
+* Added support for all current versions of Spark (2.2.x to 2.4.x).
+
+* [<a href="https://issues.apache.org/jira/browse/LIVY-575">LIVY-575</a>]
+  Hive-compatible JDBC / ODBC server GA.
+
+* [<a href="https://issues.apache.org/jira/browse/LIVY-678">LIVY-678</a>]
+  Add LDAP authorization support for REST, JDBC interface.
+
+* With various bugs fixed, details can be checked [<a href="https://issues.apache.org/jira/projects/LIVY/versions/12345179">here</a>].
+
+
 ## <a href="https://github.com/apache/{{ site.data.project.incubator_name 
}}/releases/tag/v0.6.0-incubating">0.6.0-incubating</a> / 2019-04-01
 {: #v0-6-0-incubating}
 



[incubator-livy-website] branch asf-site updated: Livy 0.7.0 release website

2020-02-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-livy-website.git


The following commit(s) were added to refs/heads/asf-site by this push:
 new 1ee935a  Livy 0.7.0 release website
1ee935a is described below

commit 1ee935a40dacc45421efb20917e4f44f518ffa86
Author: jerryshao 
AuthorDate: Sun Feb 2 19:04:58 2020 +0800

Livy 0.7.0 release website
---
 content/community-members/index.html |  2 +-
 content/community/index.html |  2 +-
 content/download/index.html  | 18 
 content/examples/index.html  |  2 +-
 content/feed.xml |  4 ++--
 content/get-started/index.html   |  2 +-
 content/history/index.html   | 26 +++-
 content/index.html   |  4 ++--
 content/news/2017/09/01/release-0.4.0/index.html |  2 +-
 content/news/2018/02/05/release-0.5.0/index.html |  2 +-
 content/release-process/index.html   |  2 +-
 content/third-party-projects/index.html  |  2 +-
 12 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/content/community-members/index.html 
b/content/community-members/index.html
index c6996e5..28840eb 100644
--- a/content/community-members/index.html
+++ b/content/community-members/index.html
@@ -270,7 +270,7 @@
 
   
   
-
+
 
 
 
diff --git a/content/community/index.html b/content/community/index.html
index 8cd28a5..f5e518d 100644
--- a/content/community/index.html
+++ b/content/community/index.html
@@ -250,7 +250,7 @@ the JIRA in your pull request.
 
   
   
-
+
 
 
 
diff --git a/content/download/index.html b/content/download/index.html
index 430c882..8ff5859 100644
--- a/content/download/index.html
+++ b/content/download/index.html
@@ -172,16 +172,16 @@
 
 
 
-Apache Livy 0.6.0-incubating (zip)
-https://www.apache.org/dyn/closer.lua/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip";>zip
-https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip.sha512";>SHA-512
-https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-bin.zip.asc";>ASC
+Apache Livy 0.7.0-incubating (zip)
+https://www.apache.org/dyn/closer.lua/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip";>zip
+https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip.sha512";>SHA-512
+https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-bin.zip.asc";>ASC
 
 
-Apache Livy 0.6.0-incubating (source zip)
-https://www.apache.org/dyn/closer.lua/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip";>zip
-https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip.sha512";>SHA-512
-https://www.apache.org/dist/incubator/livy/0.6.0-incubating/apache-livy-0.6.0-incubating-src.zip.asc";>ASC
+Apache Livy 0.7.0-incubating (source zip)
+https://www.apache.org/dyn/closer.lua/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip";>zip
+https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip.sha512";>SHA-512
+https://www.apache.org/dist/incubator/livy/0.7.0-incubating/apache-livy-0.7.0-incubating-src.zip.asc";>ASC
 
 
 
@@ -220,7 +220,7 @@ succeed.
 
   
   
-
+
 
 
 
diff --git a/content/examples/index.html b/content/examples/index.html
index c50c22c..db8c336 100644
--- a/content/examples/index.html
+++ b/content/examples/index.html
@@ -343,7 +343,7 @@ Pi. This is from the https://spark.apache.org/examples.html";>Spark Exam
 
   
   
-
+
 
 
 
diff --git a/content/feed.xml b/content/feed.xml
index 19e4d93..eab24ab 100644
--- a/content/feed.xml
+++ b/content/feed.xml
@@ -5,8 +5,8 @@
 
 /
 
-Wed, 03 Apr 2019 09:51:26 -0700
-Wed, 03 Apr 2019 09:51:26 -0700
+Sun, 02 Feb 2020 17:45:09 +0800
+Sun, 02 Feb 2020 17:45:09 +0800
 Jekyll v3.4.5
 
   
diff --git a/content/get-started/index.html b/content/get-started/index.html
index e1185de..fc2f911 100644
--- a/content/get-started/index.html
+++ b/content/get-started/index.html
@@ -213,7 +213,7 @@ or you can check out the API documentation:
 
   
   
-
+
 
 
 
diff --git a/content/histor

[incubator-livy] tag v0.7.0-incubating created (now 6645033)

2020-02-02 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a change to tag v0.7.0-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git.


  at 6645033  (commit)
No new revisions were added by this update.



svn commit: r37830 - /dev/incubator/livy/0.7.0-incubating-rc4/ /release/incubator/livy/0.7.0-incubating/

2020-02-01 Thread jshao
Author: jshao
Date: Sun Feb  2 02:22:45 2020
New Revision: 37830

Log:
Release Apache Livy 0.7.0-incubating

Added:
release/incubator/livy/0.7.0-incubating/
  - copied from r37829, dev/incubator/livy/0.7.0-incubating-rc4/
Removed:
dev/incubator/livy/0.7.0-incubating-rc4/



[incubator-livy] branch master updated: [LIVY-735][RSC] Fix rpc channel closed when multi clients connect to one driver

2020-01-08 Thread jshao
This is an automated email from the ASF dual-hosted git repository.

jshao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-livy.git


The following commit(s) were added to refs/heads/master by this push:
 new 66b5833  [LIVY-735][RSC] Fix rpc channel closed when multi clients 
connect to one driver
66b5833 is described below

commit 66b5833e413bc10e39e3b92b585f496444c147d4
Author: runzhiwang 
AuthorDate: Wed Jan 8 17:15:04 2020 +0800

[LIVY-735][RSC] Fix rpc channel closed when multi clients connect to one 
driver

## What changes were proposed in this pull request?

Currently, the driver tries to support communicating with multi-clients, by 
registering each client at 
https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java#L220.

But actually, if multi-clients connect to one driver, the rpc channel will 
close, the reason are as follows.

1.  In every communication, client sends two packages to driver: 
header{type, id}, and payload at 
https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L144.

2. If client1 sends header1, payload1, and client2 sends header2, payload2 
at the same time.
  The driver receives the package in the order: header1, header2, payload1, 
payload2.

3. When driver receives header1, driver assigns lastHeader at 
https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L73.

4. Then driver receives header2, driver process it as a payload at 
https://github.com/apache/incubator-livy/blob/master/rsc/src/main/java/org/apache/livy/rsc/rpc/RpcDispatcher.java#L78
 which cause exception and rpc channel closed.

In the muti-active HA mode, the design doc is at: 
https://docs.google.com/document/d/1bD3qYZpw14_NuCcSGUOfqQ0pqvSbCQsOLFuZp26Ohjc/edit?usp=sharing,
 the session is allocated among servers by consistent hashing. If a new livy 
joins, some sessions will be migrated from the old livy to the new livy. If the session 
client in the new livy connects to the driver before stopping the session client in the old livy, 
then two session clients will both connect to the driver, and the rpc channel closes.  
In this case, it's hard to e [...]

How to fix:
1. Move the code of processing client message from `RpcDispatcher` to each 
`Rpc`.
2. Each `Rpc` registers itself to `channelRpc` in RpcDispatcher.
3. `RpcDispatcher` dispatches each message to `Rpc` according to  
`ctx.channel()`.

## How was this patch tested?

Existed UT and IT

Author: runzhiwang 

Closes #268 from runzhiwang/multi-client-one-driver.
---
 .../java/org/apache/livy/rsc/driver/RSCDriver.java |   1 +
 rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java | 185 -
 .../org/apache/livy/rsc/rpc/RpcDispatcher.java | 167 ++-
 3 files changed, 196 insertions(+), 157 deletions(-)

diff --git a/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java 
b/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java
index 0d8eec5..a8f31f7 100644
--- a/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java
+++ b/rsc/src/main/java/org/apache/livy/rsc/driver/RSCDriver.java
@@ -224,6 +224,7 @@ public class RSCDriver extends BaseProtocol {
   @Override
   public void onSuccess(Void unused) {
 clients.remove(client);
+client.unRegisterRpc();
 if (!inShutdown.get()) {
   setupIdleTimeout();
 }
diff --git a/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java 
b/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java
index 868dc6d..5fce164 100644
--- a/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java
+++ b/rsc/src/main/java/org/apache/livy/rsc/rpc/Rpc.java
@@ -19,10 +19,11 @@ package org.apache.livy.rsc.rpc;
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.Map;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -208,6 +209,7 @@ public class Rpc implements Closeable {
 dispatcher);
 Rpc rpc = new Rpc(new RSCConf(null), c, ImmediateEventExecutor.INSTANCE);
 rpc.dispatcher = dispatcher;
+dispatcher.registerRpc(c, rpc);
 return rpc;
   }
 
@@ -218,6 +220,10 @@ public class Rpc implements Closeable {
   private final EventExecutorGroup egroup;
   private volatile RpcDispatcher dispatcher;
 
+  private final Map<Class<?>, Method> handlers = new ConcurrentHashMap<>();
+  private final Collection rpcCalls = new 
Concurr

<    1   2   3   4   5   6   7   8   9   10   >