This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2c18cf2d2d4 [test](cold hot separation) add a cold hot separation regression test for hdfs (#48452)
2c18cf2d2d4 is described below
commit 2c18cf2d2d4fe345814659c8ac735cc3ad6fa892
Author: yagagagaga <[email protected]>
AuthorDate: Fri Mar 7 16:25:41 2025 +0800
[test](cold hot separation) add a cold hot separation regression test for hdfs (#48452)
---
.../org/apache/doris/regression/suite/Suite.groovy | 12 ++
.../cold_data_compaction_by_hdfs.groovy | 129 +++++++++++++++++++++
2 files changed, 141 insertions(+)
diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index 831554361fe..971ea6b5965 100644
--- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -60,6 +60,7 @@ import org.apache.doris.regression.util.Http
 import org.apache.doris.regression.util.SuiteUtils
 import org.apache.doris.regression.util.DebugPoint
 import org.apache.doris.regression.RunMode
+import org.apache.hadoop.fs.FileSystem
 import org.codehaus.groovy.runtime.IOGroovyMethods
 import org.jetbrains.annotations.NotNull
 import org.junit.jupiter.api.Assertions
@@ -105,6 +106,7 @@ class Suite implements GroovyInterceptable {
     static Boolean isTrinoConnectorDownloaded = false
     private AmazonS3 s3Client = null
+    private FileSystem fs = null

     Suite(String name, String group, SuiteContext context, SuiteCluster cluster) {
         this.name = name
@@ -925,6 +927,16 @@ class Suite implements GroovyInterceptable {
         return enableHdfs.equals("true");
     }

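+    // Lazily create and cache a Hadoop FileSystem handle from the suite's
+    // otherConfigs (hdfsFs, hdfsUser); synchronized so concurrent callers
+    // share a single instance.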
+    synchronized FileSystem getHdfs() {
+        if (fs == null) {
+            String hdfsFs = context.config.otherConfigs.get("hdfsFs")
+            String hdfsUser = context.config.otherConfigs.get("hdfsUser")
+            Hdfs hdfs = new Hdfs(hdfsFs, hdfsUser, context.config.dataPath + "/")
+            fs = hdfs.fs
+        }
+        return fs
+    }
+
     String uploadToHdfs(String localFile) {
         // as a group can rewrite the origin data file, the data dir is not tied to the group
         String dataDir = context.config.dataPath + "/"
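
For context, a minimal sketch of how a suite could use the new getHdfs() helper to inspect remote files (the suite name and tablet path below are hypothetical):

    import org.apache.hadoop.fs.Path

    suite("example_inspect_remote_files") {
        if (!enableHdfs()) {
            return
        }
        // hypothetical tablet directory under the shared HDFS data dir
        def dir = new Path("${getHdfsDataDir()}/regression/example/data/12345")
        // list the rowset/meta files currently stored on HDFS
        def files = getHdfs().listStatus(dir)
        logger.info("remote file count: ${files.size()}")
    }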
diff --git a/regression-test/suites/cold_heat_separation/cold_data_compaction_by_hdfs.groovy b/regression-test/suites/cold_heat_separation/cold_data_compaction_by_hdfs.groovy
new file mode 100644
index 00000000000..e2fba24ec23
--- /dev/null
+++ b/regression-test/suites/cold_heat_separation/cold_data_compaction_by_hdfs.groovy
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.hadoop.fs.Path
+import java.util.function.Supplier
+
+suite("test_cold_data_compaction_by_hdfs") {
+
+    if (!enableHdfs()) {
+        logger.info("Skip this case, because HDFS is not available")
+        return
+    }
+
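+    // Poll the supplier roughly every 10 seconds until it returns true,
+    // throwing once timeoutSecond seconds have elapsed.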
+    def retryUntilTimeout = { int timeoutSecond, Supplier<Boolean> closure ->
+        long start = System.currentTimeMillis()
+        while (true) {
+            if (closure.get()) {
+                return
+            } else {
+                if (System.currentTimeMillis() - start > timeoutSecond * 1000) {
+                    throw new RuntimeException("" +
+                            "Operation timeout, maybe you need to check " +
+                            "remove_unused_remote_files_interval_sec and " +
+                            "cold_data_compaction_interval_sec in be.conf")
+                } else {
+                    sleep(10_000)
+                }
+            }
+        }
+    }
+
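+    // Set up an HDFS resource and a storage policy with a 5-second cooldown
+    // TTL, then a single-bucket table bound to that policy. Auto compaction
+    // is disabled so the five rowsets stay separate until the test
+    // re-enables it to trigger cold data compaction.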
+    String suffix = UUID.randomUUID().hashCode().abs().toString()
+    String prefix = "${getHdfsDataDir()}/regression/cold_data_compaction"
+    multi_sql """
+        DROP TABLE IF EXISTS t_recycle_in_hdfs;
+        DROP STORAGE POLICY IF EXISTS test_policy_${suffix};
+        DROP RESOURCE IF EXISTS 'remote_hdfs_${suffix}';
+        CREATE RESOURCE "remote_hdfs_${suffix}"
+        PROPERTIES
+        (
+            "type"="hdfs",
+            "fs.defaultFS"="${getHdfsFs()}",
+            "hadoop.username"="${getHdfsUser()}",
+            "hadoop.password"="${getHdfsPasswd()}",
+            "root_path"="${prefix}"
+        );
+        CREATE STORAGE POLICY test_policy_${suffix}
+        PROPERTIES(
+            "storage_resource" = "remote_hdfs_${suffix}",
+            "cooldown_ttl" = "5"
+        );
+        CREATE TABLE IF NOT EXISTS t_recycle_in_hdfs
+        (
+            k1 BIGINT,
+            k2 LARGEINT,
+            v1 VARCHAR(2048)
+        )
+        DISTRIBUTED BY HASH (k1) BUCKETS 1
+        PROPERTIES(
+            "storage_policy" = "test_policy_${suffix}",
+            "disable_auto_compaction" = "true",
+            "replication_num" = "1"
+        );
+    """
+
+    // insert 5 RowSets
+    multi_sql """
+        insert into t_recycle_in_hdfs values(1, 1, 'Tom');
+        insert into t_recycle_in_hdfs values(2, 2, 'Jelly');
+        insert into t_recycle_in_hdfs values(3, 3, 'Spike');
+        insert into t_recycle_in_hdfs values(4, 4, 'Tyke');
+        insert into t_recycle_in_hdfs values(5, 5, 'Tuffy');
+    """
+
+    // wait until the files are uploaded to HDFS
+    retryUntilTimeout(1800, {
+        def res = sql_return_maparray "show data from t_recycle_in_hdfs"
+        String size = ""
+        String remoteSize = ""
+        for (final def line in res) {
+            if ("t_recycle_in_hdfs".equals(line.TableName)) {
+                size = line.Size
+                remoteSize = line.RemoteSize
+                break
+            }
+        }
+        logger.info("waiting for data to be uploaded to HDFS: t_recycle_in_hdfs's local data size: ${size}, remote data size: ${remoteSize}")
+        return size.startsWith("0") && !remoteSize.startsWith("0")
+    })
+
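+    // Each rowset is stored as one remote file and the tablet keeps one
+    // meta file, hence the 5 + 1 files expected before compaction.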
+    String tabletId = sql_return_maparray("show tablets from t_recycle_in_hdfs")[0].TabletId
+    // check the number of remote files
+    def filesBeforeCompaction = getHdfs().listStatus(new Path(prefix + "/data/${tabletId}"))
+
+    // 5 RowSets + 1 meta
+    assertEquals(6, filesBeforeCompaction.size())
+
+    // trigger cold data compaction
+    sql """alter table t_recycle_in_hdfs set ("disable_auto_compaction" = "false")"""
+
+    // wait until the compaction finishes
+    retryUntilTimeout(1800, {
+        def filesAfterCompaction = getHdfs().listStatus(new Path(prefix + "/data/${tabletId}"))
+        logger.info("t_recycle_in_hdfs's remote file number is ${filesAfterCompaction.size()}")
+        // 1 RowSet + 1 meta
+        return filesAfterCompaction.size() == 2
+    })
+
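+    // Dropping the table with FORCE should eventually remove the remote
+    // data as well; poll until the tablet directory disappears from HDFS.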
+ sql "drop table t_recycle_in_hdfs force"
+ retryUntilTimeout(1800, {
+ def pathExists = getHdfs().exists(new Path(prefix +
"/data/${tabletId}"))
+ logger.info("after drop t_recycle_in_hdfs, the remote file path
${pathExists ? "exists" : "not exists"}")
+ return !pathExists
+ })
+}
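
To run only this suite locally, something like the following should work with the regression framework's launcher (exact flags depend on your local setup; treat this as a sketch):

    sh run-regression-test.sh --run test_cold_data_compaction_by_hdfs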