This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6d4ba9eb382 [fix](maxcompute)fix maxcompute partition column schema
order (#48325)
6d4ba9eb382 is described below
commit 6d4ba9eb3826ca1474a2284e8a8f50011d11d076
Author: daidai <[email protected]>
AuthorDate: Wed Feb 26 17:30:12 2025 +0800
[fix](maxcompute)fix maxcompute partition column schema order (#48325)
### What problem does this PR solve?
Problem Summary:
```sql
Create the table in MaxCompute:
CREATE TABLE `mc_parts` (`id` STRING COMMENT 'Id')
PARTITIONED BY (
`ds` STRING ,
`audit_flag` STRING );
Query it in Doris:
before:
mysql> desc mc_parts;
+------------+------+------+------+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+------------+------+------+------+---------+-------+
| id | text | Yes | true | NULL | |
| audit_flag | text | Yes | true | NULL | |
| ds | text | Yes | true | NULL | |
+------------+------+------+------+---------+-------+
mysql> select * from mc_parts;
ERROR 1105 (HY000): errCode = 2, detailMessage = java.io.IOException:
ODPS-0420061: Invalid parameter in HTTP request - The requested columns should
be in order with table schema
The reason is that the ds column must come before audit_flag, matching the
partition column order declared in the table schema; the same order is also
required for partition pruning (e.g. ds=2024-01-01/audit_flag=Y).
after:
mysql> desc mc_parts;
+------------+------+------+------+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+------------+------+------+------+---------+-------+
| id | text | Yes | true | NULL | |
| ds | text | Yes | true | NULL | |
| audit_flag | text | Yes | true | NULL | |
+------------+------+------+------+---------+-------+
```
---
.../maxcompute/MaxComputeExternalTable.java | 37 ++++++++++-----------
.../maxcompute/MaxComputeSchemaCacheValue.java | 21 ++++++------
.../test_external_catalog_maxcompute.out | Bin 3747 -> 4745 bytes
.../test_external_catalog_maxcompute.groovy | 26 +++++++++++++--
4 files changed, 52 insertions(+), 32 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
index 96eeb76684e..7f626655442 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
@@ -128,11 +128,11 @@ public class MaxComputeExternalTable extends
ExternalTable {
private TablePartitionValues
loadPartitionValues(MaxComputeSchemaCacheValue schemaCacheValue) {
List<String> partitionSpecs = schemaCacheValue.getPartitionSpecs();
List<Type> partitionTypes = schemaCacheValue.getPartitionTypes();
+ List<String> partitionColumnNames =
schemaCacheValue.getPartitionColumnNames();
TablePartitionValues partitionValues = new TablePartitionValues();
partitionValues.addPartitions(partitionSpecs,
partitionSpecs.stream()
- .map(p ->
parsePartitionValues(getPartitionColumns().stream().map(c ->
c.getName()).collect(
- Collectors.toList()), p))
+ .map(p -> parsePartitionValues(partitionColumnNames,
p))
.collect(Collectors.toList()),
partitionTypes);
return partitionValues;
@@ -187,9 +187,21 @@ public class MaxComputeExternalTable extends ExternalTable
{
}
List<com.aliyun.odps.Column> partitionColumns =
odpsTable.getSchema().getPartitionColumns();
+ List<String> partitionColumnNames = new
ArrayList<>(partitionColumns.size());
+ List<Type> partitionTypes = new ArrayList<>(partitionColumns.size());
- for (com.aliyun.odps.Column partitionColumn : partitionColumns) {
- columnNameToOdpsColumn.put(partitionColumn.getName(),
partitionColumn);
+ // sort partition columns to align partitionTypes and partitionName.
+ List<Column> partitionDorisColumns = new ArrayList<>();
+ for (com.aliyun.odps.Column partColumn : partitionColumns) {
+ Type partitionType = mcTypeToDorisType(partColumn.getTypeInfo());
+ Column dorisCol = new Column(partColumn.getName(), partitionType,
true, null,
+ true, partColumn.getComment(), true, -1);
+
+ columnNameToOdpsColumn.put(partColumn.getName(), partColumn);
+ partitionColumnNames.add(partColumn.getName());
+ partitionDorisColumns.add(dorisCol);
+ partitionTypes.add(partitionType);
+ schema.add(dorisCol);
}
List<String> partitionSpecs;
@@ -200,22 +212,9 @@ public class MaxComputeExternalTable extends ExternalTable
{
} else {
partitionSpecs = ImmutableList.of();
}
- // sort partition columns to align partitionTypes and partitionName.
- Map<String, Column> partitionNameToColumns = Maps.newHashMap();
- for (com.aliyun.odps.Column partColumn : partitionColumns) {
- Column dorisCol = new Column(partColumn.getName(),
- mcTypeToDorisType(partColumn.getTypeInfo()), true, null,
- true, partColumn.getComment(), true, -1);
- partitionNameToColumns.put(dorisCol.getName(), dorisCol);
- }
- List<Type> partitionTypes = partitionNameToColumns.values()
- .stream()
- .map(Column::getType)
- .collect(Collectors.toList());
- schema.addAll(partitionNameToColumns.values());
- return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable,
partitionSpecs, partitionNameToColumns,
- partitionTypes));
+ return Optional.of(new MaxComputeSchemaCacheValue(schema, odpsTable,
partitionColumnNames,
+ partitionSpecs, partitionDorisColumns, partitionTypes));
}
private Type mcTypeToDorisType(TypeInfo typeInfo) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
index b8337d96120..0d0fb69a3e4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeSchemaCacheValue.java
@@ -22,36 +22,35 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.datasource.SchemaCacheValue;
import com.aliyun.odps.Table;
-import com.google.common.collect.Lists;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
-import java.util.Map;
-import java.util.Set;
@Getter
@Setter
public class MaxComputeSchemaCacheValue extends SchemaCacheValue {
private Table odpsTable;
+ private List<String> partitionColumnNames;
private List<String> partitionSpecs;
- private Map<String, Column> partitionNameToColumns;
+ private List<Column> partitionColumns;
private List<Type> partitionTypes;
- public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable,
List<String> partitionSpecs,
- Map<String, Column> partitionNameToColumns, List<Type>
partitionTypes) {
+ public MaxComputeSchemaCacheValue(List<Column> schema, Table odpsTable,
List<String> partitionColumnNames,
+ List<String> partitionSpecs, List<Column> partitionColumns,
List<Type> partitionTypes) {
super(schema);
this.odpsTable = odpsTable;
this.partitionSpecs = partitionSpecs;
- this.partitionNameToColumns = partitionNameToColumns;
+ this.partitionColumnNames = partitionColumnNames;
+ this.partitionColumns = partitionColumns;
this.partitionTypes = partitionTypes;
}
- public Set<String> getPartitionColNames() {
- return partitionNameToColumns.keySet();
+ public List<Column> getPartitionColumns() {
+ return partitionColumns;
}
- public List<Column> getPartitionColumns() {
- return Lists.newArrayList(partitionNameToColumns.values());
+ public List<String> getPartitionColumnNames() {
+ return partitionColumnNames;
}
}
diff --git
a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
index 63677f14720..c388e4eddf6 100644
Binary files
a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
and
b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
differ
diff --git
a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
index 81133270fb6..6843a586d86 100644
---
a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
+++
b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
@@ -314,8 +314,17 @@
INSERT INTO `other_db_mc_parts` PARTITION (dt='e') VALUES
(1005, 'Sample data 5');
-
-
+ CREATE TABLE `mc_parts2` (
+ `id` STRING COMMENT 'Id'
+ )
+ PARTITIONED BY (
+ `ds` STRING ,
+ `audit_flag` STRING
+ );
+ INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-01-01',
`audit_flag`='Y') values ('1');
+ INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-04-04',
`audit_flag`='N') values ('2');
+ INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2027-01-09',
`audit_flag`='Y') values ('3');
+ INSERT INTO TABLE `mc_parts2` PARTITION (`ds`='2024-01-01',
`audit_flag`='N') values ('4');
*/
suite("test_external_catalog_maxcompute",
"p2,external,maxcompute,external_remote,external_remote_maxcompute") {
String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
@@ -353,6 +362,7 @@ suite("test_external_catalog_maxcompute",
"p2,external,maxcompute,external_remot
order_qt_q5 """ select * from mc_parts where dt > '2023-08-03'
order by mc_bigint """
order_qt_q6 """ select * from mc_parts where dt > '2023-08-03' and
mc_bigint > 1002 """
order_qt_q7 """ select * from mc_parts where dt < '2023-08-03' or
(mc_bigint > 1003 and dt > '2023-08-04') order by mc_bigint, dt; """
+ qt_q8 """ desc mc_parts """
}
sql """ switch `${mc_catalog_name}`; """
@@ -380,6 +390,8 @@ suite("test_external_catalog_maxcompute",
"p2,external,maxcompute,external_remot
sql """ refresh catalog ${mc_catalog_name} """
sql """ switch `${mc_catalog_name}`; """
sql """ use `${mc_db}`; """
+
+ qt_multi_partition_q1 """ desc multi_partitions """
order_qt_multi_partition_q1 """ show partitions from multi_partitions;
"""
order_qt_multi_partition_q2 """ select pt, create_time, yy, mm, dd
from multi_partitions where pt>-1 and yy > '' and mm > '' and dd >'' order by
pt , dd; """
order_qt_multi_partition_q3 """ select sum(pt), create_time, yy, mm,
dd from multi_partitions where yy > '' and mm > '' and dd >'' group by
create_time, yy, mm, dd order by create_time,dd ; """
@@ -425,5 +437,15 @@ suite("test_external_catalog_maxcompute",
"p2,external,maxcompute,external_remot
order_qt_show_partition """ show partitions from mc_parts """
+ order_qt_part2_q1 """ select * from mc_parts2 """
+ order_qt_part2_q2 """ SELECT * FROM `mc_parts2` WHERE `ds` =
'2024-01-01' AND `audit_flag` = 'Y'; """
+ order_qt_part2_q3 """ SELECT * FROM `mc_parts2` WHERE `audit_flag` =
'Y';"""
+ order_qt_part2_q4 """ SELECT * FROM `mc_parts2` WHERE `ds` BETWEEN
'2024-01-01' AND '2027-01-01';"""
+ order_qt_part2_q5 """ SELECT ds FROM `mc_parts2` WHERE `ds` !=
'2027-01-09';"""
+ order_qt_part2_q6 """ SELECT ds,audit_flag,id FROM `mc_parts2` WHERE
`ds` != '2027-01-09';"""
+ order_qt_part2_q7 """ SELECT audit_flag,ds,ds,id,id,id FROM
`mc_parts2`;"""
+ order_qt_part2_q8 """ SELECT audit_flag FROM `mc_parts2` WHERE `ds` !=
'2027-01-09';"""
+ qt_part2_q9 """ desc mc_parts2 """
+
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]