This is an automated email from the ASF dual-hosted git repository.
junhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 00a8e59b27 [core] Add lazied fields in RowType to improve performance (#6214)
00a8e59b27 is described below
commit 00a8e59b2743e0728b76aa8462bd11e972113593
Author: Jingsong Lee <[email protected]>
AuthorDate: Tue Sep 9 15:14:58 2025 +0800
[core] Add lazied fields in RowType to improve performance (#6214)
---
.../main/java/org/apache/paimon/types/RowType.java | 108 ++++++++++++++-------
1 file changed, 72 insertions(+), 36 deletions(-)
diff --git a/paimon-api/src/main/java/org/apache/paimon/types/RowType.java b/paimon-api/src/main/java/org/apache/paimon/types/RowType.java
index bd1ca10995..5084f5e4f8 100644
--- a/paimon-api/src/main/java/org/apache/paimon/types/RowType.java
+++ b/paimon-api/src/main/java/org/apache/paimon/types/RowType.java
@@ -34,8 +34,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -61,6 +63,12 @@ public final class RowType extends DataType {
private final List<DataField> fields;
+ private transient volatile Map<String, DataField> laziedNameToField;
+ private transient volatile Map<String, Integer> laziedNameToIndex;
+
+ private transient volatile Map<Integer, DataField> laziedFieldIdToField;
+ private transient volatile Map<Integer, Integer> laziedFieldIdToIndex;
+
public RowType(boolean isNullable, List<DataField> fields) {
super(isNullable, DataTypeRoot.ROW);
this.fields =
@@ -101,39 +109,23 @@ public final class RowType extends DataType {
}
public int getFieldIndex(String fieldName) {
- for (int i = 0; i < fields.size(); i++) {
- if (fields.get(i).name().equals(fieldName)) {
- return i;
- }
- }
- return -1;
+ return nameToIndex().getOrDefault(fieldName, -1);
}
public int[] getFieldIndices(List<String> projectFields) {
- List<String> fieldNames = getFieldNames();
int[] projection = new int[projectFields.size()];
for (int i = 0; i < projection.length; i++) {
- projection[i] = fieldNames.indexOf(projectFields.get(i));
+ projection[i] = getFieldIndex(projectFields.get(i));
}
return projection;
}
public boolean containsField(String fieldName) {
- for (DataField field : fields) {
- if (field.name().equals(fieldName)) {
- return true;
- }
- }
- return false;
+ return nameToField().containsKey(fieldName);
}
public boolean containsField(int fieldId) {
- for (DataField field : fields) {
- if (field.id() == fieldId) {
- return true;
- }
- }
- return false;
+ return fieldIdToField().containsKey(fieldId);
}
public boolean notContainsField(String fieldName) {
@@ -141,31 +133,27 @@ public final class RowType extends DataType {
}
public DataField getField(String fieldName) {
- for (DataField field : fields) {
- if (field.name().equals(fieldName)) {
- return field;
- }
+ DataField field = nameToField().get(fieldName);
+ if (field == null) {
+ throw new RuntimeException("Cannot find field: " + fieldName);
}
-
- throw new RuntimeException("Cannot find field: " + fieldName);
+ return field;
}
public DataField getField(int fieldId) {
- for (DataField field : fields) {
- if (field.id() == fieldId) {
- return field;
- }
+ DataField field = fieldIdToField().get(fieldId);
+ if (field == null) {
+ throw new RuntimeException("Cannot find field by field id: " + fieldId);
}
- throw new RuntimeException("Cannot find field by field id: " + fieldId);
+ return field;
}
public int getFieldIndexByFieldId(int fieldId) {
- for (int i = 0; i < fields.size(); i++) {
- if (fields.get(i).id() == fieldId) {
- return i;
- }
+ Integer index = fieldIdToIndex().get(fieldId);
+ if (index == null) {
+ throw new RuntimeException("Cannot find field index by FieldId " + fieldId);
}
- throw new RuntimeException("Cannot find field index by FieldId " + fieldId);
+ return index;
}
@Override
@@ -331,6 +319,54 @@ public final class RowType extends DataType {
return project(Arrays.asList(names));
}
+ private Map<String, DataField> nameToField() {
+ Map<String, DataField> nameToField = this.laziedNameToField;
+ if (nameToField == null) {
+ nameToField = new HashMap<>();
+ for (DataField field : fields) {
+ nameToField.put(field.name(), field);
+ }
+ this.laziedNameToField = nameToField;
+ }
+ return nameToField;
+ }
+
+ private Map<String, Integer> nameToIndex() {
+ Map<String, Integer> nameToIndex = this.laziedNameToIndex;
+ if (nameToIndex == null) {
+ nameToIndex = new HashMap<>();
+ for (int i = 0; i < fields.size(); i++) {
+ nameToIndex.put(fields.get(i).name(), i);
+ }
+ this.laziedNameToIndex = nameToIndex;
+ }
+ return nameToIndex;
+ }
+
+ private Map<Integer, DataField> fieldIdToField() {
+ Map<Integer, DataField> fieldIdToField = this.laziedFieldIdToField;
+ if (fieldIdToField == null) {
+ fieldIdToField = new HashMap<>();
+ for (DataField field : fields) {
+ fieldIdToField.put(field.id(), field);
+ }
+ this.laziedFieldIdToField = fieldIdToField;
+ }
+ return fieldIdToField;
+ }
+
+ private Map<Integer, Integer> fieldIdToIndex() {
+ Map<Integer, Integer> fieldIdToIndex = this.laziedFieldIdToIndex;
+ if (fieldIdToIndex == null) {
+ fieldIdToIndex = new HashMap<>();
+ for (int i = 0; i < fields.size(); i++) {
+ fieldIdToIndex.put(fields.get(i).id(), i);
+ }
+ this.laziedFieldIdToIndex = fieldIdToIndex;
+ }
+ return fieldIdToIndex;
+ }
+
public static RowType of() {
return new RowType(true, Collections.emptyList());
}