JingsongLi commented on code in PR #484:
URL: https://github.com/apache/flink-table-store/pull/484#discussion_r1091488105


##########
flink-table-store-core/src/main/java/org/apache/flink/table/store/table/system/FilesTable.java:
##########
@@ -0,0 +1,335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.table.store.table.system;
+
+import org.apache.flink.core.fs.Path;
+import org.apache.flink.table.store.CoreOptions;
+import org.apache.flink.table.store.data.BinaryString;
+import org.apache.flink.table.store.data.GenericRow;
+import org.apache.flink.table.store.data.InternalRow;
+import org.apache.flink.table.store.file.casting.CastExecutor;
+import org.apache.flink.table.store.file.io.DataFileMeta;
+import org.apache.flink.table.store.file.io.DataFilePathFactory;
+import org.apache.flink.table.store.file.predicate.Predicate;
+import org.apache.flink.table.store.file.schema.SchemaEvolutionUtil;
+import org.apache.flink.table.store.file.schema.SchemaManager;
+import org.apache.flink.table.store.file.schema.TableSchema;
+import org.apache.flink.table.store.file.stats.BinaryTableStats;
+import org.apache.flink.table.store.file.stats.FieldStatsArraySerializer;
+import org.apache.flink.table.store.file.utils.IteratorRecordReader;
+import org.apache.flink.table.store.file.utils.RecordReader;
+import org.apache.flink.table.store.file.utils.SerializationUtils;
+import org.apache.flink.table.store.format.FieldStats;
+import org.apache.flink.table.store.table.FileStoreTable;
+import org.apache.flink.table.store.table.Table;
+import org.apache.flink.table.store.table.source.DataSplit;
+import org.apache.flink.table.store.table.source.DataTableScan;
+import org.apache.flink.table.store.table.source.Split;
+import org.apache.flink.table.store.table.source.TableRead;
+import org.apache.flink.table.store.table.source.TableScan;
+import org.apache.flink.table.store.types.BigIntType;
+import org.apache.flink.table.store.types.DataField;
+import org.apache.flink.table.store.types.IntType;
+import org.apache.flink.table.store.types.RowType;
+import org.apache.flink.table.store.utils.ProjectedRow;
+import org.apache.flink.table.store.utils.RowDataToObjectArrayConverter;
+
+import org.apache.flink.shaded.guava30.com.google.common.collect.Iterators;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.TreeMap;
+
+import static org.apache.flink.table.store.file.catalog.Catalog.SYSTEM_TABLE_SPLITTER;
+import static org.apache.flink.util.Preconditions.checkArgument;
+
+/** A {@link Table} for showing the files of a snapshot in a specific table. */
+public class FilesTable implements Table {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final String FILES = "files";
+
+    public static final RowType TABLE_TYPE =
+            new RowType(
+                    Arrays.asList(
+                            new DataField(0, "snapshot_id", new BigIntType(false)),
+                            new DataField(1, "partition", SerializationUtils.newStringType(true)),
+                            new DataField(2, "bucket", new IntType(false)),
+                            new DataField(3, "file_path", SerializationUtils.newStringType(false)),
+                            new DataField(
+                                    4, "file_format", SerializationUtils.newStringType(false)),
+                            new DataField(5, "schema_id", new BigIntType(false)),
+                            new DataField(6, "level", new IntType(false)),
+                            new DataField(7, "record_count", new BigIntType(false)),
+                            new DataField(8, "file_size_in_bytes", new BigIntType(false)),
+                            new DataField(9, "min_key", SerializationUtils.newStringType(true)),
+                            new DataField(10, "max_key", SerializationUtils.newStringType(true)),
+                            new DataField(
+                                    11,
+                                    "null_value_counts",
+                                    SerializationUtils.newStringType(false)),
+                            new DataField(
+                                    12, "min_value_stats", SerializationUtils.newStringType(false)),
+                            new DataField(
+                                    13,
+                                    "max_value_stats",
+                                    SerializationUtils.newStringType(false))));
+
+    private final FileStoreTable storeTable;
+    private final CoreOptions options;
+
+    public FilesTable(FileStoreTable storeTable, CoreOptions options) {
+        this.storeTable = storeTable;
+        this.options = options;
+    }
+
+    @Override
+    public String name() {
+        return storeTable.name() + SYSTEM_TABLE_SPLITTER + FILES;
+    }
+
+    @Override
+    public RowType rowType() {
+        return TABLE_TYPE;
+    }
+
+    @Override
+    public Path location() {
+        return storeTable.location();
+    }
+
+    @Override
+    public TableScan newScan() {
+        return new FilesScan(storeTable, options.scanSnapshotId());
+    }
+
+    @Override
+    public TableRead newRead() {
+        return new FilesRead(new SchemaManager(storeTable.location()));
+    }
+
+    @Override
+    public Table copy(Map<String, String> dynamicOptions) {
+        return new FilesTable(storeTable, new CoreOptions(dynamicOptions));
+    }
+
+    private static class FilesScan implements TableScan {
+        private final FileStoreTable storeTable;
+
+        @Nullable private final Long snapshotId;
+
+        private FilesScan(FileStoreTable storeTable, Long snapshotId) {
+            this.storeTable = storeTable;
+            this.snapshotId = snapshotId;
+        }
+
+        @Override
+        public TableScan withFilter(Predicate predicate) {
+            // TODO
+            return this;
+        }
+
+        @Override
+        public Plan plan() {
+            return () -> Collections.singletonList(new FilesSplit(snapshotId, storeTable));
+        }
+    }
+
+    private static class FilesSplit implements Split {
+
+        private static final long serialVersionUID = 1L;
+
+        @Nullable private final Long snapshotId;
+
+        private final FileStoreTable storeTable;
+
+        private FilesSplit(@Nullable Long snapshotId, FileStoreTable storeTable) {
+            this.snapshotId = snapshotId;
+            this.storeTable = storeTable;
+        }
+
+        @Override
+        public long rowCount() {
+            return dataFilePlan().splits.stream().mapToLong(s -> s.files().size()).sum();
+        }
+
+        private DataTableScan.DataFilePlan dataFilePlan() {
+            DataTableScan scan = storeTable.newScan();
+            if (snapshotId != null) {
+                scan.withSnapshot(snapshotId);
+            }
+            return scan.plan();
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            FilesSplit that = (FilesSplit) o;
+            return Objects.equals(storeTable, that.storeTable)
+                    && Objects.equals(snapshotId, that.snapshotId);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(snapshotId, storeTable);
+        }
+    }
+
+    private static class FilesRead implements TableRead {
+        private final SchemaManager schemaManager;
+
+        private int[][] projection;
+
+        private FilesRead(SchemaManager schemaManager) {
+            this.schemaManager = schemaManager;
+        }
+
+        @Override
+        public TableRead withFilter(Predicate predicate) {
+            // TODO
+            return this;
+        }
+
+        @Override
+        public TableRead withProjection(int[][] projection) {
+            this.projection = projection;
+            return this;
+        }
+
+        @Override
+        public RecordReader<InternalRow> createReader(Split split) throws IOException {
+            if (!(split instanceof FilesSplit)) {
+                throw new IllegalArgumentException("Unsupported split: " + split.getClass());
+            }
+            FilesSplit filesSplit = (FilesSplit) split;
+            DataTableScan.DataFilePlan dataFilePlan = filesSplit.dataFilePlan();
+            List<Iterator<InternalRow>> iteratorList = new ArrayList<>();
+            for (DataSplit dataSplit : dataFilePlan.splits) {
+                iteratorList.add(
+                        Iterators.transform(
+                                dataSplit.files().iterator(),
+                                v ->
+                                        toRow(
+                                                dataSplit,
+                                                dataFilePlan.snapshotId,
+                                                v,
+                                                filesSplit.storeTable)));
+            }
+            Iterator<InternalRow> rows = Iterators.concat(iteratorList.iterator());
+            if (projection != null) {
+                rows =
+                        Iterators.transform(
+                                rows, row -> ProjectedRow.from(projection).replaceRow(row));
+            }
+            return new IteratorRecordReader<>(rows);
+        }
+
+        private InternalRow toRow(
+                DataSplit dataSplit,
+                Long snapshotId,
+                DataFileMeta dataFileMeta,
+                FileStoreTable storeTable) {
+            TableSchema tableSchema =
+                    schemaManager.schema(
+                            storeTable.snapshotManager().snapshot(snapshotId).schemaId());
+            RowDataToObjectArrayConverter partitionConverter =
+                    new RowDataToObjectArrayConverter(tableSchema.logicalPartitionType());
+
+            TableSchema dataSchema = schemaManager.schema(dataFileMeta.schemaId());
+            RowType keysType = dataSchema.logicalTrimmedPrimaryKeysType();
+            RowDataToObjectArrayConverter keyConverter =
+                    keysType.getFieldCount() > 0
+                            ? new RowDataToObjectArrayConverter(
+                                    dataSchema.logicalTrimmedPrimaryKeysType())
+                            : new RowDataToObjectArrayConverter(dataSchema.logicalRowType());
+
+            int[] indexMapping =
+                    SchemaEvolutionUtil.createIndexMapping(

Review Comment:
   I don't want to expose `SchemaEvolutionUtil` internals here.
   Can we extract a class like `FieldStatsConverters` to do this work?
   ```
   class FieldStatsConverters {
      FieldStatsConverters(Function<Long, TableSchema> schemas, long tableSchemaId) {...}
      FieldStatsArraySerializer getOrCreate(long fileSchemaId);
   }
   ```
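
   To make the suggestion concrete, here is a rough sketch of such a helper. It is only a sketch, not a drop-in implementation: it assumes `FieldStatsArraySerializer` has (or gains) a constructor that accepts the file's row type plus the index mapping from `SchemaEvolutionUtil.createIndexMapping`, and it leaves cast/convert mappings out for brevity.
   ```
   import org.apache.flink.table.store.file.schema.SchemaEvolutionUtil;
   import org.apache.flink.table.store.file.schema.TableSchema;
   import org.apache.flink.table.store.file.stats.FieldStatsArraySerializer;

   import java.util.Map;
   import java.util.concurrent.ConcurrentHashMap;
   import java.util.function.Function;

   /** Caches one FieldStatsArraySerializer per file schema id. */
   public class FieldStatsConverters {

       private final Function<Long, TableSchema> schemas;
       private final long tableSchemaId;
       private final Map<Long, FieldStatsArraySerializer> converters = new ConcurrentHashMap<>();

       public FieldStatsConverters(Function<Long, TableSchema> schemas, long tableSchemaId) {
           this.schemas = schemas;
           this.tableSchemaId = tableSchemaId;
       }

       public FieldStatsArraySerializer getOrCreate(long fileSchemaId) {
           return converters.computeIfAbsent(
                   fileSchemaId,
                   id -> {
                       TableSchema tableSchema = schemas.apply(tableSchemaId);
                       if (id == tableSchemaId) {
                           // File was written with the current table schema: no mapping needed.
                           return new FieldStatsArraySerializer(tableSchema.logicalRowType());
                       }
                       // File predates the current schema: map file fields onto table fields,
                       // keeping SchemaEvolutionUtil hidden inside this helper.
                       // (Assumed constructor overload; adjust to the real signature.)
                       TableSchema fileSchema = schemas.apply(id);
                       return new FieldStatsArraySerializer(
                               fileSchema.logicalRowType(),
                               SchemaEvolutionUtil.createIndexMapping(
                                       tableSchema.fields(), fileSchema.fields()));
                   });
       }
   }
   ```
   Then `FilesRead#toRow` would only need `fieldStatsConverters.getOrCreate(dataFileMeta.schemaId())`, and the `SchemaEvolutionUtil` import could drop out of `FilesTable` entirely.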


