lcspinter commented on code in PR #3552:
URL: https://github.com/apache/hive/pull/3552#discussion_r963624074
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
return conf;
}
}
+
+ @Override
+ public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
hmsTable) {
Review Comment:
Why cannot we use long as return type?
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java:
##########
@@ -489,4 +489,13 @@ default void validateSinkDesc(FileSinkDesc sinkDesc)
throws SemanticException {
*/
default void executeOperation(org.apache.hadoop.hive.ql.metadata.Table
table, AlterTableExecuteSpec executeSpec) {
}
+
+ /**
+ * Query the unique snapshot id of the passed table.
+ * @param table - {@link org.apache.hadoop.hive.ql.metadata.Table} which
snapshot id should be returned.
+ * @return String representation of the snapshotId or null if not supported.
+ */
+ default String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
table) {
Review Comment:
I would change this to `getCurrentSnapshotId`
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -32,16 +36,19 @@
import static java.util.Collections.unmodifiableSet;
public class MaterializedViewMetadata {
+ private static final Logger LOG =
LoggerFactory.getLogger(MaterializedViewMetadata.class);
+
final CreationMetadata creationMetadata;
MaterializedViewMetadata(CreationMetadata creationMetadata) {
this.creationMetadata = creationMetadata;
}
public MaterializedViewMetadata(
- String catalogName, String dbName, String mvName, Set<SourceTable>
sourceTables, String validTxnList) {
+ String catalogName, String dbName, String mvName, Set<SourceTable>
sourceTables,
+ MaterializationSnapshot snapshot) {
this.creationMetadata = new CreationMetadata(catalogName, dbName, mvName,
toFullTableNames(sourceTables));
- this.creationMetadata.setValidTxnList(validTxnList);
+ this.creationMetadata.setValidTxnList(snapshot.asJsonString());
Review Comment:
This is not a List anymore, but rather an object. Maybe we should consider
renaming it.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
return unmodifiableList(creationMetadata.getSourceTables());
}
- public String getValidTxnList() {
- return creationMetadata.getValidTxnList();
+ public MaterializationSnapshot getSnapshot() {
Review Comment:
Javadoc. Please cover what happens in case of native ACID table and
non-native table.
##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/views/HiveMaterializedViewUtils.java:
##########
@@ -98,7 +105,22 @@ public static Table extractTable(RelOptMaterialization
materialization) {
* materialized view definition uses external tables.
*/
public static Boolean isOutdatedMaterializedView(
- String validTxnsList, HiveTxnManager txnMgr,
+ String validTxnsList, HiveTxnManager txnMgr, Hive db,
+ Set<TableName> tablesUsed, Table materializedViewTable) throws
HiveException {
+
+ MaterializedViewMetadata mvMetadata =
materializedViewTable.getMVMetadata();
+ MaterializationSnapshot snapshot = mvMetadata.getSnapshot();
+
+ if (snapshot.getTableSnapshots() != null &&
!snapshot.getTableSnapshots().isEmpty()) {
Review Comment:
snapshot can be null
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java:
##########
@@ -977,4 +977,11 @@ public Configuration get() {
return conf;
}
}
+
+ @Override
+ public String getCurrentSnapshot(org.apache.hadoop.hive.ql.metadata.Table
hmsTable) {
+ TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
+ Table table = IcebergTableUtil.getTable(conf, tableDesc.getProperties());
+ return Long.toString(table.currentSnapshot().sequenceNumber());
Review Comment:
`table.currentSnashot()` can be null, if the table is empty.
##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewMetadata.java:
##########
@@ -81,15 +88,20 @@ public Collection<SourceTable> getSourceTables() {
return unmodifiableList(creationMetadata.getSourceTables());
}
- public String getValidTxnList() {
- return creationMetadata.getValidTxnList();
+ public MaterializationSnapshot getSnapshot() {
+ if (creationMetadata.getValidTxnList() == null ||
creationMetadata.getValidTxnList().isEmpty()) {
+ LOG.debug("Could not obtain materialization snapshot of materialized
view {}.{}",
+ creationMetadata.getDbName(), creationMetadata.getDbName());
Review Comment:
`getDbName` is passed twice
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString, null);
+ }
+ }
+
+ // snapshot of native ACID tables
+ private String validTxnList;
+ // snapshot of non-native ACID tables. Key is the fully qualified name of
the table.
Review Comment:
Materialized views are only supported for iceberg V2 tables? If not, we
should change this java doc.
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString, null);
+ }
+ }
+
+ // snapshot of native ACID tables
+ private String validTxnList;
+ // snapshot of non-native ACID tables. Key is the fully qualified name of
the table.
+ // Value is the unique id of the snapshot provided by the table's storage
HiveStorageHandler
+ private Map<String, String> tableSnapshots;
+
+ private MaterializationSnapshot() {
+ }
+
+ public MaterializationSnapshot(String validTxnList, Map<String, String>
tableSnapshots) {
+ this.validTxnList = validTxnList;
+ this.tableSnapshots = tableSnapshots;
+ }
+
+ public String asJsonString() {
Review Comment:
Few lines of javadoc wouldn't hurt
##########
iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_orc.q:
##########
@@ -0,0 +1,35 @@
+-- SORT_QUERY_RESULTS
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+create external table tbl_ice(a int, b string, c int) stored by iceberg stored
as orc tblproperties ('format-version'='2');
+
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52),
(4, 'four', 53), (5, 'five', 54);
+
+select * from tbl_ice;
+
+
+create materialized view mat1 as
+select b, c from tbl_ice where c > 52;
Review Comment:
How does this work with time travel queries? We should add a few tests
covering that as well.
##########
storage-api/src/java/org/apache/hadoop/hive/common/MaterializationSnapshot.java:
##########
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.util.Map;
+
+/**
+ * Class to store snapshot data of Materialized view source tables.
+ * The data represents the state of the source tables when the view was
created/last rebuilt.
+ */
+public class MaterializationSnapshot {
+
+ public static MaterializationSnapshot fromJson(String jsonString) {
+ try {
+ return new ObjectMapper().readValue(jsonString,
MaterializationSnapshot.class);
+ } catch (JsonProcessingException e) {
+ // this is not a jsonString, fall back to treating it as
ValidTxnWriteIdList
+ return new MaterializationSnapshot(jsonString, null);
+ }
+ }
+
+ // snapshot of native ACID tables
+ private String validTxnList;
+ // snapshot of non-native ACID tables. Key is the fully qualified name of
the table.
+ // Value is the unique id of the snapshot provided by the table's storage
HiveStorageHandler
+ private Map<String, String> tableSnapshots;
+
+ private MaterializationSnapshot() {
+ }
+
+ public MaterializationSnapshot(String validTxnList, Map<String, String>
tableSnapshots) {
Review Comment:
Is `validTxnList` and `tableSnapshots` are mandatory parameters? If not, we
can use two separate constructors or a builder pattern.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]