ngsg commented on code in PR #5670: URL: https://github.com/apache/hive/pull/5670#discussion_r2179032529
########## iceberg/iceberg-handler/src/test/queries/positive/iceberg_bucket_map_join_8.q: ########## @@ -4,47 +4,122 @@ set hive.auto.convert.join=true; set hive.optimize.dynamic.partition.hashjoin=false; set hive.convert.join.bucket.mapjoin.tez=true; -CREATE TABLE srcbucket_big(key int, value string, id int) -PARTITIONED BY SPEC(bucket(4, key)) STORED BY ICEBERG; +CREATE TABLE srcbucket_big(key1 int, key2 string, value string, id int) +PARTITIONED BY SPEC(bucket(4, key1), bucket(8, key2)) STORED BY ICEBERG; INSERT INTO srcbucket_big VALUES -(101, 'val_101', 1), -(null, 'val_102', 2), -(103, 'val_103', 3), -(104, null, 4), -(105, 'val_105', 5), -(null, null, 6); - -CREATE TABLE src_small(key int, value string); +(101, '1001', 'val_101', 1), +(null, '1002', 'val_102', 2), +(103, null, 'val_103', 3), +(104, '1004', null, 4), +(105, '1005', 'val_105', 5), +(101, '1001', 'val_101', 6), +(null, '1002', 'val_102', 7), +(103, null, 'val_103', 8), +(104, '1004', null, 9), +(105, '1005', 'val_105', 10), +(101, '1001', 'val_101', 11), +(null, '1002', 'val_102', 12), +(103, null, 'val_103', 13), +(104, '1004', null, 14), +(105, '1005', 'val_105', 15), +(101, '1001', 'val_101', 16), +(null, '1002', 'val_102', 17), +(103, null, 'val_103', 18), +(104, '1004', null, 19), +(105, '1005', 'val_105', 20), +(null, null, null, 21); + +CREATE TABLE src_small(key1 int, key2 string, value string); INSERT INTO src_small VALUES -(101, 'val_101'), -(null, 'val_102'), -(103, 'val_103'), -(104, null), -(105, 'val_105'), -(null, null); +(101, '1001', 'val_101'), +(null, '1002', 'val_102'), +(103, null, 'val_103'), +(104, '1004', null), +(105, '1005', 'val_105'), +(null, null, null); SELECT * FROM srcbucket_big ORDER BY id; --- Using the bucket column +-- key1 EXPLAIN -SELECT * +SELECT a.key1, a.id FROM srcbucket_big a -JOIN src_small b ON a.key = b.key +JOIN src_small b ON a.key1 = b.key1 ORDER BY a.id; -SELECT * +SELECT a.key1, a.id FROM srcbucket_big a -JOIN src_small b ON a.key = b.key +JOIN 
src_small b ON a.key1 = b.key1 ORDER BY a.id; --- Using a non-bucket column +-- key2 Review Comment: I think the `key2` and `key2 & non-partition` parts could be removed, since their query plans are identical to the `key1` plans. But that's just a minor thought, and I don't have a strong opinion about it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org