Re: [PR] HIVE-28798: Bucket Map Join partially using partition transforms [hive]

via GitHub Tue, 01 Jul 2025 21:37:13 -0700


ngsg commented on code in PR #5670:
URL: https://github.com/apache/hive/pull/5670#discussion_r2179032529



##########
iceberg/iceberg-handler/src/test/queries/positive/iceberg_bucket_map_join_8.q:
##########
@@ -4,47 +4,122 @@ set hive.auto.convert.join=true;
 set hive.optimize.dynamic.partition.hashjoin=false;
 set hive.convert.join.bucket.mapjoin.tez=true;
 
-CREATE TABLE srcbucket_big(key int, value string, id int)
-PARTITIONED BY SPEC(bucket(4, key)) STORED BY ICEBERG;
+CREATE TABLE srcbucket_big(key1 int, key2 string, value string, id int)
+PARTITIONED BY SPEC(bucket(4, key1), bucket(8, key2)) STORED BY ICEBERG;
 INSERT INTO srcbucket_big VALUES
-(101, 'val_101', 1),
-(null, 'val_102', 2),
-(103, 'val_103', 3),
-(104, null, 4),
-(105, 'val_105', 5),
-(null, null, 6);
-
-CREATE TABLE src_small(key int, value string);
+(101, '1001', 'val_101', 1),
+(null, '1002', 'val_102', 2),
+(103, null, 'val_103', 3),
+(104, '1004', null, 4),
+(105, '1005', 'val_105', 5),
+(101, '1001', 'val_101', 6),
+(null, '1002', 'val_102', 7),
+(103, null, 'val_103', 8),
+(104, '1004', null, 9),
+(105, '1005', 'val_105', 10),
+(101, '1001', 'val_101', 11),
+(null, '1002', 'val_102', 12),
+(103, null, 'val_103', 13),
+(104, '1004', null, 14),
+(105, '1005', 'val_105', 15),
+(101, '1001', 'val_101', 16),
+(null, '1002', 'val_102', 17),
+(103, null, 'val_103', 18),
+(104, '1004', null, 19),
+(105, '1005', 'val_105', 20),
+(null, null, null, 21);
+
+CREATE TABLE src_small(key1 int, key2 string, value string);
 INSERT INTO src_small VALUES
-(101, 'val_101'),
-(null, 'val_102'),
-(103, 'val_103'),
-(104, null),
-(105, 'val_105'),
-(null, null);
+(101, '1001', 'val_101'),
+(null, '1002', 'val_102'),
+(103, null, 'val_103'),
+(104, '1004', null),
+(105, '1005', 'val_105'),
+(null, null, null);
 
 SELECT * FROM srcbucket_big ORDER BY id;
 
--- Using the bucket column
+-- key1
 EXPLAIN
-SELECT *
+SELECT a.key1, a.id
 FROM srcbucket_big a
-JOIN src_small b ON a.key = b.key
+JOIN src_small b ON a.key1 = b.key1
 ORDER BY a.id;
 
-SELECT *
+SELECT a.key1, a.id
 FROM srcbucket_big a
-JOIN src_small b ON a.key = b.key
+JOIN src_small b ON a.key1 = b.key1
 ORDER BY a.id;
 
--- Using a non-bucket column
+-- key2

Review Comment:
   I think the `key2` and `key2 & non-partition` sections could be removed, since
their query plans are identical to those of the `key1` section. That's only a
minor suggestion, though, and I don't have a strong opinion about it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] HIVE-28798: Bucket Map Join partially using partition transforms [hive]

Reply via email to