[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14496203#comment-14496203 ] Aihua Xu commented on HIVE-9580: Attached the new patch to fix testCliDriver_mapjoin_decimal unit test failure. The other failures seem to be unrelated. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1B_Name + values ('jklmnopqrs', 99);
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14496208#comment-14496208 ] Aihua Xu commented on HIVE-9580: [~szehon] Can you help review the code change? Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1B_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql);
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14496681#comment-14496681 ] Szehon Ho commented on HIVE-9580: - Hi Aihua, looks like this works, so you are making all the varchars (and even char) in the join comparison to be the maximum length to avoid this issue. But I'm not too familiar with this code, I think [~jdere] is the varchars expert, forwarding to him to take a look as well. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) {
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14496785#comment-14496785 ] Aihua Xu commented on HIVE-9580: That's right. For the key comparison, it will call UDF to do the key conversion if we are comparing different types, or I think we should pick the common type as the key type if the type conversion is not needed for the data types including char or varchar with different lengths. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) {
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14497008#comment-14497008 ] Jason Dere commented on HIVE-9580: -- I think this looks fine. I would just say to make sure there are tests to cover the types that would get affected by this change (char/varchar/decimal joins), which it looks like there already are. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); }
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14497023#comment-14497023 ] Szehon Ho commented on HIVE-9580: - Great thanks, +1 Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1B_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql);
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14487289#comment-14487289 ] Hive QA commented on HIVE-9580: --- {color:red}Overall{color}: -1 at least one tests failed Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12723937/HIVE-9580.patch {color:red}ERROR:{color} -1 due to 25 failed/errored test(s), 8666 tests executed *Failed tests:* {noformat} TestMinimrCliDriver-bucketmapjoin6.q-constprog_partitioner.q-infer_bucket_sort_dyn_part.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-external_table_with_space_in_location_path.q-infer_bucket_sort_merge.q-auto_sortmerge_join_16.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-groupby2.q-import_exported_table.q-bucketizedhiveinputformat.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-index_bitmap3.q-stats_counter_partitioned.q-temp_table_external.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-infer_bucket_sort_map_operators.q-join1.q-bucketmapjoin7.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-infer_bucket_sort_num_buckets.q-disable_merge_for_bucketing.q-uber_reduce.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-infer_bucket_sort_reducers_power_two.q-scriptfile1.q-scriptfile1_win.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-leftsemijoin_mr.q-load_hdfs_file_with_space_in_the_name.q-root_dir_external_table.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-list_bucket_dml_10.q-bucket_num_reducers.q-bucket6.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-load_fs2.q-file_with_header_footer.q-ql_rewrite_gbtoidx_cbo_1.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-parallel_orderby.q-reduce_deduplicate.q-ql_rewrite_gbtoidx_cbo_2.q-and-1-more - did not produce a TEST-*.xml file TestMinimrCliDriver-ql_rewrite_gbtoidx.q-smb_mapjoin_8.q - did not produce a TEST-*.xml file TestMinimrCliDriver-schemeAuthority2.q-bucket4.q-input16_cc.q-and-1-more - did not produce a TEST-*.xml file org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_auto_sortmerge_join_2 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_vector_char_mapjoin1 org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver_vector_varchar_mapjoin1 org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_mapjoin_decimal org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_char_mapjoin1 org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver_vector_varchar_mapjoin1 org.apache.hadoop.hive.cli.TestNegativeMinimrCliDriver.testNegativeCliDriver_minimr_broken_pipe org.apache.hadoop.hive.cli.TestSparkCliDriver.testCliDriver_mapjoin_decimal org.apache.hadoop.hive.thrift.TestHadoop20SAuthBridge.testMetastoreProxyUser org.apache.hadoop.hive.thrift.TestHadoop20SAuthBridge.testSaslWithHiveMetaStore org.apache.hive.jdbc.TestSSL.testSSLFetchHttp org.apache.hive.spark.client.TestSparkClient.testSyncRpc {noformat} Test results: http://ec2-174-129-184-35.compute-1.amazonaws.com/jenkins/job/PreCommit-HIVE-TRUNK-Build/3343/testReport Console output: http://ec2-174-129-184-35.compute-1.amazonaws.com/jenkins/job/PreCommit-HIVE-TRUNK-Build/3343/console Test logs: http://ec2-174-129-184-35.compute-1.amazonaws.com/logs/PreCommit-HIVE-TRUNK-Build-3343/ Messages: {noformat} Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 25 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12723937 - PreCommit-HIVE-TRUNK-Build Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu Attachments: HIVE-9580.patch The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2;
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14347507#comment-14347507 ] Aihua Xu commented on HIVE-9580: The issue is caused by: we always assume that the join keys have the same data type and save the one from the first table, then apply it when we serialize and deserialize the hashtables to/from files. It causes the issue when dealing with varchar datatypes. In table A, the column could be varchar(10) while in table B it could be varchar(20). Some times we write the incorrect data to the files if the varchar length of the first table is small. [~szehon] You have worked on join. I'm thinking that we should always choose the one with the largest length. When joining types like smallint and int, we should choose int. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) +
[jira] [Commented] (HIVE-9580) Server returns incorrect result from JOIN ON VARCHAR columns
[ https://issues.apache.org/jira/browse/HIVE-9580?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14347729#comment-14347729 ] Aihua Xu commented on HIVE-9580: Seems Hive can handle the join over any kind of data types. Server returns incorrect result from JOIN ON VARCHAR columns Key: HIVE-9580 URL: https://issues.apache.org/jira/browse/HIVE-9580 Project: Hive Issue Type: Bug Components: HiveServer2 Affects Versions: 0.12.0, 0.13.0, 0.14.0 Reporter: Mike Assignee: Aihua Xu The database erroneously returns rows when joining two tables which each contain a VARCHAR column and the join's ON condition uses the equality operator on the VARCHAR columns. **The following JDBC method exhibits the problem: static void joinIssue() throws SQLException { String sql; int rowsAffected; ResultSet rs; Statement stmt = con.createStatement(); String table1_Name = blahtab1; String table1A_Name = blahtab1A; String table1B_Name = blahtab1B; String table2_Name = blahtab2; try { sql = drop table + table1_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1_Name + ( + VCHARCOL VARCHAR(10) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1A_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1A_Name + ( + VCHARCOL VARCHAR(10) + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1A_Name + values ('jklmnopqrs'); System.out.println(\nsql= + sql); stmt.executeUpdate(sql); System.out.println(===); try { sql = drop table + table1B_Name; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(Drop table error: + se.getMessage()); } try { sql = CREATE TABLE + table1B_Name + ( + VCHARCOL VARCHAR(11) + ,INTEGERCOL INT + ) ; System.out.println(\nsql= + sql); rowsAffected = stmt.executeUpdate(sql); } catch (SQLException se) { println(create table error: + se.getMessage()); } sql = insert into + table1B_Name + values ('jklmnopqrs', 99); System.out.println(\nsql= + sql); stmt.executeUpdate(sql);