This is an automated email from the ASF dual-hosted git repository. tarmstrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3b763b5c3235ebdb445ad0a4ae1bf79385e8df02
Author: Andrew Sherman <asher...@cloudera.com>
AuthorDate: Mon Jan 18 19:10:11 2021 -0800

IMPALA-10447: Add a newline when exporting shell output to a file.

Impala shell outputs a batch of rows using OutputStream. Inside
OutputStream, output to a file is handled slightly differently from
output that is written to stdout. When writing to stdout we use print()
(which appends a newline) while when writing to a file we use write()
(which adds nothing). This difference was introduced in IMPALA-3343 so
this bug may be a regression introduced then. To ensure that output is
the same in either case we need to add a newline after writing each
batch of rows to a file.

TESTING: Added a new test for this case.

Change-Id: I078a06c54e0834bc1f898626afbfff4ded579fa9
Reviewed-on: http://gerrit.cloudera.org:8080/16966
Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 shell/shell_output.py | 1 +
 tests/shell/test_shell_commandline.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/shell/shell_output.py b/shell/shell_output.py
index 31d91a0..bfb418c 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -117,6 +117,7 @@ class OutputStream(object):
 # Note that instances of this class do not persist, so it's fine to
 # close the we close the file handle after each write.
out_file.write(formatted_data.encode('utf-8')) # file opened in binary mode
+ out_file.write(b'\n')
 except IOError as err:
 file_err_msg = "Error opening file %s: %s" % (self.filename, str(err))
 print('{0} (falling back to stderr)'.format(file_err_msg), file=sys.stderr)
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 666162f..30cd85d 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -105,6 +105,16 @@ def populated_table(empty_table, request):
 return fq_table_name
+@pytest.yield_fixture
+def tmp_file():
+ """
+ Test fixture which manages a temporary file
+ """
+ _, tmp_file = tempfile.mkstemp()
+ yield tmp_file
+ os.remove(tmp_file)
+
+
 class TestImpalaShell(ImpalaTestSuite):
 """A set of sanity tests for the Impala shell commandline parameters.
@@ -1071,3 +1081,21 @@ class TestImpalaShell(ImpalaTestSuite):
 expected_result = """anonymous\tanonymous\n"""
 assert result.stdout == expected_result
 assert result.stderr == ""
+
+ def test_output_file(self, vector, tmp_file):
+ """Test that writing output to a file using '--output_file' produces the same output
+ as is written to stdout."""
+ row_count = 6000 # Should be > 2048 to tickle IMPALA-10447.
+ query = "select * from tpcds.item order by i_item_sk limit %d" % row_count
+ # Run the query normally and keep the stdout.
+ output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+ assert "Fetched %d row(s)" % row_count in output.stderr
+ rows_from_stdout = output.stdout.strip().split('\n')
+ # Run the query with output sent to a file using '--output_file'.
+ result = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;',
+ '--output_file=%s' % tmp_file])
+ assert "Fetched %d row(s)" % row_count in result.stderr
+ # Check that the output from the file is the same as that written to stdout.
+ with open(tmp_file, "r") as f:
+ rows_from_file = [line.rstrip() for line in f]
+ assert rows_from_stdout == rows_from_file