This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3b763b5c3235ebdb445ad0a4ae1bf79385e8df02
Author: Andrew Sherman <asher...@cloudera.com>
AuthorDate: Mon Jan 18 19:10:11 2021 -0800

    IMPALA-10447: Add a newline when exporting shell output to a file.
    
    Impala shell outputs a batch of rows using OutputStream. Inside
    OutputStream, output to a file is handled slightly differently from
    output that is written to stdout. When writing to stdout we use print()
    (which appends a newline) while when writing to a file we use write()
    (which adds nothing). This difference was introduced in IMPALA-3343 so
    this bug may be a regression introduced then. To ensure that output is
    the same in either case we need to add a newline after writing each
    batch of rows to a file.
    
    TESTING:
        Added a new test for this case.
    
    Change-Id: I078a06c54e0834bc1f898626afbfff4ded579fa9
    Reviewed-on: http://gerrit.cloudera.org:8080/16966
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 shell/shell_output.py                 |  1 +
 tests/shell/test_shell_commandline.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/shell/shell_output.py b/shell/shell_output.py
index 31d91a0..bfb418c 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -117,6 +117,7 @@ class OutputStream(object):
           # Note that instances of this class do not persist, so it's fine to
           # close the we close the file handle after each write.
           out_file.write(formatted_data.encode('utf-8'))  # file opened in binary mode
+          out_file.write(b'\n')
       except IOError as err:
         file_err_msg = "Error opening file %s: %s" % (self.filename, str(err))
         print('{0} (falling back to stderr)'.format(file_err_msg), file=sys.stderr)
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 666162f..30cd85d 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -105,6 +105,16 @@ def populated_table(empty_table, request):
   return fq_table_name
 
 
+@pytest.yield_fixture
+def tmp_file():
+  """
+  Test fixture which manages a temporary file
+  """
+  _, tmp_file = tempfile.mkstemp()
+  yield tmp_file
+  os.remove(tmp_file)
+
+
 class TestImpalaShell(ImpalaTestSuite):
   """A set of sanity tests for the Impala shell commandline parameters.
 
@@ -1071,3 +1081,21 @@ class TestImpalaShell(ImpalaTestSuite):
     expected_result = """anonymous\tanonymous\n"""
     assert result.stdout == expected_result
     assert result.stderr == ""
+
+  def test_output_file(self, vector, tmp_file):
+    """Test that writing output to a file using '--output_file' produces the same output
+    as is written to stdout."""
+    row_count = 6000  # Should be > 2048 to tickle IMPALA-10447.
+    query = "select * from tpcds.item order by i_item_sk limit %d" % row_count
+    # Run the query normally and keep the stdout.
+    output = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;'])
+    assert "Fetched %d row(s)" % row_count in output.stderr
+    rows_from_stdout = output.stdout.strip().split('\n')
+    # Run the query with output sent to a file using '--output_file'.
+    result = run_impala_shell_cmd(vector, ['-q', query, '-B', '--output_delimiter=;',
+                                           '--output_file=%s' % tmp_file])
+    assert "Fetched %d row(s)" % row_count in result.stderr
+    # Check that the output from the file is the same as that written to stdout.
+    with open(tmp_file, "r") as f:
+      rows_from_file = [line.rstrip() for line in f]
+      assert rows_from_stdout == rows_from_file

Reply via email to