This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new ff6717b413 GH-36319: [Go][Parquet] Improved row group writer error messages (#36320)
ff6717b413 is described below

commit ff6717b413858730f282d57d9f22d08381ce86f0
Author: Mark Wolfe <[email protected]>
AuthorDate: Thu Jul 6 00:48:40 2023 +1000

    GH-36319: [Go][Parquet] Improved row group writer error messages (#36320)
    
    
    
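    ### Rationale for this change
    
    The previous "row mismatch" errors did not identify which row group or
    column failed the row-count check, nor the counts involved.
    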
    ### What changes are included in this PR?
    
    Updated the error messages for mismatched column row counts to identify
    the column or row group index that failed the check, and how many rows
    were expected versus found.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * Closes: #36319
    
    Authored-by: Mark Wolfe <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/parquet/file/file_writer_test.go | 8 ++++++--
 go/parquet/file/row_group_writer.go | 8 ++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/go/parquet/file/file_writer_test.go 
b/go/parquet/file/file_writer_test.go
index 8ff202b947..bba0d2be28 100644
--- a/go/parquet/file/file_writer_test.go
+++ b/go/parquet/file/file_writer_test.go
@@ -139,7 +139,9 @@ func (t *SerializeTestSuite) unequalNumRows(maxRows int64, 
rowsPerCol []int64) {
                t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, 
t.DefLevels[:rowsPerCol[col]], nil)
                cw.Close()
        }
-       t.Error(rgw.Close())
+       err := rgw.Close()
+       t.Error(err)
+       t.ErrorContains(err, "row mismatch for unbuffered row group")
 }
 
 func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows int64, rowsPerCol 
[]int64) {
@@ -154,7 +156,9 @@ func (t *SerializeTestSuite) unequalNumRowsBuffered(maxRows 
int64, rowsPerCol []
                t.WriteBatchSubset(int(rowsPerCol[col]), 0, cw, 
t.DefLevels[:rowsPerCol[col]], nil)
                cw.Close()
        }
-       t.Error(rgw.Close())
+       err := rgw.Close()
+       t.Error(err)
+       t.ErrorContains(err, "row mismatch for buffered row group")
 }
 
 func (t *SerializeTestSuite) TestZeroRows() {
diff --git a/go/parquet/file/row_group_writer.go 
b/go/parquet/file/row_group_writer.go
index 8fbb04ad7a..410f48b477 100644
--- a/go/parquet/file/row_group_writer.go
+++ b/go/parquet/file/row_group_writer.go
@@ -110,13 +110,13 @@ func (rg *rowGroupWriter) checkRowsWritten() error {
                if rg.nrows == 0 {
                        rg.nrows = current
                } else if rg.nrows != current {
-                       return xerrors.New("row mismatch")
+                       return xerrors.Errorf("row mismatch for unbuffered row 
group: %d, count expected: %d, actual: %d", rg.ordinal, current, rg.nrows)
                }
        } else if rg.buffered {
                current := rg.columnWriters[0].RowsWritten()
-               for _, wr := range rg.columnWriters[1:] {
+               for i, wr := range rg.columnWriters[1:] {
                        if current != wr.RowsWritten() {
-                               return xerrors.New("row mismatch error")
+                               return xerrors.Errorf("row mismatch for 
buffered row group: %d, column: %d, count expected: %d, actual: %d", 
rg.ordinal, i+1, current, wr.RowsWritten())
                        }
                }
                rg.nrows = current
@@ -182,7 +182,7 @@ func (rg *rowGroupWriter) Column(i int) (ColumnChunkWriter, 
error) {
        if i >= 0 && i < len(rg.columnWriters) {
                return rg.columnWriters[i], nil
        }
-       return nil, xerrors.New("invalid column number requested")
+       return nil, xerrors.Errorf("invalid column number requested: %d", i)
 }
 
 func (rg *rowGroupWriter) CurrentColumn() int { return 
rg.metadata.CurrentColumn() }

Reply via email to