[ 
https://issues.apache.org/jira/browse/ARROW-18309?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17634106#comment-17634106
 ] 

jun wang commented on ARROW-18309:
----------------------------------

I tested Arrow v10: it no longer panics, but ValuesLeft() is broken.
Here is the code I used:
{code:java}
// TestDeltaBitPacking reads base-10 int64 values, one per line, from
// timestamp.data, encodes them with the DeltaBinaryPacked encoder and then
// decodes them back in batches of up to 1024 values. After every batch it
// checks that the decoded values match the input and that dec.ValuesLeft()
// equals the number of values not yet decoded.
func TestDeltaBitPacking(t *testing.T) {
	f, err := os.Open("timestamp.data")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	var values []int64

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		v, err := strconv.ParseInt(scanner.Text(), 10, 64)
		if err != nil {
			t.Fatal(err)
		}
		values = append(values, v)
	}
	if err := scanner.Err(); err != nil {
		t.Fatal(err)
	}

	col := schema.NewColumn(schema.MustPrimitive(schema.NewPrimitiveNode("foo",
		parquet.Repetitions.Required, parquet.Types.Int64, -1, -1)), 0, 0)
	enc := encoding.NewEncoder(parquet.Types.Int64,
		parquet.Encodings.DeltaBinaryPacked, false, col,
		memory.DefaultAllocator).(encoding.Int64Encoder)

	enc.Put(values)
	buf, err := enc.FlushValues()
	if err != nil {
		t.Fatal(err)
	}
	defer buf.Release()

	dec := encoding.NewDecoder(parquet.Types.Int64,
		parquet.Encodings.DeltaBinaryPacked, col,
		memory.DefaultAllocator).(encoding.Int64Decoder)
	if err := dec.SetData(len(values), buf.Bytes()); err != nil {
		t.Fatal(err)
	}

	const batchSize = 1024
	out := make([]int64, batchSize)

	// Loop on the remaining input rather than on a separate counter: values is
	// re-sliced after every batch, so comparing a growing index against its
	// shrinking length would stop after roughly half of the data.
	for len(values) > 0 {
		n, err := dec.Decode(out)
		if err != nil {
			t.Fatal(err)
		}
		// Guard against a stalled decoder (infinite loop) and against slicing
		// past the end of values below.
		if n <= 0 || n > len(values) {
			t.Fatalf("Decode returned %d values with %d remaining", n, len(values))
		}

		// Only the first n entries of out were written by this call; the last
		// batch is usually shorter than batchSize.
		assert.Equal(t, values[:n], out[:n])
		values = values[n:]
		assert.Equal(t, len(values), dec.ValuesLeft())
	}
}
 {code}
 

> [Go] delta_bit_packing Decode may panic
> ---------------------------------------
>
>                 Key: ARROW-18309
>                 URL: https://issues.apache.org/jira/browse/ARROW-18309
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Go
>    Affects Versions: 9.0.0
>         Environment: all release version
>            Reporter: jun wang
>            Assignee: Matthew Topol
>            Priority: Major
>             Fix For: 9.0.1
>
>         Attachments: @timestamp.data
>
>
> [https://github.com/apache/arrow/blob/master/go/parquet/internal/encoding/delta_bit_packing.go]
> The DeltaBitPackInt32 and DeltaBitPackInt64 Decode methods do not subtract 
> the number of decoded values from d.nvals at the end, which leads to a panic 
> during streaming decode. Also, when copying the decoded values to out, the 
> end index should be shared_utils.MinInt(int(d.valsPerMini), start + len(out)).
> When we encode 68610 timestamp values and decode them in batches of 1024 
> values, we encounter the panic.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to