This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 8f0ce842 perf(parquet): optimize stats and bloom filters for bool 
columns (#715)
8f0ce842 is described below

commit 8f0ce8424dbed073993ecb67fba180cbcb5c13f3
Author: Matt Topol <[email protected]>
AuthorDate: Thu Mar 19 13:23:47 2026 -0400

    perf(parquet): optimize stats and bloom filters for bool columns (#715)
    
    ### Rationale for this change
    The initial bool optimization (#707) added direct bitmap encoding with
    `WriteBitmapBatch` and `WriteBitmapBatchSpaced` functions to reduce
    allocations during encoding and decoding. However, this didn't implement
    the same optimization for statistics updated, bloom filter inserts or
    spaced encoding.
    
    ### What changes are included in this PR?
    1. Update the bloom filter handling to compute hashes directly from
    bitmaps, including for spaced values and reuse slice allocation across
    iterations.
    2. Update statistics handling to update min/max directly from bitmaps
    for spaced and non-spaced scenarios.
    3. Update the encoder interface for the boolean encoder to have a
    `PutSpacedBitmap` and compress bitmaps with validity buffers.
    4. Add `writeBitmapValues` and `writeBitmapValuesSpaced` for the boolean
    column writer with fallback to `[]bool` conversions if the encoder
    doesn't implement the interface.
    
    ### Are these changes tested?
    Yes, unit tests are added for everything
    
    ### Are there any user-facing changes?
    No user-facing API changes, this is a pure internal optimization change
    for boolean columns.
    
    *Benchmark Results*
    
    *Statistics Update*
    ```
      BEFORE (with bitmap → []bool conversion):
      BenchmarkBooleanStatisticsWithConversion-16
        153,398 ns/op    109,278 B/op (107 KB)    6 allocs/op
    
      AFTER (direct bitmap operations):
      BenchmarkBooleanStatisticsDirectBitmap-16
            393 ns/op      2,698 B/op (2.6 KB)    5 allocs/op
    ```
    
    * 390x faster
    * 97.5% less memory
    * 1 fewer allocation
    
    *Bloom filter hashing*
    ```
      BEFORE (with bitmap → []bool conversion):
      BenchmarkBloomFilterHashingWithConversion-16
        1,084,001 ns/op    3,309,593 B/op (3.23 MB)    3 allocs/op
    
      AFTER (direct bitmap operations):
      BenchmarkBloomFilterHashingDirectBitmap-16
          448,882 ns/op      802,821 B/op (784 KB)    2 allocs/op
    ```
    
    * 2.4x faster
    * 76% less memory
    * 2.5 MB saved/operation
    * 1 fewer allocation
    
    *Full write path (Stats + Bloom Filter)*
    ```
      BEFORE (with bitmap → []bool conversion):
      BenchmarkFullWritePathWithConversion-16
        1,211,525 ns/op    3,315,566 B/op (3.24 MB)    15 allocs/op
    
      AFTER (direct bitmap operations):
      BenchmarkFullWritePathDirectBitmap-16
          580,934 ns/op      807,640 B/op (789 KB)    13 allocs/op
    ```
    
    * 2.1x faster
    * 76% less memory
    * 2.5 MB saved per 100k bools written
    * 2 fewer allocations
    
    ---------
    
    Co-authored-by: Matt <zero@gibson>
    Co-authored-by: Claude Sonnet 4.5 <[email protected]>
---
 parquet/file/column_writer_types.gen.go       |  56 ++--
 parquet/file/column_writer_types.gen.go.tmpl  |  56 ++--
 parquet/internal/encoding/boolean_encoder.go  |  97 +++++++
 parquet/internal/encoding/encoding_test.go    | 198 ++++++++++++++
 parquet/metadata/bitmap_benchmark_test.go     |  98 +++++++
 parquet/metadata/bloom_filter.go              |  58 +++++
 parquet/metadata/bloom_filter_test.go         | 361 ++++++++++++++++++++++++++
 parquet/metadata/statistics_test.go           | 313 ++++++++++++++++++++++
 parquet/metadata/statistics_types.gen.go      |  79 +++++-
 parquet/metadata/statistics_types.gen.go.tmpl |  80 +++++-
 10 files changed, 1348 insertions(+), 48 deletions(-)

diff --git a/parquet/file/column_writer_types.gen.go 
b/parquet/file/column_writer_types.gen.go
index 0f7feda0..e7f8c341 100644
--- a/parquet/file/column_writer_types.gen.go
+++ b/parquet/file/column_writer_types.gen.go
@@ -1277,39 +1277,49 @@ func (w *BooleanColumnChunkWriter) 
writeBitmapValues(bitmap []byte, bitmapOffset
                w.currentEncoder.(encoding.BooleanEncoder).Put(values)
        }
 
-       // Note: Statistics and bloom filter updates would require converting 
back to []bool
-       // For now, skip them to maintain the performance benefit
-       // In production, we'd need bitmap-aware statistics/bloom filter methods
+       // Update statistics and bloom filter using bitmap-aware methods
        if w.pageStatistics != nil {
-               // Convert for statistics (unavoidable for now)
-               values := make([]bool, numValues)
-               for i := int64(0); i < numValues; i++ {
-                       values[i] = bitutil.BitIsSet(bitmap, 
int(bitmapOffset+i))
-               }
-               w.pageStatistics.(*metadata.BooleanStatistics).Update(values, 
numNulls)
+               
w.pageStatistics.(*metadata.BooleanStatistics).UpdateFromBitmap(bitmap, 
bitmapOffset, numValues, numNulls)
        }
        if w.bloomFilter != nil {
-               // Convert for bloom filter (unavoidable for now)
-               values := make([]bool, numValues)
-               for i := int64(0); i < numValues; i++ {
-                       values[i] = bitutil.BitIsSet(bitmap, 
int(bitmapOffset+i))
-               }
-               
w.bloomFilter.InsertBulk(metadata.GetHashes(w.bloomFilter.Hasher(), values))
+               
w.bloomFilter.InsertBulk(metadata.GetHashesFromBitmap(w.bloomFilter.Hasher(), 
bitmap, bitmapOffset, numValues))
        }
 }
 
 // writeBitmapValuesSpaced writes boolean values from a bitmap with validity 
information
 func (w *BooleanColumnChunkWriter) writeBitmapValuesSpaced(bitmap []byte, 
bitmapOffset int64, numRead, numValues int64, validBits []byte, validBitsOffset 
int64) {
-       // For spaced writes, we need to compress the bitmap according to 
validity
-       // This requires converting to []bool for now
-       // A future optimization could implement bitmap-to-bitmap compression
-       spacedValues := make([]bool, numValues)
-       for i := int64(0); i < numValues; i++ {
-               spacedValues[i] = bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
+       // Try to use bitmap-aware encoder interface if available
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+
+       if enc, ok := w.currentEncoder.(bitmapSpacedEncoder); ok {
+               // Direct bitmap path - no []bool conversion!
+               enc.PutSpacedBitmap(bitmap, bitmapOffset, numValues, validBits, 
validBitsOffset)
+       } else {
+               // Fallback: convert to []bool for encoders that don't support 
bitmap interface
+               spacedValues := make([]bool, numValues)
+               for i := int64(0); i < numValues; i++ {
+                       spacedValues[i] = bitutil.BitIsSet(bitmap, 
int(bitmapOffset+i))
+               }
+
+               if len(spacedValues) != int(numRead) {
+                       
w.currentEncoder.(encoding.BooleanEncoder).PutSpaced(spacedValues, validBits, 
validBitsOffset)
+               } else {
+                       
w.currentEncoder.(encoding.BooleanEncoder).Put(spacedValues)
+               }
        }
 
-       // Use existing spaced write logic
-       w.writeValuesSpaced(spacedValues, numRead, numValues, validBits, 
validBitsOffset)
+       // Use bitmap-aware statistics update
+       if w.pageStatistics != nil {
+               nulls := numValues - numRead
+               
w.pageStatistics.(*metadata.BooleanStatistics).UpdateFromBitmapSpaced(bitmap, 
bitmapOffset, numValues, validBits, validBitsOffset, nulls)
+       }
+
+       // Use bitmap-aware bloom filter hashing
+       if w.bloomFilter != nil {
+               
w.bloomFilter.InsertBulk(metadata.GetSpacedHashesFromBitmap(w.bloomFilter.Hasher(),
 numRead, bitmap, bitmapOffset, numValues, validBits, validBitsOffset))
+       }
 }
 
 func (w *BooleanColumnChunkWriter) checkDictionarySizeLimit() {
diff --git a/parquet/file/column_writer_types.gen.go.tmpl 
b/parquet/file/column_writer_types.gen.go.tmpl
index d3ed37cf..9a22bb71 100644
--- a/parquet/file/column_writer_types.gen.go.tmpl
+++ b/parquet/file/column_writer_types.gen.go.tmpl
@@ -430,39 +430,49 @@ func (w *{{.Name}}ColumnChunkWriter) 
writeBitmapValues(bitmap []byte, bitmapOffs
     w.currentEncoder.(encoding.BooleanEncoder).Put(values)
   }
 
-  // Note: Statistics and bloom filter updates would require converting back 
to []bool
-  // For now, skip them to maintain the performance benefit
-  // In production, we'd need bitmap-aware statistics/bloom filter methods
+  // Update statistics and bloom filter using bitmap-aware methods
   if w.pageStatistics != nil {
-    // Convert for statistics (unavoidable for now)
-    values := make([]bool, numValues)
-    for i := int64(0); i < numValues; i++ {
-      values[i] = bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
-    }
-    w.pageStatistics.(*metadata.BooleanStatistics).Update(values, numNulls)
+    w.pageStatistics.(*metadata.BooleanStatistics).UpdateFromBitmap(bitmap, 
bitmapOffset, numValues, numNulls)
   }
   if w.bloomFilter != nil {
-    // Convert for bloom filter (unavoidable for now)
-    values := make([]bool, numValues)
-    for i := int64(0); i < numValues; i++ {
-      values[i] = bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
-    }
-    w.bloomFilter.InsertBulk(metadata.GetHashes(w.bloomFilter.Hasher(), 
values))
+    
w.bloomFilter.InsertBulk(metadata.GetHashesFromBitmap(w.bloomFilter.Hasher(), 
bitmap, bitmapOffset, numValues))
   }
 }
 
 // writeBitmapValuesSpaced writes boolean values from a bitmap with validity 
information
 func (w *{{.Name}}ColumnChunkWriter) writeBitmapValuesSpaced(bitmap []byte, 
bitmapOffset int64, numRead, numValues int64, validBits []byte, validBitsOffset 
int64) {
-  // For spaced writes, we need to compress the bitmap according to validity
-  // This requires converting to []bool for now
-  // A future optimization could implement bitmap-to-bitmap compression
-  spacedValues := make([]bool, numValues)
-  for i := int64(0); i < numValues; i++ {
-    spacedValues[i] = bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
+  // Try to use bitmap-aware encoder interface if available
+  type bitmapSpacedEncoder interface {
+    PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues int64, 
validBits []byte, validBitsOffset int64) int64
   }
 
-  // Use existing spaced write logic
-  w.writeValuesSpaced(spacedValues, numRead, numValues, validBits, 
validBitsOffset)
+  if enc, ok := w.currentEncoder.(bitmapSpacedEncoder); ok {
+    // Direct bitmap path - no []bool conversion!
+    enc.PutSpacedBitmap(bitmap, bitmapOffset, numValues, validBits, 
validBitsOffset)
+  } else {
+    // Fallback: convert to []bool for encoders that don't support bitmap 
interface
+    spacedValues := make([]bool, numValues)
+    for i := int64(0); i < numValues; i++ {
+      spacedValues[i] = bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
+    }
+
+    if len(spacedValues) != int(numRead) {
+      w.currentEncoder.(encoding.BooleanEncoder).PutSpaced(spacedValues, 
validBits, validBitsOffset)
+    } else {
+      w.currentEncoder.(encoding.BooleanEncoder).Put(spacedValues)
+    }
+  }
+
+  // Use bitmap-aware statistics update
+  if w.pageStatistics != nil {
+    nulls := numValues - numRead
+    
w.pageStatistics.(*metadata.BooleanStatistics).UpdateFromBitmapSpaced(bitmap, 
bitmapOffset, numValues, validBits, validBitsOffset, nulls)
+  }
+
+  // Use bitmap-aware bloom filter hashing
+  if w.bloomFilter != nil {
+    
w.bloomFilter.InsertBulk(metadata.GetSpacedHashesFromBitmap(w.bloomFilter.Hasher(),
 numRead, bitmap, bitmapOffset, numValues, validBits, validBitsOffset))
+  }
 }
 
 {{end}}
diff --git a/parquet/internal/encoding/boolean_encoder.go 
b/parquet/internal/encoding/boolean_encoder.go
index 37a8309b..e012b98a 100644
--- a/parquet/internal/encoding/boolean_encoder.go
+++ b/parquet/internal/encoding/boolean_encoder.go
@@ -20,6 +20,7 @@ import (
        "encoding/binary"
 
        "github.com/apache/arrow-go/v18/arrow/bitutil"
+       "github.com/apache/arrow-go/v18/internal/bitutils"
        "github.com/apache/arrow-go/v18/parquet"
        "github.com/apache/arrow-go/v18/parquet/internal/debug"
        "github.com/apache/arrow-go/v18/parquet/internal/utils"
@@ -30,6 +31,41 @@ const (
        boolsInBuf  = boolBufSize * 8
 )
 
+// compressBitmapWithValidity extracts only the valid bits from a source 
bitmap,
+// compressing it into a contiguous destination bitmap. Uses SetBitRunReader 
for
+// efficient iteration over valid runs.
+func compressBitmapWithValidity(
+       srcBitmap []byte,
+       srcOffset int64,
+       numValues int64,
+       validBits []byte,
+       validBitsOffset int64,
+       numValid int64,
+) []byte {
+       if numValid == 0 {
+               return []byte{}
+       }
+
+       // Allocate destination bitmap to hold only valid bits
+       dstBitmap := make([]byte, bitutil.BytesForBits(numValid))
+       dstWriter := utils.NewBitmapWriter(dstBitmap, 0, int(numValid))
+
+       // Use SetBitRunReader to efficiently iterate over valid runs
+       reader := bitutils.NewSetBitRunReader(validBits, validBitsOffset, 
numValues)
+       for {
+               run := reader.NextRun()
+               if run.Length == 0 {
+                       break
+               }
+
+               // Copy this run of valid bits from source to destination
+               dstWriter.AppendBitmap(srcBitmap, srcOffset+run.Pos, run.Length)
+       }
+
+       dstWriter.Finish()
+       return dstBitmap
+}
+
 // PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding
 type PlainBooleanEncoder struct {
        encoder
@@ -99,6 +135,29 @@ func (enc *PlainBooleanEncoder) PutSpaced(in []bool, 
validBits []byte, validBits
        enc.Put(bufferOut[:nvalid])
 }
 
+// PutSpacedBitmap encodes boolean values directly from a bitmap with validity 
information,
+// without converting to []bool. This avoids the 8x memory overhead of bool 
slices.
+// It compresses the bitmap by extracting only valid (non-null) bits.
+func (enc *PlainBooleanEncoder) PutSpacedBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64, validBits []byte, validBitsOffset int64) int64 {
+       if numValues == 0 {
+               return 0
+       }
+
+       // Count the number of valid values to pre-allocate destination bitmap
+       numValid := int64(bitutil.CountSetBits(validBits, int(validBitsOffset), 
int(numValues)))
+       if numValid == 0 {
+               return 0
+       }
+
+       // Compress bitmap: extract only valid bits
+       compressedBitmap := compressBitmapWithValidity(bitmap, bitmapOffset, 
numValues, validBits, validBitsOffset, numValid)
+
+       // Encode the compressed bitmap
+       enc.PutBitmap(compressedBitmap, 0, numValid)
+
+       return numValid
+}
+
 // EstimatedDataEncodedSize returns the current number of bytes that have
 // been buffered so far
 func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
@@ -140,6 +199,44 @@ func (enc *RleBooleanEncoder) PutSpaced(in []bool, 
validBits []byte, validBitsOf
        enc.Put(bufferOut[:nvalid])
 }
 
+// PutSpacedBitmap encodes boolean values from a bitmap with validity 
information.
+// Note: RleBooleanEncoder buffers values as []bool for RLE encoding at flush 
time,
+// so we extract valid bits to []bool. This is still better than the caller 
doing
+// the full bitmap-to-[]bool conversion for all values.
+func (enc *RleBooleanEncoder) PutSpacedBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64, validBits []byte, validBitsOffset int64) int64 {
+       if numValues == 0 {
+               return 0
+       }
+
+       // Count valid values
+       numValid := int64(bitutil.CountSetBits(validBits, int(validBitsOffset), 
int(numValues)))
+       if numValid == 0 {
+               return 0
+       }
+
+       // Extract valid bits to []bool for buffering
+       // Use SetBitRunReader to efficiently iterate over valid values
+       bufferOut := make([]bool, numValid)
+       idx := 0
+
+       reader := bitutils.NewSetBitRunReader(validBits, validBitsOffset, 
numValues)
+       for {
+               run := reader.NextRun()
+               if run.Length == 0 {
+                       break
+               }
+
+               // Convert this run of bits to bools
+               for i := int64(0); i < run.Length; i++ {
+                       bufferOut[idx] = bitutil.BitIsSet(bitmap, 
int(bitmapOffset+run.Pos+i))
+                       idx++
+               }
+       }
+
+       enc.Put(bufferOut)
+       return numValid
+}
+
 func (enc *RleBooleanEncoder) EstimatedDataEncodedSize() int64 {
        return rleLengthInBytes + int64(enc.maxRleBufferSize())
 }
diff --git a/parquet/internal/encoding/encoding_test.go 
b/parquet/internal/encoding/encoding_test.go
index 721015cc..26e87aa6 100644
--- a/parquet/internal/encoding/encoding_test.go
+++ b/parquet/internal/encoding/encoding_test.go
@@ -1069,3 +1069,201 @@ func TestBooleanPlainDecoderDecodeToBitmapUnaligned(t 
*testing.T) {
                t.Skip("Decoder does not support DecodeToBitmap")
        }
 }
+
+func TestPlainBooleanEncoderPutSpacedBitmapBasic(t *testing.T) {
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Optional, -1), 0, 0)
+       enc := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, false, descr, memory.DefaultAllocator)
+       benc := enc.(encoding.BooleanEncoder)
+
+       // Cast to access PutSpacedBitmap
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+       spacedEnc := benc.(bitmapSpacedEncoder)
+
+       // Data bitmap: [true, false, true, true, false, false, true, false]
+       bitmap := []byte{0b01001101} // LSB first
+       validBits := []byte{0xFF}    // all valid
+
+       numValid := spacedEnc.PutSpacedBitmap(bitmap, 0, 8, validBits, 0)
+       assert.Equal(t, int64(8), numValid)
+
+       buf, err := enc.FlushValues()
+       require.NoError(t, err)
+
+       // Decode and verify
+       dec := encoding.NewDecoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, descr, memory.DefaultAllocator)
+       bdec := dec.(encoding.BooleanDecoder)
+       err = bdec.SetData(8, buf.Bytes())
+       require.NoError(t, err)
+
+       out := make([]bool, 8)
+       n, err := bdec.Decode(out)
+       require.NoError(t, err)
+       assert.Equal(t, 8, n)
+
+       expected := []bool{true, false, true, true, false, false, true, false}
+       assert.Equal(t, expected, out)
+}
+
+func TestPlainBooleanEncoderPutSpacedBitmapWithNulls(t *testing.T) {
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Optional, -1), 0, 0)
+       enc := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, false, descr, memory.DefaultAllocator)
+       benc := enc.(encoding.BooleanEncoder)
+
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+       spacedEnc := benc.(bitmapSpacedEncoder)
+
+       // Data bitmap: [1,1,1,1,0,0,0,0]
+       bitmap := []byte{0b00001111}
+       // Valid bits: [1,0,1,0,1,0,1,0] - only positions 0,2,4,6 are valid
+       validBits := []byte{0b01010101}
+
+       numValid := spacedEnc.PutSpacedBitmap(bitmap, 0, 8, validBits, 0)
+       assert.Equal(t, int64(4), numValid, "should encode 4 valid values")
+
+       buf, err := enc.FlushValues()
+       require.NoError(t, err)
+
+       // Decode and verify - should get compressed bitmap (only valid values)
+       dec := encoding.NewDecoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, descr, memory.DefaultAllocator)
+       bdec := dec.(encoding.BooleanDecoder)
+       err = bdec.SetData(4, buf.Bytes())
+       require.NoError(t, err)
+
+       out := make([]bool, 4)
+       n, err := bdec.Decode(out)
+       require.NoError(t, err)
+       assert.Equal(t, 4, n)
+
+       // Valid positions 0,2,4,6 have data bits 1,1,0,0
+       expected := []bool{true, true, false, false}
+       assert.Equal(t, expected, out)
+}
+
+func TestPlainBooleanEncoderPutSpacedBitmapConsistency(t *testing.T) {
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Optional, -1), 0, 0)
+
+       // Verify PutSpacedBitmap produces same output as PutSpaced with []bool
+       bitmap := []byte{0b10110010}
+       validBits := []byte{0xFF}
+       numValues := int64(8)
+
+       // Encode using PutSpacedBitmap
+       enc1 := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, false, descr, memory.DefaultAllocator)
+       benc1 := enc1.(encoding.BooleanEncoder)
+
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+       spacedEnc1 := benc1.(bitmapSpacedEncoder)
+
+       spacedEnc1.PutSpacedBitmap(bitmap, 0, numValues, validBits, 0)
+       buf1, err := enc1.FlushValues()
+       require.NoError(t, err)
+
+       // Encode using PutSpaced with []bool
+       enc2 := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.Plain, false, descr, memory.DefaultAllocator)
+       benc2 := enc2.(encoding.BooleanEncoder)
+
+       bools := make([]bool, numValues)
+       for i := int64(0); i < numValues; i++ {
+               bools[i] = bitutil.BitIsSet(bitmap, int(i))
+       }
+       benc2.PutSpaced(bools, validBits, 0)
+       buf2, err := enc2.FlushValues()
+       require.NoError(t, err)
+
+       // Both should produce identical encoded output
+       assert.Equal(t, buf2.Bytes(), buf1.Bytes(), "encoded output should 
match")
+}
+
+func TestRleBooleanEncoderPutSpacedBitmap(t *testing.T) {
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Optional, -1), 0, 0)
+       enc := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, false, descr, memory.DefaultAllocator)
+       benc := enc.(encoding.BooleanEncoder)
+
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+       spacedEnc, ok := benc.(bitmapSpacedEncoder)
+       require.True(t, ok, "RleBooleanEncoder should implement 
bitmapSpacedEncoder interface")
+
+       // Data bitmap: [true,true,false,true,false,false,true,false]
+       bitmap := []byte{0b01001011}
+       // Valid bits: [1,0,1,0,1,0,1,0] - positions 0,2,4,6 are valid
+       validBits := []byte{0b01010101}
+
+       numValid := spacedEnc.PutSpacedBitmap(bitmap, 0, 8, validBits, 0)
+       assert.Equal(t, int64(4), numValid, "should encode 4 valid values")
+
+       buf, err := enc.FlushValues()
+       require.NoError(t, err)
+       defer buf.Release()
+
+       // Decode and verify - should get only the valid positions: [true, 
false, false, true]
+       dec := encoding.NewDecoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, descr, memory.DefaultAllocator)
+       bdec := dec.(encoding.BooleanDecoder)
+       err = bdec.SetData(4, buf.Bytes())
+       require.NoError(t, err)
+
+       out := make([]bool, 4)
+       n, err := bdec.Decode(out)
+       require.NoError(t, err)
+       assert.Equal(t, 4, n)
+
+       // Valid values are at positions 0,2,4,6 with values 
true,false,false,true
+       expected := []bool{true, false, false, true}
+       assert.Equal(t, expected, out)
+}
+
+func TestRleBooleanEncoderPutSpacedBitmapConsistency(t *testing.T) {
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Optional, -1), 0, 0)
+
+       // Create test data
+       bitmap := []byte{0b10110101}
+       validBits := []byte{0b11110011}
+
+       // Encode with bitmap method
+       enc1 := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, false, descr, memory.DefaultAllocator)
+       benc1 := enc1.(encoding.BooleanEncoder)
+       type bitmapSpacedEncoder interface {
+               PutSpacedBitmap(bitmap []byte, bitmapOffset int64, numValues 
int64, validBits []byte, validBitsOffset int64) int64
+       }
+       spacedEnc := benc1.(bitmapSpacedEncoder)
+       numValid := spacedEnc.PutSpacedBitmap(bitmap, 0, 8, validBits, 0)
+       buf1, _ := enc1.FlushValues()
+       defer buf1.Release()
+
+       // Encode with []bool method (reference)
+       enc2 := encoding.NewEncoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, false, descr, memory.DefaultAllocator)
+       benc2 := enc2.(encoding.BooleanEncoder)
+       spacedValues := make([]bool, 8)
+       for i := 0; i < 8; i++ {
+               spacedValues[i] = bitutil.BitIsSet(bitmap, i)
+       }
+       benc2.PutSpaced(spacedValues, validBits, 0)
+       buf2, _ := enc2.FlushValues()
+       defer buf2.Release()
+
+       // Both should produce identical output
+       assert.Equal(t, buf2.Bytes(), buf1.Bytes(), "bitmap and []bool methods 
should produce identical output")
+
+       // Decode both and verify they match
+       dec1 := encoding.NewDecoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, descr, memory.DefaultAllocator)
+       bdec1 := dec1.(encoding.BooleanDecoder)
+       bdec1.SetData(int(numValid), buf1.Bytes())
+
+       dec2 := encoding.NewDecoder(parquet.Types.Boolean, 
parquet.Encodings.RLE, descr, memory.DefaultAllocator)
+       bdec2 := dec2.(encoding.BooleanDecoder)
+       bdec2.SetData(int(numValid), buf2.Bytes())
+
+       out1 := make([]bool, numValid)
+       out2 := make([]bool, numValid)
+       bdec1.Decode(out1)
+       bdec2.Decode(out2)
+
+       assert.Equal(t, out2, out1, "decoded values should match")
+}
diff --git a/parquet/metadata/bitmap_benchmark_test.go 
b/parquet/metadata/bitmap_benchmark_test.go
new file mode 100644
index 00000000..ef61d6ff
--- /dev/null
+++ b/parquet/metadata/bitmap_benchmark_test.go
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadata
+
+import (
+       "testing"
+
+       "github.com/apache/arrow-go/v18/arrow/bitutil"
+       "github.com/apache/arrow-go/v18/arrow/memory"
+       "github.com/apache/arrow-go/v18/parquet"
+       "github.com/apache/arrow-go/v18/parquet/schema"
+)
+
+// BenchmarkBooleanStatisticsFromBitmap benchmarks statistics update using 
direct bitmap operations
+func BenchmarkBooleanStatisticsFromBitmap(b *testing.B) {
+       const numValues = 100000
+
+       bitmap := make([]byte, bitutil.BytesForBits(int64(numValues)))
+       for i := 0; i < numValues; i++ {
+               if i%2 == 0 {
+                       bitutil.SetBit(bitmap, i)
+               }
+       }
+
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Required, -1), 0, 0)
+
+       b.ResetTimer()
+       b.ReportAllocs()
+
+       for i := 0; i < b.N; i++ {
+               stats := NewBooleanStatistics(descr, memory.DefaultAllocator)
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+       }
+}
+
+// BenchmarkBloomFilterHashingFromBitmap benchmarks bloom filter hashing using 
direct bitmap operations
+func BenchmarkBloomFilterHashingFromBitmap(b *testing.B) {
+       const numValues = 100000
+
+       bitmap := make([]byte, bitutil.BytesForBits(int64(numValues)))
+       for i := 0; i < numValues; i++ {
+               if i%2 == 0 {
+                       bitutil.SetBit(bitmap, i)
+               }
+       }
+
+       bloom := NewBloomFilter(1024, 1024, memory.DefaultAllocator)
+
+       b.ResetTimer()
+       b.ReportAllocs()
+
+       for i := 0; i < b.N; i++ {
+               hashes := GetHashesFromBitmap(bloom.Hasher(), bitmap, 0, 
numValues)
+               _ = hashes
+       }
+}
+
+// BenchmarkBooleanWritePathBitmap benchmarks the complete write path with 
bitmap operations
+func BenchmarkBooleanWritePathBitmap(b *testing.B) {
+       const numValues = 100000
+
+       bitmap := make([]byte, bitutil.BytesForBits(int64(numValues)))
+       for i := 0; i < numValues; i++ {
+               if i%2 == 0 {
+                       bitutil.SetBit(bitmap, i)
+               }
+       }
+
+       descr := schema.NewColumn(schema.NewBooleanNode("bool", 
parquet.Repetitions.Required, -1), 0, 0)
+
+       b.ResetTimer()
+       b.ReportAllocs()
+
+       for i := 0; i < b.N; i++ {
+               // Statistics update
+               stats := NewBooleanStatistics(descr, memory.DefaultAllocator)
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+
+               // Bloom filter update
+               bloom := NewBloomFilter(1024, 1024, memory.DefaultAllocator)
+               hashes := GetHashesFromBitmap(bloom.Hasher(), bitmap, 0, 
numValues)
+               bloom.InsertBulk(hashes)
+       }
+}
diff --git a/parquet/metadata/bloom_filter.go b/parquet/metadata/bloom_filter.go
index f0ec8faf..13a4730c 100644
--- a/parquet/metadata/bloom_filter.go
+++ b/parquet/metadata/bloom_filter.go
@@ -134,6 +134,64 @@ func GetSpacedHashes[T parquet.ColumnTypes](h Hasher, 
numValid int64, vals []T,
        return out
 }
 
+// GetHashesFromBitmap computes hashes for boolean values directly from a 
bitmap
+// without converting to []bool, avoiding 8x memory overhead.
+func GetHashesFromBitmap(h Hasher, bitmap []byte, bitmapOffset int64, 
numValues int64) []uint64 {
+       if numValues == 0 {
+               return []uint64{}
+       }
+
+       // Convert each bool bit to []byte for hashing
+       // Reuse a single-byte slice to avoid allocating per value
+       out := make([]uint64, numValues)
+       b := []byte{0}
+       for i := range numValues {
+               val := bitutil.BitIsSet(bitmap, int(bitmapOffset+i))
+               // Hash the boolean as a single byte (0x00 or 0x01)
+               if val {
+                       b[0] = 1
+               } else {
+                       b[0] = 0
+               }
+               out[i] = h.Sum64(b)
+       }
+       return out
+}
+
+// GetSpacedHashesFromBitmap computes hashes for boolean values from a bitmap 
with validity information,
+// only hashing valid (non-null) values. Returns hashes for numValid values.
+// This avoids the 8x memory overhead of converting to []bool.
+func GetSpacedHashesFromBitmap(h Hasher, numValid int64, bitmap []byte, 
bitmapOffset int64, numValues int64, validBits []byte, validBitsOffset int64) 
[]uint64 {
+       if numValid == 0 {
+               return []uint64{}
+       }
+
+       out := make([]uint64, 0, numValid)
+
+       // Use SetBitRunReader to efficiently iterate over valid values
+       // Reuse a single-byte slice to avoid allocating per value
+       b := []byte{0}
+       setReader := bitutils.NewSetBitRunReader(validBits, validBitsOffset, 
numValues)
+       for {
+               run := setReader.NextRun()
+               if run.Length == 0 {
+                       break
+               }
+
+               // Hash each valid bit in this run
+               for i := range run.Length {
+                       val := bitutil.BitIsSet(bitmap, 
int(bitmapOffset+run.Pos+i))
+                       if val {
+                               b[0] = 1
+                       } else {
+                               b[0] = 0
+                       }
+                       out = append(out, h.Sum64(b))
+               }
+       }
+       return out
+}
+
 func getBytes[T parquet.ColumnTypes](v T) []byte {
        switch v := any(v).(type) {
        case parquet.ByteArray:
diff --git a/parquet/metadata/bloom_filter_test.go 
b/parquet/metadata/bloom_filter_test.go
index ea671438..011f67a5 100644
--- a/parquet/metadata/bloom_filter_test.go
+++ b/parquet/metadata/bloom_filter_test.go
@@ -360,3 +360,364 @@ func TestAdaptiveBloomFilterEndToEnd(t *testing.T) {
                        len(bf.candidates), 
bf.optimalCandidate().bloomFilter.Size())
        })
 }
+
+func TestGetHashesFromBitmap(t *testing.T) {
+       mem := memory.DefaultAllocator
+       bf := NewBloomFilter(32, 1024, mem)
+       hasher := bf.Hasher()
+
+       t.Run("empty bitmap", func(t *testing.T) {
+               hashes := GetHashesFromBitmap(hasher, []byte{}, 0, 0)
+               assert.Empty(t, hashes)
+       })
+
+       t.Run("single byte aligned", func(t *testing.T) {
+               bitmap := []byte{0b10101010} // alternating true/false
+               numValues := int64(8)
+
+               hashes := GetHashesFromBitmap(hasher, bitmap, 0, numValues)
+               assert.Len(t, hashes, 8)
+
+               // Verify each hash corresponds to the bit value
+               // Bit 0 = 0 (false), Bit 1 = 1 (true), Bit 2 = 0 (false), etc.
+               for i := int64(0); i < numValues; i++ {
+                       val := bitutil.BitIsSet(bitmap, int(i))
+                       expectedHash := GetHash(hasher, val)
+                       assert.Equal(t, expectedHash, hashes[i], "hash mismatch 
at position %d", i)
+               }
+       })
+
+       t.Run("unaligned offset", func(t *testing.T) {
+               // Create bitmap: [0,0,0,1,1,0,1,0, 1,1,1,...]
+               bitmap := make([]byte, 2)
+               bitutil.SetBit(bitmap, 3)
+               bitutil.SetBit(bitmap, 4)
+               bitutil.SetBit(bitmap, 6)
+               bitutil.SetBit(bitmap, 8)
+               bitutil.SetBit(bitmap, 9)
+               bitutil.SetBit(bitmap, 10)
+
+               // Read 5 bits starting from offset 3: [1,1,0,1,0]
+               offset := int64(3)
+               numValues := int64(5)
+
+               hashes := GetHashesFromBitmap(hasher, bitmap, offset, numValues)
+               assert.Len(t, hashes, 5)
+
+               // Verify the hashes match the bit values at the offset
+               for i := int64(0); i < numValues; i++ {
+                       val := bitutil.BitIsSet(bitmap, int(offset+i))
+                       expectedHash := GetHash(hasher, val)
+                       assert.Equal(t, expectedHash, hashes[i], "hash mismatch 
at offset %d position %d", offset, i)
+               }
+       })
+
+       t.Run("all true bits", func(t *testing.T) {
+               bitmap := []byte{0xFF, 0xFF} // all bits set
+               numValues := int64(16)
+
+               hashes := GetHashesFromBitmap(hasher, bitmap, 0, numValues)
+               assert.Len(t, hashes, 16)
+
+               // All hashes should be identical (hash of true)
+               expectedHash := GetHash(hasher, true)
+               for i, hash := range hashes {
+                       assert.Equal(t, expectedHash, hash, "hash mismatch at 
position %d", i)
+               }
+       })
+
+       t.Run("all false bits", func(t *testing.T) {
+               bitmap := []byte{0x00, 0x00} // all bits clear
+               numValues := int64(16)
+
+               hashes := GetHashesFromBitmap(hasher, bitmap, 0, numValues)
+               assert.Len(t, hashes, 16)
+
+               // All hashes should be identical (hash of false)
+               expectedHash := GetHash(hasher, false)
+               for i, hash := range hashes {
+                       assert.Equal(t, expectedHash, hash, "hash mismatch at 
position %d", i)
+               }
+       })
+
+       t.Run("large bitmap", func(t *testing.T) {
+               numValues := int64(1000)
+               bitmap := make([]byte, bitutil.BytesForBits(numValues))
+
+               // Set every 3rd bit
+               for i := int64(0); i < numValues; i += 3 {
+                       bitutil.SetBit(bitmap, int(i))
+               }
+
+               hashes := GetHashesFromBitmap(hasher, bitmap, 0, numValues)
+               assert.Len(t, hashes, 1000)
+
+               // Verify hashes match bit values
+               hashTrue := GetHash(hasher, true)
+               hashFalse := GetHash(hasher, false)
+
+               for i := int64(0); i < numValues; i++ {
+                       if i%3 == 0 {
+                               assert.Equal(t, hashTrue, hashes[i], "expected 
true hash at position %d", i)
+                       } else {
+                               assert.Equal(t, hashFalse, hashes[i], "expected 
false hash at position %d", i)
+                       }
+               }
+       })
+
+       t.Run("consistency with GetHash", func(t *testing.T) {
+               // Verify that GetHashesFromBitmap produces same results as 
GetHash for each bool
+               testCases := []struct {
+                       name   string
+                       bitmap []byte
+                       offset int64
+                       count  int64
+               }{
+                       {"aligned_8", []byte{0b10110100}, 0, 8},
+                       {"unaligned_start", []byte{0xFF, 0x00}, 3, 10},
+                       {"single_bit", []byte{0b00000001}, 0, 1},
+                       {"partial_byte", []byte{0b11110000}, 2, 5},
+               }
+
+               for _, tc := range testCases {
+                       t.Run(tc.name, func(t *testing.T) {
+                               hashes := GetHashesFromBitmap(hasher, 
tc.bitmap, tc.offset, tc.count)
+
+                               // Generate expected hashes using GetHash on 
individual bools
+                               for i := int64(0); i < tc.count; i++ {
+                                       val := bitutil.BitIsSet(tc.bitmap, 
int(tc.offset+i))
+                                       expectedHash := GetHash(hasher, val)
+                                       assert.Equal(t, expectedHash, hashes[i],
+                                               "hash mismatch at position %d 
(bit value: %v)", i, val)
+                               }
+                       })
+               }
+       })
+}
+
+func TestGetSpacedHashesFromBitmap(t *testing.T) {
+       mem := memory.DefaultAllocator
+       bf := NewBloomFilter(32, 1024, mem)
+       hasher := bf.Hasher()
+
+       t.Run("all valid bits", func(t *testing.T) {
+               bitmap := []byte{0b10101010} // alternating true/false
+               validBits := []byte{0xFF}    // all valid
+               numValues := int64(8)
+               numValid := int64(8)
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, 8)
+
+               // Should match non-spaced version since all are valid
+               expectedHashes := GetHashesFromBitmap(hasher, bitmap, 0, 
numValues)
+               assert.Equal(t, expectedHashes, hashes)
+       })
+
+       t.Run("some null values", func(t *testing.T) {
+               // Data bitmap: [1,1,1,1,0,0,0,0]
+               bitmap := []byte{0b00001111}
+               // Valid bits: [1,0,1,0,1,0,1,0] - only positions 0,2,4,6 are 
valid
+               validBits := []byte{0b01010101}
+               numValues := int64(8)
+               numValid := int64(4)
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, 4, "should only hash valid values")
+
+               // Manually verify the valid positions: 0,2,4,6 with data bits: 
1,1,0,0
+               expectedHashes := []uint64{
+                       GetHash(hasher, true),  // position 0: bit is set
+                       GetHash(hasher, true),  // position 2: bit is set
+                       GetHash(hasher, false), // position 4: bit is clear
+                       GetHash(hasher, false), // position 6: bit is clear
+               }
+               assert.Equal(t, expectedHashes, hashes)
+       })
+
+       t.Run("all null values", func(t *testing.T) {
+               bitmap := []byte{0xFF}
+               validBits := []byte{0x00} // all null
+
+               hashes := GetSpacedHashesFromBitmap(hasher, 0, bitmap, 0, 8, 
validBits, 0)
+               assert.Empty(t, hashes, "should return empty for all nulls")
+       })
+
+       t.Run("unaligned offsets", func(t *testing.T) {
+               // Create larger bitmaps with offsets
+               bitmap := make([]byte, 3)
+               validBits := make([]byte, 3)
+
+               // Set data bits starting from offset 5
+               bitutil.SetBit(bitmap, 5) // true
+               bitutil.SetBit(bitmap, 6) // true
+               // bit 7 = false
+               bitutil.SetBit(bitmap, 8) // true
+               // bit 9 = false
+               bitutil.SetBit(bitmap, 10) // true
+
+               // Set valid bits (skip position 6)
+               bitutil.SetBit(validBits, 5) // valid
+               // bit 6 is null
+               bitutil.SetBit(validBits, 7)  // valid
+               bitutil.SetBit(validBits, 8)  // valid
+               bitutil.SetBit(validBits, 9)  // valid
+               bitutil.SetBit(validBits, 10) // valid
+
+               numValues := int64(6)
+               numValid := int64(5) // all except position 6
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
5, numValues, validBits, 5)
+               assert.Len(t, hashes, 5)
+
+               // Verify each hash: positions 5(true), 7(false), 8(true), 
9(false), 10(true)
+               expectedHashes := []uint64{
+                       GetHash(hasher, true),  // position 5
+                       GetHash(hasher, false), // position 7
+                       GetHash(hasher, true),  // position 8
+                       GetHash(hasher, false), // position 9
+                       GetHash(hasher, true),  // position 10
+               }
+               assert.Equal(t, expectedHashes, hashes)
+       })
+
+       t.Run("first half valid", func(t *testing.T) {
+               bitmap := []byte{0xFF, 0xFF}    // all true
+               validBits := []byte{0xFF, 0x00} // first 8 valid, last 8 null
+               numValues := int64(16)
+               numValid := int64(8)
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, 8)
+
+               // All should be hash of true
+               expectedHash := GetHash(hasher, true)
+               for i, hash := range hashes {
+                       assert.Equal(t, expectedHash, hash, "position %d", i)
+               }
+       })
+
+       t.Run("second half valid", func(t *testing.T) {
+               bitmap := []byte{0xFF, 0x00}    // first 8 true, last 8 false
+               validBits := []byte{0x00, 0xFF} // first 8 null, last 8 valid
+               numValues := int64(16)
+               numValid := int64(8)
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, 8)
+
+               // All should be hash of false
+               expectedHash := GetHash(hasher, false)
+               for i, hash := range hashes {
+                       assert.Equal(t, expectedHash, hash, "position %d", i)
+               }
+       })
+
+       t.Run("sparse valid bits", func(t *testing.T) {
+               // Every 3rd bit is valid
+               numValues := int64(24)
+               bitmap := make([]byte, bitutil.BytesForBits(numValues))
+               validBits := make([]byte, bitutil.BytesForBits(numValues))
+
+               // Set data: alternating pattern
+               for i := int64(0); i < numValues; i++ {
+                       if i%2 == 0 {
+                               bitutil.SetBit(bitmap, int(i))
+                       }
+               }
+
+               // Set valid: every 3rd bit
+               numValid := int64(0)
+               for i := int64(0); i < numValues; i += 3 {
+                       bitutil.SetBit(validBits, int(i))
+                       numValid++
+               }
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, int(numValid))
+
+               // Verify each hash matches the expected bit value
+               hashIdx := 0
+               for i := int64(0); i < numValues; i += 3 {
+                       val := bitutil.BitIsSet(bitmap, int(i))
+                       expectedHash := GetHash(hasher, val)
+                       assert.Equal(t, expectedHash, hashes[hashIdx], "hash 
mismatch at position %d", i)
+                       hashIdx++
+               }
+       })
+
+       t.Run("consistency with GetSpacedHashes", func(t *testing.T) {
+               // Verify GetSpacedHashesFromBitmap produces same results as 
GetSpacedHashes with []bool
+               testCases := []struct {
+                       name         string
+                       bitmap       []byte
+                       validBits    []byte
+                       bitmapOffset int64
+                       validOffset  int64
+                       numValues    int64
+                       numValid     int64
+               }{
+                       {"all_valid", []byte{0b10110010}, []byte{0xFF}, 0, 0, 
8, 8},
+                       {"half_valid", []byte{0xFF}, []byte{0x0F}, 0, 0, 8, 4},
+                       {"sparse", []byte{0xFF, 0x00}, []byte{0b01010101, 
0b10101010}, 0, 0, 16, 8},
+                       {"unaligned", []byte{0xFF, 0xFF, 0xFF}, []byte{0xFF, 
0x0F, 0x00}, 3, 3, 10, 9},
+               }
+
+               for _, tc := range testCases {
+                       t.Run(tc.name, func(t *testing.T) {
+                               // Get hashes using bitmap method
+                               hashesFromBitmap := 
GetSpacedHashesFromBitmap(hasher, tc.numValid, tc.bitmap, tc.bitmapOffset, 
tc.numValues, tc.validBits, tc.validOffset)
+
+                               // Get hashes using []bool method
+                               bools := make([]bool, tc.numValues)
+                               for i := int64(0); i < tc.numValues; i++ {
+                                       bools[i] = bitutil.BitIsSet(tc.bitmap, 
int(tc.bitmapOffset+i))
+                               }
+                               hashesFromBool := GetSpacedHashes(hasher, 
tc.numValid, bools, tc.validBits, tc.validOffset)
+
+                               // Both should produce identical results
+                               assert.Equal(t, hashesFromBool, 
hashesFromBitmap, "hash mismatch")
+                               assert.Len(t, hashesFromBitmap, 
int(tc.numValid))
+                       })
+               }
+       })
+
+       t.Run("large bitmap with sparse valid", func(t *testing.T) {
+               numValues := int64(1000)
+               bitmap := make([]byte, bitutil.BytesForBits(numValues))
+               validBits := make([]byte, bitutil.BytesForBits(numValues))
+
+               // Set every 5th bit in bitmap
+               for i := int64(0); i < numValues; i += 5 {
+                       bitutil.SetBit(bitmap, int(i))
+               }
+
+               // Set every 7th bit as valid
+               numValid := int64(0)
+               for i := int64(0); i < numValues; i += 7 {
+                       bitutil.SetBit(validBits, int(i))
+                       numValid++
+               }
+
+               hashes := GetSpacedHashesFromBitmap(hasher, numValid, bitmap, 
0, numValues, validBits, 0)
+               assert.Len(t, hashes, int(numValid))
+
+               // Verify count of true vs false hashes
+               hashTrue := GetHash(hasher, true)
+               hashFalse := GetHash(hasher, false)
+
+               trueCount, falseCount := 0, 0
+               for _, h := range hashes {
+                       switch h {
+                       case hashTrue:
+                               trueCount++
+                       case hashFalse:
+                               falseCount++
+                       default:
+                               t.Fatalf("unexpected hash value")
+                       }
+               }
+
+               assert.Equal(t, int(numValid), trueCount+falseCount, "all 
hashes should be true or false")
+       })
+}
diff --git a/parquet/metadata/statistics_test.go 
b/parquet/metadata/statistics_test.go
index 70dd3ebc..d312437a 100644
--- a/parquet/metadata/statistics_test.go
+++ b/parquet/metadata/statistics_test.go
@@ -290,3 +290,316 @@ func TestUnsignedDefaultStats(t *testing.T) {
                assert.Zero(t, maxv)
        }
 }
+
+func TestBooleanStatisticsUpdateFromBitmap(t *testing.T) {
+       mem := memory.DefaultAllocator
+       node, err := schema.NewPrimitiveNode("test", 
parquet.Repetitions.Required, parquet.Types.Boolean, 0, 0)
+       require.NoError(t, err)
+       descr := schema.NewColumn(node, 0, 0)
+
+       t.Run("all true bits", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0xFF, 0xFF} // all bits set
+               numValues := int64(16)
+
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, true, stats.Min(), "min should be true when all 
bits are true")
+               assert.Equal(t, true, stats.Max(), "max should be true when all 
bits are true")
+               assert.Equal(t, numValues, stats.NumValues())
+               assert.Equal(t, int64(0), stats.NullCount())
+       })
+
+       t.Run("all false bits", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0x00, 0x00} // all bits clear
+               numValues := int64(16)
+
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, false, stats.Min(), "min should be false when 
all bits are false")
+               assert.Equal(t, false, stats.Max(), "max should be false when 
all bits are false")
+               assert.Equal(t, numValues, stats.NumValues())
+               assert.Equal(t, int64(0), stats.NullCount())
+       })
+
+       t.Run("mixed true and false", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0b10101010} // alternating bits
+               numValues := int64(8)
+
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, false, stats.Min(), "min should be false with 
mixed bits")
+               assert.Equal(t, true, stats.Max(), "max should be true with 
mixed bits")
+               assert.Equal(t, numValues, stats.NumValues())
+       })
+
+       t.Run("unaligned offset", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               // Bitmap: [0,0,0,1,1,0,1,0, 1,1,...]
+               bitmap := make([]byte, 2)
+               bitutil.SetBit(bitmap, 3)
+               bitutil.SetBit(bitmap, 4)
+               bitutil.SetBit(bitmap, 6)
+               bitutil.SetBit(bitmap, 8)
+               bitutil.SetBit(bitmap, 9)
+
+               // Read 5 bits starting from offset 3: [1,1,0,1,0] -> has both 
true and false
+               offset := int64(3)
+               numValues := int64(5)
+
+               stats.UpdateFromBitmap(bitmap, offset, numValues, 0)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, false, stats.Min(), "should find false bit")
+               assert.Equal(t, true, stats.Max(), "should find true bit")
+               assert.Equal(t, numValues, stats.NumValues())
+       })
+
+       t.Run("with nulls", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0xFF}
+               numValues := int64(8)
+               numNulls := int64(3)
+
+               stats.UpdateFromBitmap(bitmap, 0, numValues, numNulls)
+
+               assert.Equal(t, numValues, stats.NumValues())
+               assert.Equal(t, numNulls, stats.NullCount())
+       })
+
+       t.Run("multiple updates", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+
+               // First update: all true
+               bitmap1 := []byte{0xFF}
+               stats.UpdateFromBitmap(bitmap1, 0, 8, 0)
+               assert.Equal(t, true, stats.Min())
+               assert.Equal(t, true, stats.Max())
+
+               // Second update: has false - should update min
+               bitmap2 := []byte{0x00}
+               stats.UpdateFromBitmap(bitmap2, 0, 8, 0)
+               assert.Equal(t, false, stats.Min(), "min should update to 
false")
+               assert.Equal(t, true, stats.Max(), "max should remain true")
+               assert.Equal(t, int64(16), stats.NumValues())
+       })
+
+       t.Run("early exit optimization", func(t *testing.T) {
+               // Create a large bitmap with both true and false in first few 
bits
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               numValues := int64(10000)
+               bitmap := make([]byte, bitutil.BytesForBits(numValues))
+
+               // Set first bit to true, second to false (early exit should 
trigger)
+               bitutil.SetBit(bitmap, 0) // true
+               // bit 1 is false
+
+               stats.UpdateFromBitmap(bitmap, 0, numValues, 0)
+
+               assert.Equal(t, false, stats.Min())
+               assert.Equal(t, true, stats.Max())
+       })
+
+       t.Run("consistency with Update method", func(t *testing.T) {
+               // Verify UpdateFromBitmap produces same results as Update with 
[]bool
+               testCases := []struct {
+                       name   string
+                       bitmap []byte
+                       offset int64
+                       count  int64
+               }{
+                       {"all_true", []byte{0xFF}, 0, 8},
+                       {"all_false", []byte{0x00}, 0, 8},
+                       {"mixed", []byte{0b10110010}, 0, 8},
+                       {"unaligned", []byte{0xFF, 0x00}, 3, 10},
+               }
+
+               for _, tc := range testCases {
+                       t.Run(tc.name, func(t *testing.T) {
+                               stats1 := metadata.NewBooleanStatistics(descr, 
mem)
+                               stats2 := metadata.NewBooleanStatistics(descr, 
mem)
+
+                               // Update using bitmap
+                               stats1.UpdateFromBitmap(tc.bitmap, tc.offset, 
tc.count, 0)
+
+                               // Update using []bool
+                               bools := make([]bool, tc.count)
+                               for i := int64(0); i < tc.count; i++ {
+                                       bools[i] = bitutil.BitIsSet(tc.bitmap, 
int(tc.offset+i))
+                               }
+                               stats2.Update(bools, 0)
+
+                               // Both should produce same results
+                               assert.Equal(t, stats2.Min(), stats1.Min(), 
"min mismatch")
+                               assert.Equal(t, stats2.Max(), stats1.Max(), 
"max mismatch")
+                               assert.Equal(t, stats2.NumValues(), 
stats1.NumValues(), "numValues mismatch")
+                       })
+               }
+       })
+}
+
+func TestBooleanStatisticsUpdateFromBitmapSpaced(t *testing.T) {
+       mem := memory.DefaultAllocator
+       node, err := schema.NewPrimitiveNode("test", 
parquet.Repetitions.Optional, parquet.Types.Boolean, 0, 0)
+       require.NoError(t, err)
+       descr := schema.NewColumn(node, 0, 0)
+
+       t.Run("all valid bits", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0b10101010} // data: alternating
+               validBits := []byte{0xFF}    // all valid
+               numValues := int64(8)
+
+               stats.UpdateFromBitmapSpaced(bitmap, 0, numValues, validBits, 
0, 0)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, false, stats.Min())
+               assert.Equal(t, true, stats.Max())
+               assert.Equal(t, numValues, stats.NumValues())
+       })
+
+       t.Run("some null values", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               // Data bitmap: [1,1,1,1,0,0,0,0]
+               bitmap := []byte{0b00001111}
+               // Valid bits: [1,0,1,0,1,0,1,0] - only positions 0,2,4,6 are 
valid
+               validBits := []byte{0b01010101}
+               numValues := int64(8)
+               numNulls := int64(4)
+
+               stats.UpdateFromBitmapSpaced(bitmap, 0, numValues, validBits, 
0, numNulls)
+
+               assert.True(t, stats.HasMinMax())
+               // Valid positions are 0,2,4,6 with data bits: 1,1,0,0
+               assert.Equal(t, false, stats.Min(), "should find false at 
position 4 or 6")
+               assert.Equal(t, true, stats.Max(), "should find true at 
position 0 or 2")
+               assert.Equal(t, int64(4), stats.NumValues(), "should count only 
valid values")
+               assert.Equal(t, numNulls, stats.NullCount())
+       })
+
+       t.Run("all null values", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               bitmap := []byte{0xFF}
+               validBits := []byte{0x00} // all null
+               numValues := int64(8)
+
+               stats.UpdateFromBitmapSpaced(bitmap, 0, numValues, validBits, 
0, numValues)
+
+               assert.False(t, stats.HasMinMax(), "should not have min/max 
with all nulls")
+               assert.Equal(t, int64(0), stats.NumValues())
+               assert.Equal(t, numValues, stats.NullCount())
+       })
+
+       t.Run("unaligned offsets", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               // Create larger bitmaps with offset
+               bitmap := make([]byte, 3)
+               validBits := make([]byte, 3)
+
+               // Set some data bits starting from offset 5
+               bitutil.SetBit(bitmap, 5) // true
+               bitutil.SetBit(bitmap, 6) // true
+               // bit 7 = false
+               bitutil.SetBit(bitmap, 8) // true
+               // bit 9 = false
+
+               // Set valid bits (skip position 6)
+               bitutil.SetBit(validBits, 5)
+               // bit 6 is null
+               bitutil.SetBit(validBits, 7)
+               bitutil.SetBit(validBits, 8)
+               bitutil.SetBit(validBits, 9)
+
+               stats.UpdateFromBitmapSpaced(bitmap, 5, 5, validBits, 5, 1)
+
+               assert.True(t, stats.HasMinMax())
+               // Valid positions: 5(true), 7(false), 8(true), 9(false)
+               assert.Equal(t, false, stats.Min())
+               assert.Equal(t, true, stats.Max())
+               assert.Equal(t, int64(4), stats.NumValues())
+       })
+
+       t.Run("only true values valid", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               // Data: [1,1,1,1,0,0,0,0]
+               bitmap := []byte{0b00001111}
+               // Valid: only first 4 bits (all true in data)
+               validBits := []byte{0b00001111}
+
+               stats.UpdateFromBitmapSpaced(bitmap, 0, 8, validBits, 0, 4)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, true, stats.Min(), "only true values are valid")
+               assert.Equal(t, true, stats.Max())
+       })
+
+       t.Run("only false values valid", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+               // Data: [1,1,1,1,0,0,0,0]
+               bitmap := []byte{0b00001111}
+               // Valid: only last 4 bits (all false in data)
+               validBits := []byte{0b11110000}
+
+               stats.UpdateFromBitmapSpaced(bitmap, 0, 8, validBits, 0, 4)
+
+               assert.True(t, stats.HasMinMax())
+               assert.Equal(t, false, stats.Min(), "only false values are 
valid")
+               assert.Equal(t, false, stats.Max())
+       })
+
+       t.Run("consistency with UpdateSpaced", func(t *testing.T) {
+               // Verify UpdateFromBitmapSpaced produces same results as 
UpdateSpaced with []bool
+               bitmap := []byte{0b10110010}
+               validBits := []byte{0b11010110}
+               numValues := int64(8)
+               numNulls := int64(3)
+
+               stats1 := metadata.NewBooleanStatistics(descr, mem)
+               stats2 := metadata.NewBooleanStatistics(descr, mem)
+
+               // Update using bitmap
+               stats1.UpdateFromBitmapSpaced(bitmap, 0, numValues, validBits, 
0, numNulls)
+
+               // Update using []bool
+               bools := make([]bool, numValues)
+               for i := int64(0); i < numValues; i++ {
+                       bools[i] = bitutil.BitIsSet(bitmap, int(i))
+               }
+               stats2.UpdateSpaced(bools, validBits, 0, numNulls)
+
+               // Both should produce same results
+               assert.Equal(t, stats2.HasMinMax(), stats1.HasMinMax())
+               if stats1.HasMinMax() {
+                       assert.Equal(t, stats2.Min(), stats1.Min(), "min 
mismatch")
+                       assert.Equal(t, stats2.Max(), stats1.Max(), "max 
mismatch")
+               }
+               assert.Equal(t, stats2.NumValues(), stats1.NumValues(), 
"numValues mismatch")
+               assert.Equal(t, stats2.NullCount(), stats1.NullCount(), 
"nullCount mismatch")
+       })
+
+       t.Run("multiple updates accumulate correctly", func(t *testing.T) {
+               stats := metadata.NewBooleanStatistics(descr, mem)
+
+               // First update: only true values
+               bitmap1 := []byte{0xFF}
+               validBits1 := []byte{0x0F} // first 4 valid
+               stats.UpdateFromBitmapSpaced(bitmap1, 0, 8, validBits1, 0, 4)
+               assert.Equal(t, true, stats.Min())
+               assert.Equal(t, true, stats.Max())
+
+               // Second update: has false values
+               bitmap2 := []byte{0x00}
+               validBits2 := []byte{0xF0} // last 4 valid
+               stats.UpdateFromBitmapSpaced(bitmap2, 0, 8, validBits2, 0, 4)
+
+               assert.Equal(t, false, stats.Min(), "should update to include 
false")
+               assert.Equal(t, true, stats.Max())
+               assert.Equal(t, int64(8), stats.NumValues())
+               assert.Equal(t, int64(8), stats.NullCount())
+       })
+}
diff --git a/parquet/metadata/statistics_types.gen.go 
b/parquet/metadata/statistics_types.gen.go
index 73988e5e..a00010f9 100644
--- a/parquet/metadata/statistics_types.gen.go
+++ b/parquet/metadata/statistics_types.gen.go
@@ -24,6 +24,7 @@ import (
 
        "github.com/apache/arrow-go/v18/arrow"
        "github.com/apache/arrow-go/v18/arrow/array"
+       "github.com/apache/arrow-go/v18/arrow/bitutil"
        "github.com/apache/arrow-go/v18/arrow/float16"
        "github.com/apache/arrow-go/v18/arrow/memory"
        "github.com/apache/arrow-go/v18/internal/bitutils"
@@ -1713,6 +1714,82 @@ func (s *BooleanStatistics) UpdateFromArrow(values 
arrow.Array, updateCounts boo
        return fmt.Errorf("%w: update boolean stats from Arrow", 
arrow.ErrNotImplemented)
 }
 
+// UpdateFromBitmap updates statistics from a boolean bitmap without 
converting to []bool.
+// This avoids the 8x memory overhead and provides better performance.
+func (s *BooleanStatistics) UpdateFromBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64, numNulls int64) {
+       s.IncNulls(numNulls)
+       s.nvalues += numValues
+
+       if numValues == 0 {
+               return
+       }
+
+       s.SetMinMax(s.getMinMaxFromBitmap(bitmap, bitmapOffset, numValues))
+}
+
+// UpdateFromBitmapSpaced updates statistics from a spaced boolean bitmap.
+func (s *BooleanStatistics) UpdateFromBitmapSpaced(bitmap []byte, bitmapOffset 
int64, numValues int64, validBits []byte, validBitsOffset int64, numNull int64) 
{
+       s.IncNulls(numNull)
+       notnull := numValues - numNull
+       s.nvalues += notnull
+
+       if notnull == 0 {
+               return
+       }
+
+       // For spaced case, we need to check valid bits
+       min := s.defaultMin()
+       max := s.defaultMax()
+
+       if s.bitSetReader == nil {
+               s.bitSetReader = bitutils.NewSetBitRunReader(validBits, 
validBitsOffset, numValues)
+       } else {
+               s.bitSetReader.Reset(validBits, validBitsOffset, numValues)
+       }
+
+       for {
+               run := s.bitSetReader.NextRun()
+               if run.Length == 0 {
+                       break
+               }
+               // Check bits in this valid run
+               for i := int64(0); i < run.Length; i++ {
+                       if bitutil.BitIsSet(bitmap, 
int(bitmapOffset+run.Pos+i)) {
+                               max = true
+                       } else {
+                               min = false
+                       }
+                       // Early exit if we've found both values
+                       if !min && max {
+                               s.SetMinMax(min, max)
+                               return
+                       }
+               }
+       }
+
+       s.SetMinMax(min, max)
+}
+
+// getMinMaxFromBitmap finds min/max directly from bitmap.
+// For booleans: min is false if any false exists, max is true if any true 
exists.
+func (s *BooleanStatistics) getMinMaxFromBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64) (min, max bool) {
+       min = true  // Will become false if we find any false
+       max = false // Will become true if we find any true
+
+       for i := int64(0); i < numValues; i++ {
+               if bitutil.BitIsSet(bitmap, int(bitmapOffset+i)) {
+                       max = true
+               } else {
+                       min = false
+               }
+               // Early exit optimization: if we've seen both values, we're 
done
+               if !min && max {
+                       return
+               }
+       }
+       return
+}
+
 // SetMinMax updates the min and max values only if they are not currently set
 // or if argMin is less than the current min / argMax is greater than the 
current max
 func (s *BooleanStatistics) SetMinMax(argMin, argMax bool) {
@@ -2005,7 +2082,7 @@ func (s *ByteArrayStatistics) UpdateFromArrow(values 
arrow.Array, updateCounts b
        )
 
        for i := 0; i < arr.Len(); i++ {
-               nextOffset := curOffset + int64(arr.ValueLen(i))                
+               nextOffset := curOffset + int64(arr.ValueLen(i))
                v := data[curOffset:nextOffset]
                curOffset = nextOffset
 
diff --git a/parquet/metadata/statistics_types.gen.go.tmpl 
b/parquet/metadata/statistics_types.gen.go.tmpl
index fb14ad43..83493998 100644
--- a/parquet/metadata/statistics_types.gen.go.tmpl
+++ b/parquet/metadata/statistics_types.gen.go.tmpl
@@ -412,7 +412,7 @@ func (s *{{.Name}}Statistics) UpdateFromArrow(values 
arrow.Array, updateCounts b
   return fmt.Errorf("%w: update float16 stats from Arrow", 
arrow.ErrNotImplemented)
 {{else}}
   if values.DataType().(arrow.FixedWidthDataType).Bytes() != 
arrow.{{.Name}}SizeBytes {
-    return fmt.Errorf("%w: cannot update {{.name}} stats with %s arrow array", 
+    return fmt.Errorf("%w: cannot update {{.name}} stats with %s arrow array",
       arrow.ErrInvalid, values.DataType())
   }
 
@@ -422,6 +422,84 @@ func (s *{{.Name}}Statistics) UpdateFromArrow(values 
arrow.Array, updateCounts b
 {{end -}}
 }
 
+{{if eq .Name "Boolean"}}
+// UpdateFromBitmap updates statistics from a boolean bitmap without 
converting to []bool.
+// This avoids the 8x memory overhead and provides better performance.
+func (s *{{.Name}}Statistics) UpdateFromBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64, numNulls int64) {
+  s.IncNulls(numNulls)
+  s.nvalues += numValues
+
+  if numValues == 0 {
+    return
+  }
+
+  s.SetMinMax(s.getMinMaxFromBitmap(bitmap, bitmapOffset, numValues))
+}
+
+// UpdateFromBitmapSpaced updates statistics from a spaced boolean bitmap.
+func (s *{{.Name}}Statistics) UpdateFromBitmapSpaced(bitmap []byte, 
bitmapOffset int64, numValues int64, validBits []byte, validBitsOffset int64, 
numNull int64) {
+  s.IncNulls(numNull)
+  notnull := numValues - numNull
+  s.nvalues += notnull
+
+  if notnull == 0 {
+    return
+  }
+
+  // For spaced case, we need to check valid bits
+  min := s.defaultMin()
+  max := s.defaultMax()
+
+  if s.bitSetReader == nil {
+    s.bitSetReader = bitutils.NewSetBitRunReader(validBits, validBitsOffset, 
numValues)
+  } else {
+    s.bitSetReader.Reset(validBits, validBitsOffset, numValues)
+  }
+
+  for {
+    run := s.bitSetReader.NextRun()
+    if run.Length == 0 {
+      break
+    }
+    // Check bits in this valid run
+    for i := int64(0); i < run.Length; i++ {
+      if bitutil.BitIsSet(bitmap, int(bitmapOffset+run.Pos+i)) {
+        max = true
+      } else {
+        min = false
+      }
+      // Early exit if we've found both values
+      if !min && max {
+        s.SetMinMax(min, max)
+        return
+      }
+    }
+  }
+
+  s.SetMinMax(min, max)
+}
+
+// getMinMaxFromBitmap finds min/max directly from bitmap.
+// For booleans: min is false if any false exists, max is true if any true 
exists.
+func (s *{{.Name}}Statistics) getMinMaxFromBitmap(bitmap []byte, bitmapOffset 
int64, numValues int64) (min, max bool) {
+  min = true  // Will become false if we find any false
+  max = false // Will become true if we find any true
+
+  for i := int64(0); i < numValues; i++ {
+    if bitutil.BitIsSet(bitmap, int(bitmapOffset+i)) {
+      max = true
+    } else {
+      min = false
+    }
+    // Early exit optimization: if we've seen both values, we're done
+    if !min && max {
+      return
+    }
+  }
+  return
+}
+{{end}}
+
 // SetMinMax updates the min and max values only if they are not currently set
 // or if argMin is less than the current min / argMax is greater than the 
current max
 func (s *{{.Name}}Statistics) SetMinMax(argMin, argMax {{.name}}) {

Reply via email to