(arrow-nanoarrow) branch main updated: chore: Support decimal32, decimal64, and dictionaries in integration test utilities (#880)

paleolimbot Sun, 10 May 2026 20:07:58 -0700

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new dabe1b71 chore: Support decimal32, decimal64, and dictionaries in 
integration test utilities (#880)
dabe1b71 is described below

commit dabe1b717de7e5c120167a48b383d62f15f4bd7b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Sun May 10 22:07:40 2026 -0500

    chore: Support decimal32, decimal64, and dictionaries in integration test 
utilities (#880)
    
    In pursuit of removing skips in the integration test for
    https://github.com/apache/arrow/pull/49910. While the IPC reader has
    supported decimal32 and decimal64 since the beginning, the JSON parser hasn't.
    
    The file reader for the integration tests was also never updated to
    support dictionary batches. To fully support the integration test we
    also need a writer for dictionaries, which is a bit more involved, so
    I'll do that in another PR.
    
    ---------
    
    Co-authored-by: Copilot <[email protected]>
---
 .github/workflows/build-and-test-device.yaml |   5 +-
 .github/workflows/build-and-test-ipc.yaml    |   5 +
 ci/scripts/run-ipc-integration-tests.sh      | 164 +++++++++++++++++++++++++++
 examples/cmake-scenarios/run.sh              |   9 +-
 src/nanoarrow/integration/ipc_integration.cc |  41 ++++++-
 src/nanoarrow/ipc/decoder.c                  |  26 +++++
 src/nanoarrow/ipc/encoder.c                  |   2 +
 src/nanoarrow/ipc/files_test.cc              |  71 ++++++++----
 src/nanoarrow/nanoarrow_ipc.h                |   2 +
 src/nanoarrow/testing/testing.cc             |  26 ++++-
 src/nanoarrow/testing/testing_test.cc        |   8 +-
 11 files changed, 320 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/build-and-test-device.yaml 
b/.github/workflows/build-and-test-device.yaml
index 3e10d495..b2088d3d 100644
--- a/.github/workflows/build-and-test-device.yaml
+++ b/.github/workflows/build-and-test-device.yaml
@@ -80,8 +80,11 @@ jobs:
       - name: Install build dependencies
         if: matrix.config.label == 'with-cuda'
         run: |
+          # Install minimal CUDA packages needed for building (avoid full 
cuda-toolkit
+          # which includes nsight-systems and other large tools that exhaust 
disk space)
           sudo apt-get update && \
-            sudo apt-get install -y cmake build-essential cuda-toolkit tzdata
+            sudo apt-get install -y cmake build-essential tzdata \
+              cuda-nvcc-13-2 cuda-cudart-dev-13-2 libcublas-dev-13-2
 
           # Install newer cmake for building Arrow C++
           pip install cmake
diff --git a/.github/workflows/build-and-test-ipc.yaml 
b/.github/workflows/build-and-test-ipc.yaml
index 2ae83dea..6257396e 100644
--- a/.github/workflows/build-and-test-ipc.yaml
+++ b/.github/workflows/build-and-test-ipc.yaml
@@ -149,3 +149,8 @@ jobs:
         with:
           name: nanoarrow-ipc-memcheck
           path: build/Testing/Temporary/MemoryChecker.*.log
+
+      - name: Run integration test validation for arrow-testing files
+        if: matrix.config.label == 'default-build'
+        run: |
+          ./ci/scripts/run-ipc-integration-tests.sh build
diff --git a/ci/scripts/run-ipc-integration-tests.sh 
b/ci/scripts/run-ipc-integration-tests.sh
new file mode 100755
index 00000000..266f8e5d
--- /dev/null
+++ b/ci/scripts/run-ipc-integration-tests.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script runs the nanoarrow_ipc_integration VALIDATE command against
+# all test files from the arrow-testing repository's integration directory.
+#
+# Usage:
+#   export NANOARROW_ARROW_TESTING_DIR=/path/to/arrow-testing
+#   ./ci/scripts/run-ipc-integration-tests.sh [build_dir]
+#
+# Arguments:
+#   build_dir: Optional path to the build directory containing
+#              nanoarrow_ipc_integration. Defaults to "build".
+
+REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+BUILD_DIR="${1:-${REPO_ROOT}/build}"
+
+if [ -z "${NANOARROW_ARROW_TESTING_DIR}" ]; then
+    echo "Error: NANOARROW_ARROW_TESTING_DIR environment variable not set"
+    echo "Please set it to the path of a checkout of apache/arrow-testing"
+    exit 1
+fi
+
+if [ ! -d "${NANOARROW_ARROW_TESTING_DIR}" ]; then
+    echo "Error: NANOARROW_ARROW_TESTING_DIR does not exist: 
${NANOARROW_ARROW_TESTING_DIR}"
+    exit 1
+fi
+
+INTEGRATION_BIN="${BUILD_DIR}/nanoarrow_ipc_integration"
+if [ ! -x "${INTEGRATION_BIN}" ]; then
+    echo "Error: nanoarrow_ipc_integration not found at ${INTEGRATION_BIN}"
+    echo "Please build the project first or specify the build directory as an 
argument"
+    exit 1
+fi
+
+DATA_DIR="${NANOARROW_ARROW_TESTING_DIR}/data/arrow-ipc-stream/integration"
+
+# Create a temp directory for decompressed JSON files
+TEMP_DIR=$(mktemp -d)
+trap "rm -rf ${TEMP_DIR}" EXIT
+
+# Track results
+PASSED=0
+FAILED=0
+SKIPPED=0
+
+# Known files that are expected to be skipped (unsupported types)
+SKIP_PATTERNS=(
+    "generated_list_view"      # ListView not supported
+    "generated_binary_view"    # BinaryView not supported
+    "generated_run_end_encoded" # REE not supported
+)
+
+# Function to check if a file should be skipped
+should_skip() {
+    local basename="$1"
+    for pattern in "${SKIP_PATTERNS[@]}"; do
+        if [[ "${basename}" == *"${pattern}"* ]]; then
+            return 0
+        fi
+    done
+    return 1
+}
+
+# Function to run VALIDATE for a given test file
+run_validate() {
+    local subdir="$1"
+    local basename="$2"
+
+    local arrow_file="${DATA_DIR}/${subdir}/${basename}.arrow_file"
+    local json_gz="${DATA_DIR}/${subdir}/${basename}.json.gz"
+    local json_file="${TEMP_DIR}/${subdir}_${basename}.json"
+
+    # We require .arrow_file format (with ARROW1 magic and footer)
+    if [ ! -f "${arrow_file}" ]; then
+        return 2  # Skip - no arrow file
+    fi
+
+    # Check if JSON exists (possibly gzipped)
+    if [ -f "${json_gz}" ]; then
+        gunzip -c "${json_gz}" > "${json_file}"
+    elif [ -f "${DATA_DIR}/${subdir}/${basename}.json" ]; then
+        json_file="${DATA_DIR}/${subdir}/${basename}.json"
+    else
+        return 2  # Skip - no JSON file
+    fi
+
+    if COMMAND=VALIDATE ARROW_PATH="${arrow_file}" JSON_PATH="${json_file}" 
"${INTEGRATION_BIN}" > /dev/null 2>&1; then
+        return 0  # Pass
+    else
+        return 1  # Fail
+    fi
+}
+
+echo "=== Running IPC Integration Tests ==="
+echo "Using arrow-testing at: ${NANOARROW_ARROW_TESTING_DIR}"
+echo "Using integration binary at: ${INTEGRATION_BIN}"
+echo ""
+
+# Find all subdirectories in the integration directory
+for subdir_path in "${DATA_DIR}"/*/; do
+    [ -d "${subdir_path}" ] || continue
+    subdir=$(basename "${subdir_path}")
+
+    # Skip versions before 1.0.0
+    if [[ "${subdir}" == 0.* ]]; then
+        continue
+    fi
+
+    echo "=== Testing ${subdir} ==="
+
+    # Find all unique basenames (from .arrow_file files)
+    for arrow_file in "${subdir_path}"*.arrow_file; do
+        [ -f "${arrow_file}" ] || continue
+
+        basename=$(basename "${arrow_file}" .arrow_file)
+
+        # Check if this file should be skipped
+        if should_skip "${basename}"; then
+            ((SKIPPED++))
+            continue
+        fi
+
+        run_validate "${subdir}" "${basename}"
+        result=$?
+
+        if [ $result -eq 0 ]; then
+            echo "  PASS: ${basename}"
+            ((PASSED++))
+        elif [ $result -eq 2 ]; then
+            echo "  SKIP: ${basename} (missing files)"
+            ((SKIPPED++))
+        else
+            echo "  FAIL: ${basename}"
+            ((FAILED++))
+        fi
+    done
+    echo ""
+done
+
+echo "=== Summary ==="
+echo "Passed: ${PASSED}"
+echo "Failed: ${FAILED}"
+echo "Skipped: ${SKIPPED}"
+
+if [ ${FAILED} -gt 0 ]; then
+    exit 1
+fi
diff --git a/examples/cmake-scenarios/run.sh b/examples/cmake-scenarios/run.sh
index 71f33079..ccc21171 100755
--- a/examples/cmake-scenarios/run.sh
+++ b/examples/cmake-scenarios/run.sh
@@ -27,13 +27,14 @@ 
WIN_DLL_NANOARROW_INSTALLED="$(pwd)/scratch/nanoarrow_install/bin"
 # The mismatched_shared_libs test is static-only, so no DLL path needed
 for dir in scratch/build*; do
     # Special cases where we have to set PATH on Windows
-    if [ "${dir}" = "scratch/build_against_fetched_shared" ] && [ "${OSTYPE}" 
= "msys" ]; then
+    # OSTYPE can be "msys" (Git Bash) or "cygwin" (Cygwin/GitHub Actions)
+    if [ "${dir}" = "scratch/build_against_fetched_shared" ] && [[ "${OSTYPE}" 
== msys* || "${OSTYPE}" == cygwin* ]]; then
         PATH="${PATH}:${WIN_DLL_NANOARROW_FETCHED}"  
./${dir}/Debug/minimal_cpp_app
-    elif [ "${dir}" = "scratch/build_shared" ] && [ "${OSTYPE}" = "msys" ]; 
then
+    elif [ "${dir}" = "scratch/build_shared" ] && [[ "${OSTYPE}" == msys* || 
"${OSTYPE}" == cygwin* ]]; then
         PATH="${PATH}:${WIN_DLL_NANOARROW_BUILT}" 
./${dir}/Debug/minimal_cpp_app
-    elif [ "${dir}" = "scratch/build_against_install_shared" ] && [ 
"${OSTYPE}" = "msys" ]; then
+    elif [ "${dir}" = "scratch/build_against_install_shared" ] && [[ 
"${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
         PATH="${PATH}:${WIN_DLL_NANOARROW_INSTALLED}" 
./${dir}/Debug/minimal_cpp_app
-    elif [ "${OSTYPE}" = "msys" ]; then
+    elif [[ "${OSTYPE}" == msys* || "${OSTYPE}" == cygwin* ]]; then
         ./${dir}/Debug/minimal_cpp_app
     else
         ./${dir}/minimal_cpp_app
diff --git a/src/nanoarrow/integration/ipc_integration.cc 
b/src/nanoarrow/integration/ipc_integration.cc
index 47ec9392..7a56fca7 100644
--- a/src/nanoarrow/integration/ipc_integration.cc
+++ b/src/nanoarrow/integration/ipc_integration.cc
@@ -195,14 +195,45 @@ struct MaterializedArrayStream {
 
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
         ArrowSchemaDeepCopy(&decoder->footer->schema, schema.get()), error);
-    NANOARROW_RETURN_NOT_OK(
-        ArrowIpcDecoderSetSchema(decoder.get(), &decoder->footer->schema, 
error));
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetSchemaWithDictionaries(
+        decoder.get(), &decoder->footer->schema, 
&decoder->footer->dictionaries, error));
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
         ArrowIpcDecoderSetEndianness(decoder.get(), decoder->endianness), 
error);
 
+    // Initialize dictionaries storage
+    nanoarrow::ipc::UniqueDictionaries dictionaries;
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDictionariesInit(
+        dictionaries.get(), &decoder->footer->dictionaries, error));
+
+    // Move both block buffers out of the footer BEFORE decoding any headers,
+    // because ArrowIpcDecoderDecodeHeader resets the footer
+    nanoarrow::UniqueBuffer dictionary_blocks;
     nanoarrow::UniqueBuffer record_batch_blocks;
+    ArrowBufferMove(&decoder->footer->dictionary_blocks, 
dictionary_blocks.get());
     ArrowBufferMove(&decoder->footer->record_batch_blocks, 
record_batch_blocks.get());
 
+    // Read dictionary blocks
+    for (int i = 0; i < dictionary_blocks->size_bytes / sizeof(struct 
ArrowIpcFileBlock);
+         i++) {
+      const auto& block =
+          reinterpret_cast<struct 
ArrowIpcFileBlock*>(dictionary_blocks->data)[i];
+      struct ArrowBufferView metadata_view = {
+          {bytes.data() + block.offset},
+          block.metadata_length,
+      };
+      NANOARROW_RETURN_NOT_OK(
+          ArrowIpcDecoderDecodeHeader(decoder.get(), metadata_view, error));
+
+      struct ArrowBufferView body_view = {
+          {metadata_view.data.as_uint8 + metadata_view.size_bytes},
+          block.body_length,
+      };
+      NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeDictionary(
+          decoder.get(), body_view, NANOARROW_VALIDATION_LEVEL_FULL, 
dictionaries.get(),
+          error));
+    }
+
+    // Read record batch blocks
     for (int i = 0;
          i < record_batch_blocks->size_bytes / sizeof(struct 
ArrowIpcFileBlock); i++) {
       const auto& block =
@@ -219,9 +250,9 @@ struct MaterializedArrayStream {
           block.body_length,
       };
       nanoarrow::UniqueArray batch;
-      NANOARROW_RETURN_NOT_OK(
-          ArrowIpcDecoderDecodeArray(decoder.get(), body_view, -1, batch.get(),
-                                     NANOARROW_VALIDATION_LEVEL_FULL, error));
+      NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeArrayWithDictionaries(
+          decoder.get(), body_view, -1, dictionaries.get(), batch.get(),
+          NANOARROW_VALIDATION_LEVEL_FULL, error));
       batches.push_back(std::move(batch));
     }
 
diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c
index 4b0239c1..d9c8e1c4 100644
--- a/src/nanoarrow/ipc/decoder.c
+++ b/src/nanoarrow/ipc/decoder.c
@@ -1225,10 +1225,16 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema* 
schema, ns(Field_table_t)
     case ns(Type_FixedSizeBinary):
       return ArrowIpcDecoderSetTypeFixedSizeBinary(schema, 
ns(Field_type_get(field)),
                                                    error);
+    case ns(Type_BinaryView):
+      ArrowErrorSet(error, "BinaryView not yet supported in IPC reader");
+      return ENOTSUP;
     case ns(Type_Utf8):
       return ArrowIpcDecoderSetTypeSimple(schema, NANOARROW_TYPE_STRING, 
error);
     case ns(Type_LargeUtf8):
       return ArrowIpcDecoderSetTypeSimple(schema, NANOARROW_TYPE_LARGE_STRING, 
error);
+    case ns(Type_Utf8View):
+      ArrowErrorSet(error, "Utf8View not yet supported in IPC reader");
+      return ENOTSUP;
     case ns(Type_Date):
       return ArrowIpcDecoderSetTypeDate(schema, ns(Field_type_get(field)), 
error);
     case ns(Type_Time):
@@ -1248,11 +1254,18 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema* 
schema, ns(Field_table_t)
     case ns(Type_FixedSizeList):
       return ArrowIpcDecoderSetTypeFixedSizeList(schema, 
ns(Field_type_get(field)),
                                                  error);
+    case ns(Type_ListView):
+    case ns(Type_LargeListView):
+      ArrowErrorSet(error, "ListView/LargeListView not yet supported in IPC 
reader");
+      return ENOTSUP;
     case ns(Type_Map):
       return ArrowIpcDecoderSetTypeMap(schema, ns(Field_type_get(field)), 
error);
     case ns(Type_Union):
       return ArrowIpcDecoderSetTypeUnion(schema, ns(Field_type_get(field)), 
n_children,
                                          error);
+    case ns(Type_RunEndEncoded):
+      ArrowErrorSet(error, "RunEndEncoded not yet supported in IPC reader");
+      return ENOTSUP;
     default:
       ArrowErrorSet(error, "Unrecognized Field type with value %d", type_type);
       return EINVAL;
@@ -1885,6 +1898,19 @@ ArrowErrorCode ArrowIpcDecoderDecodeFooter(struct 
ArrowIpcDecoder* decoder,
     record_batches[i].body_length = ns(Block_bodyLength(blocks + i));
   }
 
+  blocks = ns(Footer_dictionaries(footer));
+  n = ns(Block_vec_len(blocks));
+  
NANOARROW_RETURN_NOT_OK(ArrowBufferResize(&private_data->footer.dictionary_blocks,
+                                            sizeof(struct ArrowIpcFileBlock) * 
n,
+                                            /*shrink_to_fit=*/0));
+  struct ArrowIpcFileBlock* dictionaries =
+      (struct ArrowIpcFileBlock*)private_data->footer.dictionary_blocks.data;
+  for (int64_t i = 0; i < n; i++) {
+    dictionaries[i].offset = ns(Block_offset(blocks + i));
+    dictionaries[i].metadata_length = ns(Block_metaDataLength(blocks + i));
+    dictionaries[i].body_length = ns(Block_bodyLength(blocks + i));
+  }
+
   decoder->footer = &private_data->footer;
   return NANOARROW_OK;
 }
diff --git a/src/nanoarrow/ipc/encoder.c b/src/nanoarrow/ipc/encoder.c
index 8b3fc489..27c9da95 100644
--- a/src/nanoarrow/ipc/encoder.c
+++ b/src/nanoarrow/ipc/encoder.c
@@ -629,6 +629,7 @@ ArrowErrorCode ArrowIpcEncoderEncodeSimpleRecordBatch(
 void ArrowIpcFooterInit(struct ArrowIpcFooter* footer) {
   footer->schema.release = NULL;
   ArrowBufferInit(&footer->record_batch_blocks);
+  ArrowBufferInit(&footer->dictionary_blocks);
   ArrowIpcDictionaryEncodingsInit(&footer->dictionaries);
 }
 
@@ -637,6 +638,7 @@ void ArrowIpcFooterReset(struct ArrowIpcFooter* footer) {
     ArrowSchemaRelease(&footer->schema);
   }
   ArrowBufferReset(&footer->record_batch_blocks);
+  ArrowBufferReset(&footer->dictionary_blocks);
   ArrowIpcDictionaryEncodingsReset(&footer->dictionaries);
 }
 
diff --git a/src/nanoarrow/ipc/files_test.cc b/src/nanoarrow/ipc/files_test.cc
index 6b70a0c7..46c7203c 100644
--- a/src/nanoarrow/ipc/files_test.cc
+++ b/src/nanoarrow/ipc/files_test.cc
@@ -491,7 +491,6 @@ INSTANTIATE_TEST_SUITE_P(
         TestFile::OK("generated_datetime.stream"),
         TestFile::OK("generated_decimal.stream"),
         TestFile::OK("generated_decimal256.stream"),
-
         TestFile::OK("generated_duplicate_fieldnames.stream"),
         TestFile::OK("generated_interval.stream"),
         TestFile::OK("generated_map_non_canonical.stream"),
@@ -545,18 +544,10 @@ TEST_P(TestFileFixture, NanoarrowIpcTestFileIPCCheckJSON) 
{
   param.TestIPCCheckJSON(dir_builder.str());
 }
 
-// At least one Windows MSVC version does not allow the #if defined()
-// to be within a macro invocation, so we define these two cases
-// with some repetition.
-#if defined(NANOARROW_IPC_WITH_ZSTD) && defined(NANOARROW_IPC_WITH_LZ4)
 INSTANTIATE_TEST_SUITE_P(
     NanoarrowIpcTest, TestFileFixture,
     ::testing::Values(
         // Testing of other files
-        TestFile::OK("2.0.0-compression/generated_uncompressible_zstd.stream"),
-        TestFile::OK("2.0.0-compression/generated_zstd.stream"),
-        TestFile::OK("2.0.0-compression/generated_uncompressible_lz4.stream"),
-        TestFile::OK("2.0.0-compression/generated_lz4.stream"),
         TestFile::OK("0.17.1/generated_union.stream"),
         TestFile::OK("0.14.1/generated_datetime.stream"),
         TestFile::OK("0.14.1/generated_decimal.stream"),
@@ -565,24 +556,54 @@ INSTANTIATE_TEST_SUITE_P(
         TestFile::OK("0.14.1/generated_nested.stream"),
         TestFile::OK("0.14.1/generated_primitive.stream"),
         TestFile::OK("0.14.1/generated_primitive_no_batches.stream"),
-        TestFile::OK("0.14.1/generated_primitive_zerolength.stream")
+        TestFile::OK("0.14.1/generated_primitive_zerolength.stream"),
+        TestFile::ReadOnly("4.0.0-shareddict/generated_shared_dict.stream"),
+        // cpp-21.0.0 regenerated gold files
+        TestFile::OK("cpp-21.0.0/generated_binary.stream"),
+        TestFile::OK("cpp-21.0.0/generated_binary_no_batches.stream"),
+        TestFile::OK("cpp-21.0.0/generated_binary_zerolength.stream"),
+        TestFile::OK("cpp-21.0.0/generated_custom_metadata.stream"),
+        TestFile::OK("cpp-21.0.0/generated_datetime.stream"),
+        TestFile::OK("cpp-21.0.0/generated_decimal.stream"),
+        TestFile::OK("cpp-21.0.0/generated_decimal256.stream"),
+        TestFile::OK("cpp-21.0.0/generated_decimal32.stream"),
+        TestFile::OK("cpp-21.0.0/generated_decimal64.stream"),
+        TestFile::OK("cpp-21.0.0/generated_duplicate_fieldnames.stream"),
+        TestFile::OK("cpp-21.0.0/generated_duration.stream"),
+        TestFile::OK("cpp-21.0.0/generated_interval.stream"),
+        TestFile::OK("cpp-21.0.0/generated_interval_mdn.stream"),
+        TestFile::OK("cpp-21.0.0/generated_large_binary.stream"),
+        TestFile::OK("cpp-21.0.0/generated_map.stream"),
+        TestFile::OK("cpp-21.0.0/generated_map_non_canonical.stream"),
+        TestFile::OK("cpp-21.0.0/generated_nested.stream"),
+        TestFile::OK("cpp-21.0.0/generated_nested_large_offsets.stream"),
+        TestFile::OK("cpp-21.0.0/generated_null.stream"),
+        TestFile::OK("cpp-21.0.0/generated_null_trivial.stream"),
+        TestFile::OK("cpp-21.0.0/generated_primitive.stream"),
+        TestFile::OK("cpp-21.0.0/generated_primitive_no_batches.stream"),
+        TestFile::OK("cpp-21.0.0/generated_primitive_zerolength.stream"),
+        TestFile::OK("cpp-21.0.0/generated_recursive_nested.stream"),
+        TestFile::OK("cpp-21.0.0/generated_union.stream"),
+        TestFile::ReadOnly("cpp-21.0.0/generated_dictionary.stream"),
+        TestFile::ReadOnly("cpp-21.0.0/generated_dictionary_unsigned.stream"),
+        TestFile::ReadOnly("cpp-21.0.0/generated_extension.stream"),
+        TestFile::ReadOnly("cpp-21.0.0/generated_nested_dictionary.stream"),
+        TestFile::NotSupported("cpp-21.0.0/generated_list_view.stream"),
+        TestFile::NotSupported("cpp-21.0.0/generated_binary_view.stream"),
+        TestFile::NotSupported("cpp-21.0.0/generated_run_end_encoded.stream")
+        // Comment to keep line from wrapping
+        ));
+
+#if defined(NANOARROW_IPC_WITH_ZSTD) && defined(NANOARROW_IPC_WITH_LZ4)
+INSTANTIATE_TEST_SUITE_P(
+    NanoarrowIpcTestCompression, TestFileFixture,
+    ::testing::Values(
+        TestFile::OK("2.0.0-compression/generated_uncompressible_zstd.stream"),
+        TestFile::OK("2.0.0-compression/generated_zstd.stream"),
+        TestFile::OK("2.0.0-compression/generated_uncompressible_lz4.stream"),
+        TestFile::OK("2.0.0-compression/generated_lz4.stream")
         // Comment to keep line from wrapping
         ));
-#else
-INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest, TestFileFixture,
-                         ::testing::Values(
-                             // Testing of other files
-                             TestFile::OK("0.17.1/generated_union.stream"),
-                             TestFile::OK("0.14.1/generated_datetime.stream"),
-                             TestFile::OK("0.14.1/generated_decimal.stream"),
-                             TestFile::OK("0.14.1/generated_interval.stream"),
-                             TestFile::OK("0.14.1/generated_map.stream"),
-                             TestFile::OK("0.14.1/generated_nested.stream"),
-                             TestFile::OK("0.14.1/generated_primitive.stream"),
-                             
TestFile::OK("0.14.1/generated_primitive_no_batches.stream"),
-                             
TestFile::OK("0.14.1/generated_primitive_zerolength.stream")
-                             // Comment to keep line from wrapping
-                             ));
 #endif
 
 #endif
diff --git a/src/nanoarrow/nanoarrow_ipc.h b/src/nanoarrow/nanoarrow_ipc.h
index f4c8ef2c..f0cd71a5 100644
--- a/src/nanoarrow/nanoarrow_ipc.h
+++ b/src/nanoarrow/nanoarrow_ipc.h
@@ -954,6 +954,8 @@ struct ArrowIpcFooter {
   struct ArrowIpcDictionaryEncodings dictionaries;
   /// \brief all blocks containing RecordBatch Messages
   struct ArrowBuffer record_batch_blocks;
+  /// \brief all blocks containing DictionaryBatch Messages
+  struct ArrowBuffer dictionary_blocks;
 };
 
 /// \brief Initialize a footer
diff --git a/src/nanoarrow/testing/testing.cc b/src/nanoarrow/testing/testing.cc
index 597f42ef..df69cebe 100644
--- a/src/nanoarrow/testing/testing.cc
+++ b/src/nanoarrow/testing/testing.cc
@@ -421,6 +421,12 @@ ArrowErrorCode WriteData(std::ostream& out, const 
ArrowArrayView* value,
       break;
     }
 
+    case NANOARROW_TYPE_DECIMAL32:
+      NANOARROW_RETURN_NOT_OK(WriteDecimalData(out, value, 32));
+      break;
+    case NANOARROW_TYPE_DECIMAL64:
+      NANOARROW_RETURN_NOT_OK(WriteDecimalData(out, value, 64));
+      break;
     case NANOARROW_TYPE_DECIMAL128:
       NANOARROW_RETURN_NOT_OK(WriteDecimalData(out, value, 128));
       break;
@@ -508,6 +514,8 @@ ArrowErrorCode WriteTypeFromView(std::ostream& out, const 
ArrowSchemaView* field
     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
       out << R"("name": "fixedsizebinary", "byteWidth": )" << 
field->fixed_size;
       break;
+    case NANOARROW_TYPE_DECIMAL32:
+    case NANOARROW_TYPE_DECIMAL64:
     case NANOARROW_TYPE_DECIMAL128:
     case NANOARROW_TYPE_DECIMAL256:
       out << R"("name": "decimal", "bitWidth": )" << field->decimal_bitwidth
@@ -1154,6 +1162,12 @@ ArrowErrorCode SetTypeDecimal(ArrowSchema* schema, const 
json& value, ArrowError
 
   ArrowType type;
   switch (bit_width_int) {
+    case 32:
+      type = NANOARROW_TYPE_DECIMAL32;
+      break;
+    case 64:
+      type = NANOARROW_TYPE_DECIMAL64;
+      break;
     case 128:
       type = NANOARROW_TYPE_DECIMAL128;
       break;
@@ -1161,7 +1175,7 @@ ArrowErrorCode SetTypeDecimal(ArrowSchema* schema, const 
json& value, ArrowError
       type = NANOARROW_TYPE_DECIMAL256;
       break;
     default:
-      ArrowErrorSet(error, "Type[name=='decimal'] bitWidth must be 128 or 
256");
+      ArrowErrorSet(error, "Type[name=='decimal'] bitWidth must be 32, 64, 128 
or 256");
       return EINVAL;
   }
 
@@ -1918,6 +1932,9 @@ ArrowErrorCode SetBufferDecimal(const json& value, 
ArrowBuffer* buffer, int bitw
   ArrowDecimal decimal;
   ArrowDecimalInit(&decimal, bitwidth, 0, 0);
 
+  // n_words is 0 for decimal32, so calculate byte size from bitwidth directly
+  size_t element_size_bytes = bitwidth / 8;
+
   ArrowStringView item_view;
 
   for (const auto& item : value) {
@@ -1928,8 +1945,7 @@ ArrowErrorCode SetBufferDecimal(const json& value, 
ArrowBuffer* buffer, int bitw
     item_view.size_bytes = item_str.size();
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowDecimalSetDigits(&decimal, 
item_view), error);
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowBufferAppend(buffer, decimal.words, decimal.n_words * 
sizeof(uint64_t)),
-        error);
+        ArrowBufferAppend(buffer, decimal.words, element_size_bytes), error);
   }
 
   return NANOARROW_OK;
@@ -2053,6 +2069,10 @@ ArrowErrorCode SetArrayColumnBuffers(const json& value, 
ArrowArrayView* array_vi
           return SetBufferIntervalDayTime(data, buffer, error);
         case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
           return SetBufferIntervalMonthDayNano(data, buffer, error);
+        case NANOARROW_TYPE_DECIMAL32:
+          return SetBufferDecimal(data, buffer, 32, error);
+        case NANOARROW_TYPE_DECIMAL64:
+          return SetBufferDecimal(data, buffer, 64, error);
         case NANOARROW_TYPE_DECIMAL128:
           return SetBufferDecimal(data, buffer, 128, error);
         case NANOARROW_TYPE_DECIMAL256:
diff --git a/src/nanoarrow/testing/testing_test.cc 
b/src/nanoarrow/testing/testing_test.cc
index 90abef8a..8c3cb4c3 100644
--- a/src/nanoarrow/testing/testing_test.cc
+++ b/src/nanoarrow/testing/testing_test.cc
@@ -1144,6 +1144,12 @@ TEST(NanoarrowTestingTest, 
NanoarrowTestingTestFieldFixedSizeBinary) {
 }
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldDecimal) {
+  TestTypeRoundtrip(
+      R"({"name": "decimal", "bitWidth": 32, "precision": 8, "scale": 3})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0", 
"258"]})");
+  TestTypeRoundtrip(
+      R"({"name": "decimal", "bitWidth": 64, "precision": 10, "scale": 3})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0", 
"258"]})");
   TestTypeRoundtrip(
       R"({"name": "decimal", "bitWidth": 128, "precision": 10, "scale": 3})",
       R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0", 
"258"]})");
@@ -1152,7 +1158,7 @@ TEST(NanoarrowTestingTest, 
NanoarrowTestingTestFieldDecimal) {
       R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0", 
"258"]})");
 
   TestTypeError(R"({"name": "decimal", "bitWidth": 123, "precision": 10, 
"scale": 3})",
-                "Type[name=='decimal'] bitWidth must be 128 or 256");
+                "Type[name=='decimal'] bitWidth must be 32, 64, 128 or 256");
 
   // Ensure that omitted bitWidth maps to decimal128
   TestingJSONReader reader;

(arrow-nanoarrow) branch main updated: chore: Support decimal32, decimal64, and dictionaries in integration test utilities (#880)

Reply via email to