paleolimbot commented on code in PR #330:
URL: https://github.com/apache/arrow-nanoarrow/pull/330#discussion_r1417583444
##########
src/nanoarrow/nanoarrow_testing.hpp:
##########
@@ -1661,6 +1714,305 @@ class TestingJSONReader {
}
};
+/// \brief Integration testing comparison utility
+///
+/// Utility to compare ArrowSchema, ArrowArray, and ArrowArrayStream instances.
+/// This should only be used in the context of integration testing as the
+/// comparison logic is specific to the integration testing JSON files and
+/// specification. Notably:
+///
+/// - Map types are considered equal regardless of the child names "entries",
+/// "key", and "value".
+/// - Float32 and Float64 values are only compared to 3 decimal places.
+class TestingJSONComparison {
+ private:
+ // Internal representation of a human-readable inequality
+ struct Difference {
+ std::string path;
+ std::string actual;
+ std::string expected;
+ };
+
+ public:
+ /// \brief Returns the number of differences found by the previous call
+ size_t num_differences() const { return differences_.size(); }
+
+ /// \brief Dump a human-readable summary of differences to out
+ void WriteDifferences(std::ostream& out) {
+ for (const auto& difference : differences_) {
+ out << "Path: " << difference.path << "\n";
+ out << "- " << difference.actual << "\n";
+ out << "+ " << difference.expected << "\n";
+ out << "\n";
+ }
+ }
+
+ /// \brief Clear any existing differences
+ void ClearDifferences() { differences_.clear(); }
+
+ /// \brief Compare a stream of record batches
+ ///
+ /// Compares actual against expected using the following strategy:
+ ///
+ /// - Compares schemas for equality, returning if differences were found
+ /// - Compares pairs of record batches, returning if one stream finished
+ /// before another.
+ ///
+ /// Returns NANOARROW_OK if the comparison ran without error. Callers must
+ /// query num_differences() to obtain the result of the comparison on
success.
+ ArrowErrorCode CompareArrayStream(ArrowArrayStream* actual,
ArrowArrayStream* expected,
+ ArrowError* error = nullptr) {
+ // Read both schemas
+ nanoarrow::UniqueSchema actual_schema;
+ nanoarrow::UniqueSchema expected_schema;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(actual->get_schema(actual,
actual_schema.get()),
+ error);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ expected->get_schema(expected, expected_schema.get()), error);
+
+ // Compare them and return if they are not equal
+ NANOARROW_RETURN_NOT_OK(
+ CompareSchema(expected_schema.get(), actual_schema.get(), error,
"Schema"));
+ if (num_differences() > 0) {
+ return NANOARROW_OK;
+ }
+
+ // Keep a record of the schema to compare batches
+ NANOARROW_RETURN_NOT_OK(SetSchema(expected_schema.get(), error));
+
+ int64_t n_batches = -1;
+ nanoarrow::UniqueArray actual_array;
+ nanoarrow::UniqueArray expected_array;
+ do {
+ n_batches++;
+ std::string batch_label = std::string("Batch ") +
std::to_string(n_batches);
+
+ // Read a batch from each stream
+ actual_array.reset();
+ expected_array.reset();
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(actual->get_next(actual,
actual_array.get()),
+ error);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ expected->get_next(expected, expected_array.get()), error);
+
+ // Check the finished/unfinished status of both streams
+ if (actual_array->release == nullptr && expected_array->release !=
nullptr) {
+ differences_.push_back({batch_label, "finished stream", "unfinished
stream"});
+ return NANOARROW_OK;
+ }
+
+ if (actual_array->release != nullptr && expected_array->release ==
nullptr) {
+ differences_.push_back({batch_label, "unfinished stream", "finished
stream"});
+ return NANOARROW_OK;
+ }
+
+ // If both streams are done, break
+ if (actual_array->release == nullptr) {
+ break;
+ }
+
+ // Compare this batch
+ NANOARROW_RETURN_NOT_OK(
+ CompareBatch(actual_array.get(), expected_array.get(), error,
batch_label));
+ } while (true);
+
+ return NANOARROW_OK;
+ }
+
+ /// \brief Compare a top-level ArrowSchema struct
+ ///
+ /// Returns NANOARROW_OK if the comparison ran without error. Callers must
+ /// query num_differences() to obtain the result of the comparison on
success.
+ ArrowErrorCode CompareSchema(const ArrowSchema* actual, const ArrowSchema*
expected,
+ ArrowError* error = nullptr,
Review Comment:
It's null in the tests (and in general, the `error` argument can always be
null in nanoarrow if the caller doesn't want/need verbose output).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]