bkietz commented on code in PR #578:
URL: https://github.com/apache/arrow-nanoarrow/pull/578#discussion_r1710106120
##########
src/nanoarrow/common/array.c:
##########
@@ -1335,3 +1335,177 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
ArrowErrorSet(error, "validation_level not recognized");
return EINVAL;
}
+
+struct ArrowComparisonInternalState {
+ enum ArrowCompareLevel level;
+ int is_equal;
+ struct ArrowBuffer path;
+ struct ArrowError reason;
+};
+
+#define RETURN_NOT_EQUAL_IMPL(state_, path_size_, cond_, reason_) \
+ do { \
+ if (cond_) { \
+ ArrowErrorSet(&state_->reason, "%s", reason_); \
+ state_->path.size_bytes = (path_size_); \
+ state_->is_equal = 0; \
+ return NANOARROW_OK; \
+ } \
+ } while (0)
+
+#define RETURN_NOT_EQUAL(state_, path_size_, condition_) \
+ RETURN_NOT_EQUAL_IMPL(state_, path_size_, condition_, #condition_)
+
+static ArrowErrorCode ArrowArrayViewCompareStructure(
+ const struct ArrowArrayView* actual, const struct ArrowArrayView* expected,
+ struct ArrowComparisonInternalState* state) {
+ int64_t path_size = state->path.size_bytes;
+
+ RETURN_NOT_EQUAL(state, path_size, actual->storage_type !=
expected->storage_type);
+ RETURN_NOT_EQUAL(state, path_size, actual->n_children !=
expected->n_children);
+ RETURN_NOT_EQUAL(state, path_size,
+ actual->dictionary == NULL && expected->dictionary != NULL);
+ RETURN_NOT_EQUAL(state, path_size,
+ actual->dictionary != NULL && expected->dictionary == NULL);
+
+ char child_id[128];
+ for (int64_t i = 0; i < actual->n_children; i++) {
+ snprintf(child_id, sizeof(child_id), ".children[%" PRId64 "]", i);
+ state->path.size_bytes = path_size;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendStringView(&state->path, ArrowCharView(child_id)));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewCompareStructure(actual->children[i],
+
expected->children[i], state));
+ if (!state->is_equal) {
+ return NANOARROW_OK;
+ }
+ }
+
+ if (actual->dictionary != NULL) {
+ state->path.size_bytes = path_size;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendStringView(&state->path,
ArrowCharView(".dictionary")));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewCompareStructure(actual->dictionary,
expected->dictionary, state));
+ if (!state->is_equal) {
+ return NANOARROW_OK;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+static ArrowErrorCode ArrowArrayViewCompareIdentical(
+ const struct ArrowArrayView* actual, const struct ArrowArrayView* expected,
+ struct ArrowComparisonInternalState* state) {
+ int64_t path_size = state->path.size_bytes;
+
+ RETURN_NOT_EQUAL(state, path_size, actual->length != expected->length);
+ RETURN_NOT_EQUAL(state, path_size, actual->offset != expected->offset);
+ RETURN_NOT_EQUAL(state, path_size, actual->null_count !=
expected->null_count);
+
+ char child_id[128];
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ snprintf(child_id, sizeof(child_id), ".buffers[%d]", i);
+ state->path.size_bytes = path_size;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendStringView(&state->path, ArrowCharView(child_id)));
+
+ RETURN_NOT_EQUAL(
+ state, state->path.size_bytes,
+ actual->buffer_views[i].size_bytes !=
expected->buffer_views[i].size_bytes);
+ int64_t buffer_size = actual->buffer_views[i].size_bytes;
+ if (buffer_size == 0) {
+ continue;
+ }
+
+ RETURN_NOT_EQUAL(state, state->path.size_bytes,
+ memcmp(actual->buffer_views[i].data.data,
+ expected->buffer_views[i].data.data, buffer_size)
!= 0);
Review Comment:
This should be fine for now, but there are some cases where this will not be
sufficient. For example:
- if you have two int32 arrays both of which are `[1, null, 3]` they might
still memcmp unequal if the values "under" the null bit are not identical
- if you have two utf8 arrays both of which are `["hello", "world"]` the
might still memcmp unequal if their first offsets aren't identical
##########
src/nanoarrow/nanoarrow.h:
##########
@@ -1064,6 +1065,19 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
enum ArrowValidationLevel
validation_level,
struct ArrowError* error);
+/// \brief Compare two ArrowArrayView objects for equality
+///
+/// Given two ArrowArrayView instances, place either 0 (not equal) and
+/// 1 (equal) at the address pointed to by out. If the comparison determines
+/// that actual and expected are not equal, a reason will be communicated via
+/// error if error is non-NULL.
+///
+/// Returns NANOARROW_OK if the comparison completed successfully.
+ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView* expected,
+ enum ArrowCompareLevel level, int* out,
+ struct ArrowError* error);
Review Comment:
This should probably not be an `ArrowError` since it isn't
really/necessarily communicating an error. For another thing, it might be
desirable to add `NANOARROW_COMPARE_DIFF` later (diffing has been very useful
in arrow C++) or other comparison levels where we'd like more than 1024 bytes
of explanation.
```suggestion
char** explanation);
```
```suggestion
struct ArrowBuffer* explanation);
```
##########
src/nanoarrow/nanoarrow.h:
##########
@@ -1064,6 +1065,19 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
enum ArrowValidationLevel
validation_level,
struct ArrowError* error);
+/// \brief Compare two ArrowArrayView objects for equality
+///
+/// Given two ArrowArrayView instances, place either 0 (not equal) and
+/// 1 (equal) at the address pointed to by out. If the comparison determines
+/// that actual and expected are not equal, a reason will be communicated via
+/// error if error is non-NULL.
+///
+/// Returns NANOARROW_OK if the comparison completed successfully.
+ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView* expected,
+ enum ArrowCompareLevel level, int* out,
+ struct ArrowError* error);
Review Comment:
(especially in the context of testing but also in general) I think array
comparison shouldn't include validation and none of the allocations would be
huge; what other sources of actual errors apply?
##########
src/nanoarrow/common/array.c:
##########
@@ -1335,3 +1335,177 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
ArrowErrorSet(error, "validation_level not recognized");
return EINVAL;
}
+
+struct ArrowComparisonInternalState {
+ enum ArrowCompareLevel level;
+ int is_equal;
+ struct ArrowBuffer path;
+ struct ArrowError reason;
+};
+
+#define RETURN_NOT_EQUAL_IMPL(state_, path_size_, cond_, reason_) \
+ do { \
+ if (cond_) { \
+ ArrowErrorSet(&state_->reason, "%s", reason_); \
+ state_->path.size_bytes = (path_size_); \
+ state_->is_equal = 0; \
+ return NANOARROW_OK; \
+ } \
+ } while (0)
+
+#define RETURN_NOT_EQUAL(state_, path_size_, condition_) \
+ RETURN_NOT_EQUAL_IMPL(state_, path_size_, condition_, #condition_)
+
+static ArrowErrorCode ArrowArrayViewCompareStructure(
+ const struct ArrowArrayView* actual, const struct ArrowArrayView* expected,
+ struct ArrowComparisonInternalState* state) {
+ int64_t path_size = state->path.size_bytes;
+
+ RETURN_NOT_EQUAL(state, path_size, actual->storage_type !=
expected->storage_type);
+ RETURN_NOT_EQUAL(state, path_size, actual->n_children !=
expected->n_children);
+ RETURN_NOT_EQUAL(state, path_size,
+ actual->dictionary == NULL && expected->dictionary != NULL);
+ RETURN_NOT_EQUAL(state, path_size,
+ actual->dictionary != NULL && expected->dictionary == NULL);
+
Review Comment:
Should we compare layouts or union type ids here?
##########
src/nanoarrow/common/array.c:
##########
@@ -1335,3 +1335,177 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
ArrowErrorSet(error, "validation_level not recognized");
return EINVAL;
}
+
+struct ArrowComparisonInternalState {
+ enum ArrowCompareLevel level;
+ int is_equal;
+ struct ArrowBuffer path;
+ struct ArrowError reason;
+};
+
+#define RETURN_NOT_EQUAL_IMPL(state_, path_size_, cond_, reason_) \
+ do { \
+ if (cond_) { \
+ ArrowErrorSet(&state_->reason, "%s", reason_); \
+ state_->path.size_bytes = (path_size_); \
+ state_->is_equal = 0; \
+ return NANOARROW_OK; \
+ } \
+ } while (0)
+
+#define RETURN_NOT_EQUAL(state_, path_size_, condition_) \
+ RETURN_NOT_EQUAL_IMPL(state_, path_size_, condition_, #condition_)
+
+static ArrowErrorCode ArrowArrayViewCompareStructure(
+ const struct ArrowArrayView* actual, const struct ArrowArrayView* expected,
+ struct ArrowComparisonInternalState* state) {
+ int64_t path_size = state->path.size_bytes;
+
+ RETURN_NOT_EQUAL(state, path_size, actual->storage_type !=
expected->storage_type);
Review Comment:
Nits:
- could this be named `RETURN_UNLESS` instead? the condition isn't
necessarily equality comparison
- could the condition be the first argument? I think that'd be more readable
- IIUC we could eliminate the separate path_size argument
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]