This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a953a51a143 [Fix](Column) refactor ColumnNullable to provide flags
safety (#40769)
a953a51a143 is described below
commit a953a51a143a6e6b7ae9e3e121824422f6edc264
Author: zclllhhjj <[email protected]>
AuthorDate: Sat Sep 14 10:22:04 2024 +0800
[Fix](Column) refactor ColumnNullable to provide flags safety (#40769)
## Proposed changes
Issue Number: close #xxx
In this pr we move null_map and relative flags to an individual base
class to constrain its visibility.
please reed the comment carefully
test in master:
```
xxx/column_nullable_test.cpp:187: Failure
Value of: null_dst->has_null()
Actual: false
Expected: true
Error occurred in case0
[ FAILED ] ColumnNullableTest.PredicateTest (0 ms)
[ RUN ] ColumnNullableTest.HashTest
[ OK ] ColumnNullableTest.HashTest (0 ms)
[----------] 3 tests from ColumnNullableTest (0 ms total)
[----------] Global test environment tear-down
[==========] 3 tests from 1 test suite ran. (1 ms total)
[ PASSED ] 2 tests.
[ FAILED ] 1 test, listed below:
[ FAILED ] ColumnNullableTest.PredicateTest
```
and success after this pr
---------
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/vec/columns/column_nullable.cpp | 85 +++++-------
be/src/vec/columns/column_nullable.h | 153 ++++++++++++++-------
.../column_nullable_seriazlization_test.cpp | 75 +---------
be/test/vec/columns/column_nullable_test.cpp | 112 +++++++++++++++
be/test/vec/columns/column_nullable_test.h | 105 ++++++++++++++
be/test/vec/columns/column_resize_test.cpp | 1 -
be/test/vec/function/function_test_util.h | 8 ++
.../correctness/test_column_nullable_cache.out | 6 +
.../correctness/test_column_nullable_cache.groovy | 57 ++++++++
9 files changed, 426 insertions(+), 176 deletions(-)
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index 483ed5ca6cd..dbee5a2025a 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -31,18 +31,19 @@
namespace doris::vectorized {
ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_,
MutableColumnPtr&& null_map_)
- : nested_column(std::move(nested_column_)),
null_map(std::move(null_map_)) {
+ : NullMapProvider(std::move(null_map_)),
nested_column(std::move(nested_column_)) {
/// ColumnNullable cannot have constant nested column. But constant
argument could be passed. Materialize it.
nested_column = get_nested_column().convert_to_full_column_if_const();
// after convert const column to full column, it may be a nullable column
if (nested_column->is_nullable()) {
- assert_cast<ColumnNullable&>(*nested_column).apply_null_map((const
ColumnUInt8&)*null_map);
- null_map =
assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr();
+ assert_cast<ColumnNullable&>(*nested_column)
+ .apply_null_map(static_cast<const
ColumnUInt8&>(get_null_map_column()));
+
reset_null_map(assert_cast<ColumnNullable&>(*nested_column).get_null_map_column_ptr());
nested_column =
assert_cast<ColumnNullable&>(*nested_column).get_nested_column_ptr();
}
- if (is_column_const(*null_map)) {
+ if (is_column_const(get_null_map_column())) [[unlikely]] {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"ColumnNullable cannot have constant null map");
__builtin_unreachable();
@@ -69,7 +70,7 @@ void ColumnNullable::update_xxHash_with_value(size_t start,
size_t end, uint64_t
nested_column->update_xxHash_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
- assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+ assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::xxHash64NullWithSeed(hash);
@@ -85,7 +86,7 @@ void ColumnNullable::update_crc_with_value(size_t start,
size_t end, uint32_t& h
nested_column->update_crc_with_value(start, end, hash, nullptr);
} else {
const auto* __restrict real_null_data =
- assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+ assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
for (int i = start; i < end; ++i) {
if (real_null_data[i] != 0) {
hash = HashUtil::zlib_crc_hash_null(hash);
@@ -110,7 +111,7 @@ void ColumnNullable::update_crcs_with_value(uint32_t*
__restrict hashes, doris::
auto s = rows;
DCHECK(s == size());
const auto* __restrict real_null_data =
- assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+ assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_crcs_with_value(hashes, type, rows, offset,
nullptr);
} else {
@@ -128,7 +129,7 @@ void ColumnNullable::update_hashes_with_value(uint64_t*
__restrict hashes,
DCHECK(null_data == nullptr);
auto s = size();
const auto* __restrict real_null_data =
- assert_cast<const ColumnUInt8&>(*null_map).get_data().data();
+ assert_cast<const
ColumnUInt8&>(get_null_map_column()).get_data().data();
if (!has_null()) {
nested_column->update_hashes_with_value(hashes, nullptr);
} else {
@@ -183,24 +184,24 @@ StringRef ColumnNullable::get_data_at(size_t n) const {
void ColumnNullable::insert_data(const char* pos, size_t length) {
if (pos == nullptr) {
get_nested_column().insert_default();
- _get_null_map_data().push_back(1);
+ get_null_map_data().push_back(1);
_has_null = true;
+ _need_update_has_null = false;
} else {
get_nested_column().insert_data(pos, length);
- _get_null_map_data().push_back(0);
+ _push_false_to_nullmap(1);
}
}
void ColumnNullable::insert_many_strings(const StringRef* strings, size_t num)
{
- auto& null_map_data = _get_null_map_data();
for (size_t i = 0; i != num; ++i) {
if (strings[i].data == nullptr) {
nested_column->insert_default();
- null_map_data.push_back(1);
+ get_null_map_data().push_back(1);
_has_null = true;
} else {
nested_column->insert_data(strings[i].data, strings[i].size);
- null_map_data.push_back(0);
+ _push_false_to_nullmap(1);
}
}
}
@@ -227,13 +228,14 @@ const char*
ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
UInt8 val = *reinterpret_cast<const UInt8*>(pos);
pos += sizeof(val);
- _get_null_map_data().push_back(val);
+ get_null_map_data().push_back(val);
if (val == 0) {
pos = get_nested_column().deserialize_and_insert_from_arena(pos);
} else {
get_nested_column().insert_default();
_has_null = true;
+ _need_update_has_null = false;
}
return pos;
@@ -251,7 +253,7 @@ void ColumnNullable::serialize_vec(std::vector<StringRef>&
keys, size_t num_rows
}
void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const
size_t num_rows) {
- auto& arr = _get_null_map_data();
+ auto& arr = get_null_map_data();
const size_t old_size = arr.size();
arr.resize(old_size + num_rows);
@@ -274,21 +276,15 @@ void
ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t
void ColumnNullable::insert_range_from_ignore_overflow(const
doris::vectorized::IColumn& src,
size_t start, size_t
length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
- _get_null_map_column().insert_range_from(*nullable_col.null_map, start,
length);
+
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(),
start, length);
get_nested_column().insert_range_from_ignore_overflow(*nullable_col.nested_column,
start,
length);
- const auto& src_null_map_data = nullable_col.get_null_map_data();
- _has_null = has_null();
- _has_null |= simd::contain_byte(src_null_map_data.data() + start, length,
1);
}
void ColumnNullable::insert_range_from(const IColumn& src, size_t start,
size_t length) {
const auto& nullable_col = assert_cast<const ColumnNullable&>(src);
- _get_null_map_column().insert_range_from(*nullable_col.null_map, start,
length);
+
get_null_map_column().insert_range_from(nullable_col.get_null_map_column(),
start, length);
get_nested_column().insert_range_from(*nullable_col.nested_column, start,
length);
- const auto& src_null_map_data = nullable_col.get_null_map_data();
- _has_null = has_null();
- _has_null |= simd::contain_byte(src_null_map_data.data() + start, length,
1);
}
void ColumnNullable::insert_indices_from(const IColumn& src, const uint32_t*
indices_begin,
@@ -296,9 +292,8 @@ void ColumnNullable::insert_indices_from(const IColumn&
src, const uint32_t* ind
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(),
indices_begin,
indices_end);
-
_get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(),
indices_begin,
- indices_end);
- _need_update_has_null = true;
+
get_null_map_column().insert_indices_from(src_concrete.get_null_map_column(),
indices_begin,
+ indices_end);
}
void ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
@@ -307,17 +302,18 @@ void
ColumnNullable::insert_indices_from_not_has_null(const IColumn& src,
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_indices_from(src_concrete.get_nested_column(),
indices_begin,
indices_end);
- _get_null_map_column().insert_many_defaults(indices_end - indices_begin);
+ _push_false_to_nullmap(indices_end - indices_begin);
}
void ColumnNullable::insert(const Field& x) {
if (x.is_null()) {
get_nested_column().insert_default();
- _get_null_map_data().push_back(1);
+ get_null_map_data().push_back(1);
_has_null = true;
+ _need_update_has_null = false;
} else {
get_nested_column().insert(x);
- _get_null_map_data().push_back(0);
+ _push_false_to_nullmap(1);
}
}
@@ -325,19 +321,18 @@ void ColumnNullable::insert_from(const IColumn& src,
size_t n) {
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
auto is_null = src_concrete.get_null_map_data()[n];
- _has_null |= is_null;
- _get_null_map_data().push_back(is_null);
+ get_null_map_data().push_back(is_null);
}
void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
get_nested_column().insert_from(src, n);
- _get_null_map_data().push_back(0);
+ _push_false_to_nullmap(1);
}
void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t
start,
size_t length) {
get_nested_column().insert_range_from(src, start, length);
- _get_null_map_data().resize_fill(_get_null_map_data().size() + length, 0);
+ _push_false_to_nullmap(length);
}
void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t
position,
@@ -366,15 +361,14 @@ size_t ColumnNullable::filter(const Filter& filter) {
}
Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t
sel_size, IColumn* col_ptr) {
- const auto* nullable_col_ptr = reinterpret_cast<const
ColumnNullable*>(col_ptr);
+ auto* nullable_col_ptr = assert_cast<ColumnNullable*>(col_ptr);
ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
- ColumnPtr null_map_ptr = nullable_col_ptr->null_map;
+
+ /// `get_null_map_data` will set `_need_update_has_null` to true
+ auto& res_nullmap = nullable_col_ptr->get_null_map_data();
+
RETURN_IF_ERROR(get_nested_column().filter_by_selector(
sel, sel_size,
const_cast<doris::vectorized::IColumn*>(nest_col_ptr.get())));
- //insert cur nullmap into result nullmap which is empty
- auto& res_nullmap = reinterpret_cast<vectorized::ColumnVector<UInt8>*>(
-
const_cast<doris::vectorized::IColumn*>(null_map_ptr.get()))
- ->get_data();
DCHECK(res_nullmap.empty());
res_nullmap.resize(sel_size);
auto& cur_nullmap = get_null_map_column().get_data();
@@ -522,15 +516,10 @@ void ColumnNullable::get_permutation(bool reverse, size_t
limit, int null_direct
}
}
}
-//
-//void ColumnNullable::gather(ColumnGathererStream & gatherer)
-//{
-// gatherer.gather(*this);
-//}
void ColumnNullable::reserve(size_t n) {
get_nested_column().reserve(n);
- _get_null_map_data().reserve(n);
+ get_null_map_data(false).reserve(n);
}
void ColumnNullable::resize(size_t n) {
@@ -582,7 +571,7 @@ void ColumnNullable::apply_null_map(const ColumnNullable&
other) {
}
void ColumnNullable::check_consistency() const {
- if (null_map->size() != get_nested_column().size()) {
+ if (get_null_map_column().size() != get_nested_column().size()) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Sizes of nested column and null map of Nullable
column are not equal");
}
@@ -596,8 +585,8 @@ void ColumnNullable::sort_column(const ColumnSorter*
sorter, EqualFlags& flags,
}
void ColumnNullable::_update_has_null() {
- const UInt8* null_pos = _get_null_map_data().data();
- _has_null = simd::contain_byte(null_pos, _get_null_map_data().size(), 1);
+ const UInt8* null_pos = get_null_map_data().data();
+ _has_null = simd::contain_byte(null_pos, get_null_map_data().size(), 1);
_need_update_has_null = false;
}
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 5425242aad7..7772b6e80ad 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -21,7 +21,6 @@
#pragma once
#include <functional>
-#include <ostream>
#include <string>
#include <type_traits>
#include <utility>
@@ -50,6 +49,69 @@ class ColumnSorter;
using NullMap = ColumnUInt8::Container;
using ConstNullMapPtr = const NullMap*;
+/// use this to avoid directly access null_map forgetting modify
_need_update_has_null. see more in inner comments
+class NullMapProvider {
+public:
+ NullMapProvider() = default;
+ NullMapProvider(MutableColumnPtr&& null_map) :
_null_map(std::move(null_map)) {}
+ void reset_null_map(MutableColumnPtr&& null_map) { _null_map =
std::move(null_map); }
+
+ // return the column that represents the byte map. if want use null_map,
just call this.
+ const ColumnPtr& get_null_map_column_ptr() const { return _null_map; }
+ // for functions getting nullmap, we assume it will modify it. so set
`_need_update_has_null` to true. if you know it wouldn't,
+ // call with arg false. but for the ops which will set _has_null
themselves, call `update_has_null()`
+ MutableColumnPtr get_null_map_column_ptr(bool may_change = true) {
+ if (may_change) {
+ _need_update_has_null = true;
+ }
+ return _null_map->assume_mutable();
+ }
+ ColumnUInt8::WrappedPtr& get_null_map(bool may_change = true) {
+ if (may_change) {
+ _need_update_has_null = true;
+ }
+ return _null_map;
+ }
+
+ ColumnUInt8& get_null_map_column(bool may_change = true) {
+ if (may_change) {
+ _need_update_has_null = true;
+ }
+ return assert_cast<ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
+ }
+ const ColumnUInt8& get_null_map_column() const {
+ return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*_null_map);
+ }
+
+ NullMap& get_null_map_data(bool may_change = true) {
+ return get_null_map_column(may_change).get_data();
+ }
+ const NullMap& get_null_map_data() const { return
get_null_map_column().get_data(); }
+
+ void clear_null_map() {
assert_cast<ColumnUInt8*>(_null_map.get())->clear(); }
+
+ void update_has_null(bool new_value) {
+ _has_null = new_value;
+ _need_update_has_null = false;
+ }
+
+protected:
+ /**
+ * Here we have three variables which serve for `has_null()` judgement. If
we have known the nullity of object, no need
+ * to check through the `null_map` to get the answer until the next time
we modify it. Here `_has_null` is just the answer
+ * we cached. `_need_update_has_null` indicates there's modification or
not since we got `_has_null()` last time. So in
+ * `_has_null()` we can check the two vars to know if there's need to
update `has_null` or not.
+ * If you just want QUERY BUT NOT MODIFY, make sure the caller is const.
There will be no perf overhead for const overload.
+ * Otherwise, this class, as the base class, will make it no possible to
directly visit `null_map` forgetting to change the
+ * protected flags. Just call the interface is ok.
+ */
+ bool _need_update_has_null = true;
+ bool _has_null = true;
+
+private:
+ IColumn::WrappedPtr _null_map;
+};
+
/// Class that specifies nullable columns. A nullable column represents
/// a column, which may have any type, provided with the possibility of
/// storing NULL values. For this purpose, a ColumnNullable object stores
@@ -59,7 +121,7 @@ using ConstNullMapPtr = const NullMap*;
/// over a bitmap because columns are usually stored on disk as compressed
/// files. In this regard, using a bitmap instead of a byte map would
/// greatly complicate the implementation with little to no benefits.
-class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
+class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public
NullMapProvider {
private:
friend class COWHelper<IColumn, ColumnNullable>;
@@ -89,10 +151,11 @@ public:
std::string get_name() const override { return "Nullable(" +
nested_column->get_name() + ")"; }
MutableColumnPtr clone_resized(size_t size) const override;
size_t size() const override {
- return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*null_map).size();
+ return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(get_null_map_column())
+ .size();
}
PURE bool is_null_at(size_t n) const override {
- return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*null_map)
+ return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(get_null_map_column())
.get_data()[n] != 0;
}
Field operator[](size_t n) const override;
@@ -141,8 +204,13 @@ public:
assert_cast<ColumnType*>(nested_column.get())
->insert_from(src_concrete.get_nested_column(), n);
auto is_null = src_concrete.get_null_map_data()[n];
- _has_null |= is_null;
- _get_null_map_data().push_back(is_null);
+ if (is_null) {
+ get_null_map_data().push_back(1);
+ _has_null = true;
+ _need_update_has_null = false;
+ } else {
+ _push_false_to_nullmap(1);
+ }
}
void insert_from_not_nullable(const IColumn& src, size_t n);
@@ -150,19 +218,19 @@ public:
void insert_many_from_not_nullable(const IColumn& src, size_t position,
size_t length);
void insert_many_fix_len_data(const char* pos, size_t num) override {
- _get_null_map_column().insert_many_vals(0, num);
+ _push_false_to_nullmap(num);
get_nested_column().insert_many_fix_len_data(pos, num);
}
void insert_many_raw_data(const char* pos, size_t num) override {
DCHECK(pos);
- _get_null_map_column().insert_many_vals(0, num);
+ _push_false_to_nullmap(num);
get_nested_column().insert_many_raw_data(pos, num);
}
void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict,
size_t data_num, uint32_t dict_num) override {
- _get_null_map_column().insert_many_vals(0, data_num);
+ _push_false_to_nullmap(data_num);
get_nested_column().insert_many_dict_data(data_array, start_index,
dict, data_num,
dict_num);
}
@@ -172,38 +240,40 @@ public:
if (UNLIKELY(num == 0)) {
return;
}
- _get_null_map_column().insert_many_vals(0, num);
+ _push_false_to_nullmap(num);
get_nested_column().insert_many_continuous_binary_data(data, offsets,
num);
}
void insert_many_binary_data(char* data_array, uint32_t* len_array,
uint32_t* start_offset_array, size_t num)
override {
- _get_null_map_column().insert_many_vals(0, num);
+ _push_false_to_nullmap(num);
get_nested_column().insert_many_binary_data(data_array, len_array,
start_offset_array, num);
}
void insert_default() override {
get_nested_column().insert_default();
- _get_null_map_data().push_back(1);
+ get_null_map_data().push_back(1);
_has_null = true;
+ _need_update_has_null = false;
}
void insert_many_defaults(size_t length) override {
get_nested_column().insert_many_defaults(length);
- _get_null_map_data().resize_fill(_get_null_map_data().size() + length,
1);
+ get_null_map_data().resize_fill(get_null_map_data().size() + length,
1);
_has_null = true;
+ _need_update_has_null = false;
}
void insert_not_null_elements(size_t num) {
get_nested_column().insert_many_defaults(num);
- _get_null_map_column().insert_many_vals(0, num);
- _has_null = false;
+ _push_false_to_nullmap(num);
}
void insert_null_elements(int num) {
get_nested_column().insert_many_defaults(num);
- _get_null_map_column().insert_many_vals(1, num);
+ get_null_map_column().insert_many_vals(1, num);
_has_null = true;
+ _need_update_has_null = false;
}
void pop_back(size_t n) override;
@@ -255,7 +325,7 @@ public:
void for_each_subcolumn(ColumnCallback callback) override {
callback(nested_column);
- callback(null_map);
+ callback(get_null_map());
}
bool structure_equals(const IColumn& rhs) const override {
@@ -281,11 +351,13 @@ public:
bool is_fixed_and_contiguous() const override { return false; }
bool is_exclusive() const override {
- return IColumn::is_exclusive() && nested_column->is_exclusive() &&
null_map->is_exclusive();
+ return IColumn::is_exclusive() && nested_column->is_exclusive() &&
+ get_null_map_column().is_exclusive();
}
size_t size_of_value_if_fixed() const override {
- return null_map->size_of_value_if_fixed() +
nested_column->size_of_value_if_fixed();
+ return get_null_map_column().size_of_value_if_fixed() +
+ nested_column->size_of_value_if_fixed();
}
bool only_null() const override { return size() == 1 && is_null_at(0); }
@@ -301,32 +373,12 @@ public:
MutableColumnPtr get_nested_column_ptr() { return
nested_column->assume_mutable(); }
- /// Return the column that represents the byte map.
- const ColumnPtr& get_null_map_column_ptr() const { return null_map; }
-
- MutableColumnPtr get_null_map_column_ptr() {
- _need_update_has_null = true;
- return null_map->assume_mutable();
- }
-
- ColumnUInt8& get_null_map_column() {
- _need_update_has_null = true;
- return assert_cast<ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*null_map);
- }
- const ColumnUInt8& get_null_map_column() const {
- return assert_cast<const ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*null_map);
- }
-
void clear() override {
- null_map->clear();
+ clear_null_map();
nested_column->clear();
_has_null = false;
}
- NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
-
- const NullMap& get_null_map_data() const { return
get_null_map_column().get_data(); }
-
/// Apply the null byte map of a specified nullable column onto the
/// null byte map of the current column by performing an element-wise OR
/// between both byte maps. This method is used to determine the null byte
@@ -352,7 +404,8 @@ public:
DCHECK(size() > self_row);
const auto& nullable_rhs =
assert_cast<const ColumnNullable&,
TypeCheckOnRelease::DISABLE>(rhs);
- null_map->replace_column_data(*nullable_rhs.null_map, row, self_row);
+
get_null_map_column().replace_column_data(nullable_rhs.get_null_map_column(),
row,
+ self_row);
if (!nullable_rhs.is_null_at(row)) {
nested_column->replace_column_data(*nullable_rhs.nested_column,
row, self_row);
@@ -413,21 +466,15 @@ public:
}
private:
- // the two functions will not update `_need_update_has_null`
- ColumnUInt8& _get_null_map_column() {
- return assert_cast<ColumnUInt8&,
TypeCheckOnRelease::DISABLE>(*null_map);
- }
- NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); }
-
- WrappedPtr nested_column;
- WrappedPtr null_map;
-
- bool _need_update_has_null = true;
- bool _has_null = true;
-
void _update_has_null();
+
template <bool negative>
void apply_null_map_impl(const ColumnUInt8& map);
+
+ // push not null value wouldn't change the nullity. no need to update
_has_null
+ void _push_false_to_nullmap(size_t num) {
get_null_map_column(false).insert_many_vals(0, num); }
+
+ WrappedPtr nested_column;
};
ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);
diff --git a/be/test/vec/columns/column_nullable_seriazlization_test.cpp
b/be/test/vec/columns/column_nullable_seriazlization_test.cpp
index 635916328ba..70702369cd3 100644
--- a/be/test/vec/columns/column_nullable_seriazlization_test.cpp
+++ b/be/test/vec/columns/column_nullable_seriazlization_test.cpp
@@ -20,92 +20,19 @@
#include <cstddef>
#include <cstdlib>
-#include <type_traits>
+#include "column_nullable_test.h"
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/common/arena.h"
#include "vec/common/string_ref.h"
-#include "vec/core/field.h"
#include "vec/core/types.h"
-#include "vec/data_types/data_type.h"
-#include "vec/data_types/data_type_date_time.h"
-#include "vec/data_types/data_type_string.h"
using namespace doris;
using namespace doris::vectorized;
-static std::string generate_random_string(size_t max_length) {
- std::srand(std::time(nullptr)); // use current time as seed for random
generator
-
- if (max_length == 0) {
- return "";
- }
-
- auto randbyte = []() -> char {
- // generate a random byte, in range [0x00, 0xFF]
- return static_cast<char>(rand() % 256);
- };
-
- std::string str(max_length, 0);
- std::generate_n(str.begin(), max_length, randbyte);
-
- return str;
-}
-
-static MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null
= false,
- bool all_not_null = false) {
- std::srand(std::time(nullptr)); // use current time as seed for random
generator
- auto null_map = ColumnUInt8::create();
- for (size_t i = 0; i < input_rows_count; ++i) {
- if (all_null) {
- null_map->insert(1);
- } else if (all_not_null) {
- null_map->insert(0);
- } else {
- null_map->insert(rand() % 2);
- }
- }
- return null_map;
-}
-
-template <typename T>
-static MutableColumnPtr create_nested_column(size_t input_rows_count) {
- MutableColumnPtr column;
- if constexpr (std::is_integral_v<T>) {
- column = ColumnVector<T>::create();
- } else if constexpr (std::is_same_v<T, String>) {
- column = ColumnString::create();
- } else if constexpr (std::is_same_v<T, Decimal64>) {
- column = ColumnDecimal64::create(0, 6);
- }
-
- for (size_t i = 0; i < input_rows_count; ++i) {
- if constexpr (std::is_integral_v<T>) {
- column->insert(rand() % std::numeric_limits<T>::max());
- } else if constexpr (std::is_same_v<T, String>) {
- column->insert(generate_random_string(rand() % 512));
- } else if constexpr (std::is_same_v<T, Decimal64>) {
- column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
- } else {
- throw std::runtime_error("Unsupported type");
- }
- }
-
- return column;
-}
-
-template <typename T>
-static ColumnNullable::MutablePtr create_column_nullable(size_t
input_rows_count,
- bool all_null = false,
- bool all_not_null =
false) {
- auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
- auto nested_column = create_nested_column<T>(input_rows_count);
- return ColumnNullable::create(std::move(nested_column),
std::move(null_map));
-}
-
TEST(ColumnNullableSerializationTest, column_nullable_column_vector) {
const size_t input_rows_count = 4096 * 1000;
ColumnNullable::Ptr column_nullable =
create_column_nullable<Int64>(input_rows_count);
diff --git a/be/test/vec/columns/column_nullable_test.cpp
b/be/test/vec/columns/column_nullable_test.cpp
new file mode 100644
index 00000000000..9e66d5bb302
--- /dev/null
+++ b/be/test/vec/columns/column_nullable_test.cpp
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_nullable.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+#include <gtest/gtest.h>
+
+#include "column_nullable_test.h"
+#include "common/status.h"
+#include "runtime/define_primitive_type.h"
+#include "vec/columns/columns_number.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+TEST(ColumnNullableTest, NullTest) {
+ ColumnNullable::MutablePtr null_col = create_column_nullable<Int64>(500,
true);
+ EXPECT_TRUE(null_col->has_null());
+
+ ColumnNullable::MutablePtr dst_col =
+ ColumnNullable::create(create_nested_column<Int64>(10),
ColumnUInt8::create(10));
+ EXPECT_FALSE(dst_col->has_null());
+
+ ColumnInt64::MutablePtr source_col = ColumnInt64::create();
+ source_col->insert_range_of_integer(0, 100);
+
+ dst_col->insert(Field());
+ EXPECT_TRUE(dst_col->has_null());
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_many_from_not_nullable(*source_col, 0, 10);
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_from_not_nullable(*source_col, 5);
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_many_from_not_nullable(*source_col, 5, 5);
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_range_from_not_nullable(*source_col, 5, 5);
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_range_from(
+ *ColumnNullable::create(std::move(source_col),
ColumnUInt8::create(10)), 5, 5);
+ EXPECT_FALSE(dst_col->has_null());
+
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_null_elements(10);
+ EXPECT_TRUE(dst_col->has_null());
+
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_from(*null_col, 100);
+ EXPECT_TRUE(dst_col->has_null());
+
+ auto tmp_col =
+ ColumnNullable::create(create_nested_column<Int64>(10),
ColumnUInt8::create(10, 1));
+
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_from(*tmp_col, 9);
+ EXPECT_TRUE(dst_col->has_null());
+
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_range_from(*tmp_col, 0, 3);
+ EXPECT_TRUE(dst_col->has_null());
+
+ dst_col->clear();
+ EXPECT_FALSE(dst_col->has_null());
+ dst_col->insert_from(*tmp_col, 9);
+ EXPECT_TRUE(dst_col->has_null());
+}
+
+TEST(ColumnNullableTest, PredicateTest) {
+ auto nullable_pred =
+ ColumnNullable::create(PredicateColumnType<TYPE_DATE>::create(),
ColumnUInt8::create());
+ nullable_pred->insert_many_defaults(3);
+ EXPECT_TRUE(nullable_pred->has_null());
+ nullable_pred->insert_null_elements(10);
+ EXPECT_TRUE(nullable_pred->has_null());
+
+ nullable_pred->clear();
+ EXPECT_FALSE(nullable_pred->has_null());
+ nullable_pred->insert_null_elements(10);
+ EXPECT_TRUE(nullable_pred->has_null()); // now it have 10 nulls
+
+ auto null_dst = ColumnNullable::create(ColumnDate::create(),
ColumnUInt8::create());
+ EXPECT_FALSE(null_dst->has_null());
+
+ uint16_t selector[] = {5, 8}; // both null
+ EXPECT_EQ(nullable_pred->filter_by_selector(selector, 2, null_dst.get()),
Status::OK());
+ // filter_by_selector must announce to update has_null to make below right.
+ EXPECT_TRUE(null_dst->has_null());
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/columns/column_nullable_test.h
b/be/test/vec/columns/column_nullable_test.h
new file mode 100644
index 00000000000..0f90a25c9b5
--- /dev/null
+++ b/be/test/vec/columns/column_nullable_test.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock-more-matchers.h>
+#include <gtest/gtest.h>
+
+#include <cstddef>
+#include <cstdlib>
+#include <type_traits>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+inline std::string generate_random_string(size_t max_length) {
+ std::srand(std::time(nullptr)); // use current time as seed for random
generator
+
+ if (max_length == 0) {
+ return "";
+ }
+
+ auto randbyte = []() -> char {
+ // generate a random byte, in range [0x00, 0xFF]
+ return static_cast<char>(rand() % 256);
+ };
+
+ std::string str(max_length, 0);
+ std::generate_n(str.begin(), max_length, randbyte);
+
+ return str;
+}
+
+inline MutableColumnPtr create_null_map(size_t input_rows_count, bool all_null
= false,
+ bool all_not_null = false) {
+ std::srand(std::time(nullptr)); // use current time as seed for random
generator
+ auto null_map = ColumnUInt8::create();
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (all_null) {
+ null_map->insert(1);
+ } else if (all_not_null) {
+ null_map->insert(0);
+ } else {
+ null_map->insert(rand() % 2);
+ }
+ }
+ return null_map;
+}
+
+template <typename T>
+inline MutableColumnPtr create_nested_column(size_t input_rows_count) {
+ MutableColumnPtr column;
+ if constexpr (std::is_integral_v<T>) {
+ column = ColumnVector<T>::create();
+ } else if constexpr (std::is_same_v<T, String>) {
+ column = ColumnString::create();
+ } else if constexpr (std::is_same_v<T, Decimal64>) {
+ column = ColumnDecimal64::create(0, 6);
+ }
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if constexpr (std::is_integral_v<T>) {
+ column->insert(rand() % std::numeric_limits<T>::max());
+ } else if constexpr (std::is_same_v<T, String>) {
+ column->insert(generate_random_string(rand() % 512));
+ } else if constexpr (std::is_same_v<T, Decimal64>) {
+ column->insert(Int64(rand() % std::numeric_limits<Int64>::max()));
+ } else {
+ throw std::runtime_error("Unsupported type");
+ }
+ }
+
+ return column;
+}
+
+template <typename T>
+inline ColumnNullable::MutablePtr create_column_nullable(size_t
input_rows_count,
+ bool all_null = false,
+ bool all_not_null =
false) {
+ auto null_map = create_null_map(input_rows_count, all_null, all_not_null);
+ auto nested_column = create_nested_column<T>(input_rows_count);
+ return ColumnNullable::create(std::move(nested_column),
std::move(null_map));
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/test/vec/columns/column_resize_test.cpp
b/be/test/vec/columns/column_resize_test.cpp
index dd6f78f3e8d..0b5f6ba183e 100644
--- a/be/test/vec/columns/column_resize_test.cpp
+++ b/be/test/vec/columns/column_resize_test.cpp
@@ -1,4 +1,3 @@
-
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
diff --git a/be/test/vec/function/function_test_util.h
b/be/test/vec/function/function_test_util.h
index 62666d7c67c..e6ce662f64d 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -272,6 +272,14 @@ Status check_function(const std::string& func_name, const
InputTypeSet& input_ty
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2;
} else if (std::is_same_v<ReturnType, DateTimeV2>) {
fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2;
+ } else if (std::is_same_v<ReturnType, Decimal128V2>) {
+ fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMALV2;
+ } else if (std::is_same_v<ReturnType, Decimal32>) {
+ fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL32;
+ } else if (std::is_same_v<ReturnType, Decimal64>) {
+ fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL64;
+ } else if (std::is_same_v<ReturnType, Decimal128V3>) {
+ fn_ctx_return.type = doris::PrimitiveType::TYPE_DECIMAL128I;
} else {
fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE;
}
diff --git a/regression-test/data/correctness/test_column_nullable_cache.out
b/regression-test/data/correctness/test_column_nullable_cache.out
new file mode 100644
index 00000000000..eac491df212
--- /dev/null
+++ b/regression-test/data/correctness/test_column_nullable_cache.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test1 --
+
+-- !test2 --
+0
+
diff --git
a/regression-test/suites/correctness/test_column_nullable_cache.groovy
b/regression-test/suites/correctness/test_column_nullable_cache.groovy
new file mode 100644
index 00000000000..cf871c617aa
--- /dev/null
+++ b/regression-test/suites/correctness/test_column_nullable_cache.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_column_nullable_cache") {
+ sql """
+ drop table if exists test_column_nullable_cache;
+ """
+ sql """
+ CREATE TABLE `test_column_nullable_cache` (
+ `col_int_undef_signed2` int NULL,
+ `col_int_undef_signed` int NULL,
+ `col_int_undef_signed3` int NULL,
+ `col_int_undef_signed4` int NULL,
+ `pk` int NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`col_int_undef_signed2`)
+ DISTRIBUTED by RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into test_column_nullable_cache
+
(pk,col_int_undef_signed,col_int_undef_signed2,col_int_undef_signed3,col_int_undef_signed4)
+ values
(0,3,7164641,5,8),(1,null,3916062,5,6),(2,1,5533498,0,9),(3,7,2,null,7057679),(4,1,0,7,7),
+
(5,null,4,2448564,1),(6,7531976,7324373,9,7),(7,3,1,1,3),(8,6,8131576,9,-1793807),(9,9,2,4214547,9),
+
(10,-7299852,5,1,3),(11,7,3,-1036551,5),(12,-6108579,84823,4,1229534),(13,-1065629,5,4,null),(14,null,8072633,3328285,2),
+
(15,2,7,6,6),(16,8,5,-4582103,1),(17,5,-4677722,-2379367,4),(18,-7807532,-6686732,0,5329341),
+
(19,8,7,-4013246,-7013374),(20,0,2,9,2),(21,7,2383333,5,4),(22,5844611,2,2,0),(23,0,4756185,0,-5612039),
+
(24,6,4878754,608172,0),(25,null,7858692,7,-6704206),(26,7,-1697597,6,9),(27,9,-7021349,3,-3094786),
+
(28,2,2830915,null,8),(29,4133633,489212,5,9),(30,6,-3346211,3668768,2),(31,1,4862070,-5066405,0),(32,9,6,7,8),
+
(33,2,null,4,2),(34,1,2893430,-3282825,5),(35,2,3,4,2),(36,4,-3418732,6,1263819),(37,5,4,-6342170,6),(99,9,2,8,null);
+ """
+
+ qt_test1 """
+ select * from test_column_nullable_cache where col_int_undef_signed3
IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
+ """
+
+ qt_test2 """
+ select count(*) from test_column_nullable_cache where
col_int_undef_signed3 IS NULL and col_int_undef_signed3 = col_int_undef_signed3;
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]