ColinLeeo commented on code in PR #748:
URL: https://github.com/apache/tsfile/pull/748#discussion_r3014202634
##########
cpp/src/common/tablet.cc:
##########
@@ -444,6 +444,57 @@ void Tablet::set_column_categories(
}
}
+void Tablet::reset_string_columns() {
+ size_t schema_count = schema_vec_->size();
+ for (size_t c = 0; c < schema_count; c++) {
+ const MeasurementSchema& schema = schema_vec_->at(c);
+ if (schema.data_type_ == STRING || schema.data_type_ == TEXT ||
+ schema.data_type_ == BLOB) {
+ value_matrix_[c].string_col->reset();
+ }
+ }
+}
+
+std::vector<uint32_t> Tablet::find_all_device_boundaries() const {
+ const uint32_t row_count = get_cur_row_size();
+ if (row_count <= 1) return {};
+
+ // Use uint64_t bitmap instead of vector<bool> for faster set/test/scan.
+ const uint32_t nwords = (row_count + 63) / 64;
+ std::vector<uint64_t> boundary(nwords, 0);
+
+ for (auto col_idx : id_column_indexes_) {
+ const StringColumn& sc = *value_matrix_[col_idx].string_col;
+ const uint32_t* off = sc.offsets;
+ const char* buf = sc.buffer;
+ for (uint32_t i = 1; i < row_count; i++) {
+ if (boundary[i >> 6] & (1ULL << (i & 63))) continue;
+ uint32_t len_a = off[i] - off[i - 1];
+ uint32_t len_b = off[i + 1] - off[i];
+ if (len_a != len_b ||
+ (len_a > 0 &&
+ memcmp(buf + off[i - 1], buf + off[i], len_a) != 0)) {
+ boundary[i >> 6] |= (1ULL << (i & 63));
Review Comment:
fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]