alamb commented on code in PR #9081:
URL: https://github.com/apache/arrow-rs/pull/9081#discussion_r2669453924
##########
arrow/benches/row_format.rs:
##########
@@ -85,6 +87,101 @@ fn bench_iter(c: &mut Criterion) {
});
}
+/// A single benchmark with a medium number of columns (around 50) without
nested columns for real-world use cases
+/// This also makes sure there is a large gap between each value in the column
and how it is laid out in the row format.
+/// and it is on the edge of not fitting in L3 on some machines
+fn run_benchmark_on_medium_amount_and_types_of_columns_without_nesting(
+ batch_size: usize,
+ c: &mut Criterion,
+) {
+ let mut seed = 0;
+
+ let mut cols: Vec<ArrayRef> = vec![];
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int8Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int32Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int64Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for _ in 0..10 {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int64Type>(
+ batch_size, 0.0, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(
+ create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+ batch_size, nulls, 0, 50, "", seed,
+ ),
+ ));
+ }
+
+ for _ in 0..3 {
+ seed += 1;
+ cols.push(Arc::new(
+ create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+ batch_size, 0.0, 0, 10, "", seed,
+ ),
+ ));
+ }
+ for _ in 0..3 {
+ seed += 1;
+ cols.push(Arc::new(
+ create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+ batch_size, 0.0, 10, 20, "", seed,
+ ),
+ ));
+ }
+ for _ in 0..3 {
+ seed += 1;
+ cols.push(Arc::new(
+ create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+ batch_size, 0.0, 20, 30, "", seed,
+ ),
+ ));
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_boolean_array_with_seed(
+ batch_size, nulls, 0.5, seed,
+ )));
+ }
+
+ for _ in 0..10 {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int64Type>(
+ batch_size, 0.0, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_f64_array_with_seed(batch_size, nulls,
seed)) as ArrayRef);
+ }
+
+ do_bench(c, format!("{batch_size} lot of columns").as_str(), cols);
Review Comment:
Could you please make the description more precise (specifically, "how
many" columns) -- maybe with an assert too, to make sure the benchmark stays in
sync:
```rust
assert_eq!(cols.len(), 50)
```
##########
arrow/benches/row_format.rs:
##########
@@ -85,6 +87,101 @@ fn bench_iter(c: &mut Criterion) {
});
}
+/// A single benchmark with a medium number of columns (around 50) without
nested columns for real-world use cases
+/// This also makes sure there is a large gap between each value in the column
and how it is laid out in the row format.
+/// and it is on the edge of not fitting in L3 on some machines
+fn run_benchmark_on_medium_amount_and_types_of_columns_without_nesting(
+ batch_size: usize,
+ c: &mut Criterion,
+) {
+ let mut seed = 0;
+
+ let mut cols: Vec<ArrayRef> = vec![];
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int8Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int32Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int64Type>(
+ batch_size, nulls, seed,
+ )) as ArrayRef);
+ }
+
+ for _ in 0..10 {
+ seed += 1;
+ cols.push(Arc::new(create_primitive_array_with_seed::<Int64Type>(
+ batch_size, 0.0, seed,
+ )) as ArrayRef);
+ }
+
+ for nulls in [0.0, 0.1, 0.2, 0.5] {
+ seed += 1;
+ cols.push(Arc::new(
+ create_string_array_with_len_range_and_prefix_and_seed::<i32>(
+ batch_size, nulls, 0, 50, "", seed,
+ ),
+ ));
+ }
+
+ for _ in 0..3 {
Review Comment:
How about also adding a StringView column (in addition to String)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]