keith-turner opened a new issue, #6022:
URL: https://github.com/apache/accumulo/issues/6022
**Describe the bug**
While testing #6010 an attempt was made to retrieve a large batch of key
values by adjusting tablet server and scanner settings. The scanners would
return batches that seemed to be below the max sizes specified. It's possible
some configuration was missed. Would like to further investigate this.
**To Reproduce**
Apply the following diff to the code in #6010. When the test runs it prints
out all of the spans. Each span corresponds to a scan batch. The configured
max batch size was 1M and also the scanner was configured to allow up to 10000
entries. However, each span shows numbers like
`accumulo.scan.entries.returned=797` and `accumulo.scan.bytes.returned=820113`.
Seems like the bytes returned should be closer to 1M.
```diff
diff --git
a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
index 2217ba0241..1638f12e11 100644
--- a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
+++ b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
@@ -21,7 +21,9 @@ package org.apache.accumulo.core.trace;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Proxy;
+import java.util.Arrays;
import java.util.Map;
+import java.util.Objects;
import java.util.concurrent.Callable;
import org.apache.accumulo.core.Constants;
diff --git
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
index 2196302fea..ac0ecdb410 100644
---
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
+++
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
@@ -57,6 +57,7 @@ public class ScanTraceClient {
scanner.setRange(new Range(startRow, true, endRow, false));
}
diff --git
a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
index 2217ba0241..1638f12e11 100644
--- a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
+++ b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
@@ -21,7 +21,9 @@ package org.apache.accumulo.core.trace;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Proxy;
+import java.util.Arrays;
import java.util.Map;
+import java.util.Objects;
import java.util.concurrent.Callable;
import org.apache.accumulo.core.Constants;
diff --git
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
index 2196302fea..ac0ecdb410 100644
---
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
+++
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
@@ -57,6 +57,7 @@ public class ScanTraceClient {
scanner.setRange(new Range(startRow, true, endRow, false));
}
setColumn(scanner);
+ scanner.setBatchSize(10_000);
}
void conigureScanner(BatchScanner scanner) {
diff --git
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
index 1d4e70d302..5a24a45d35 100644
--- a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
@@ -36,6 +36,9 @@ import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloException;
+import org.apache.accumulo.core.client.AccumuloSecurityException;
+import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.admin.NewTableConfiguration;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.TableId;
@@ -92,6 +95,37 @@ class ScanTracingIT extends ConfigurableMacBase {
collector.stop();
}
+ @Test
+ public void testLargeBatch() throws Exception {
+ var tableName = getUniqueNames(1)[0];
+
+ try (var client =
Accumulo.newClient().from(getClientProperties()).build()) {
+ var ingestParams = new TestIngest.IngestParams(getClientProperties(),
tableName);
+ ingestParams.createTable = false;
+ ingestParams.rows = 10000;
+ ingestParams.cols = 10;
+ var ntc = new
NewTableConfiguration().setProperties(Map.of(Property.TABLE_SCAN_MAXMEM.getKey(),
"1M"));
+ client.tableOperations().create(tableName, ntc);
+ TestIngest.ingest(client, ingestParams);
+ client.tableOperations().flush(tableName, null, null, true);
+
+ var scanOpts = new ScanTraceClient.Options(tableName);
+ var scanResults = run(scanOpts);
+
+ System.out.println("results : "+scanResults);
+
+ while(true) {
+ var spanData = collector.take();
+ if(spanData.traceId.equals(scanResults.traceId1) ||
spanData.traceId.equals(scanResults.traceId2)) {
+ if(spanData.name.contains("scan-batch")) {
+ System.out.println(spanData);
+ }
+ }
+ }
+
+ }
+ }
+
@Test
public void test() throws Exception {
var names = getUniqueNames(7);
```
**Expected behavior**
The trace data should be closer to the configured limits, or we should
understand why there is a difference.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]