keith-turner opened a new issue, #6022:
URL: https://github.com/apache/accumulo/issues/6022

   **Describe the bug**
   
   While testing #6010 an attempt was made to retrieve a large batch of key 
values by adjusting tablet server and scanner settings.  The scanners would 
return batches that seemed to be below the max sizes specified.  It's possible 
some configuration was missed.  Would like to further investigate this.
   
   **To Reproduce**
   
   Apply the following diff to the code in #6010.  When the test runs it prints 
out all of the spans.  Each span corresponds to a scan batch.  The configured 
max batch size was 1M and also the scanner was configured to allow up to 10000 
entries.  However each span shows numbers like 
`accumulo.scan.entries.returned=797` and `accumulo.scan.bytes.returned=820113`. 
 Seems like the bytes returned should be closer to 1M.  
   
   ```diff
   diff --git 
a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java 
b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   index 2217ba0241..1638f12e11 100644
   --- a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   +++ b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   @@ -21,7 +21,9 @@ package org.apache.accumulo.core.trace;
    import java.lang.reflect.InvocationHandler;
    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Proxy;
   +import java.util.Arrays;
    import java.util.Map;
   +import java.util.Objects;
    import java.util.concurrent.Callable;
    
    import org.apache.accumulo.core.Constants;
   diff --git 
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java 
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   index 2196302fea..ac0ecdb410 100644
   --- 
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   +++ 
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   @@ -57,6 +57,7 @@ public class ScanTraceClient {
            scanner.setRange(new Range(startRow, true, endRow, false));
          }
   diff --git 
a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java 
b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   index 2217ba0241..1638f12e11 100644
   --- a/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   +++ b/core/src/main/java/org/apache/accumulo/core/trace/TraceUtil.java
   @@ -21,7 +21,9 @@ package org.apache.accumulo.core.trace;
    import java.lang.reflect.InvocationHandler;
    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Proxy;
   +import java.util.Arrays;
    import java.util.Map;
   +import java.util.Objects;
    import java.util.concurrent.Callable;
    
    import org.apache.accumulo.core.Constants;
   diff --git 
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java 
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   index 2196302fea..ac0ecdb410 100644
   --- 
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   +++ 
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTraceClient.java
   @@ -57,6 +57,7 @@ public class ScanTraceClient {
            scanner.setRange(new Range(startRow, true, endRow, false));
          }
          setColumn(scanner);
   +      scanner.setBatchSize(10_000);
        }
    
        void conigureScanner(BatchScanner scanner) {
   diff --git 
a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java 
b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
   index 1d4e70d302..5a24a45d35 100644
   --- a/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
   +++ b/test/src/main/java/org/apache/accumulo/test/tracing/ScanTracingIT.java
   @@ -36,6 +36,9 @@ import java.util.stream.Collectors;
    import java.util.stream.IntStream;
    
    import org.apache.accumulo.core.client.Accumulo;
   +import org.apache.accumulo.core.client.AccumuloException;
   +import org.apache.accumulo.core.client.AccumuloSecurityException;
   +import org.apache.accumulo.core.client.TableExistsException;
    import org.apache.accumulo.core.client.admin.NewTableConfiguration;
    import org.apache.accumulo.core.conf.Property;
    import org.apache.accumulo.core.data.TableId;
   @@ -92,6 +95,37 @@ class ScanTracingIT extends ConfigurableMacBase {
        collector.stop();
      }
    
   +  @Test
   +  public void testLargeBatch() throws Exception {
   +    var tableName = getUniqueNames(1)[0];
   +
   +    try (var client = 
Accumulo.newClient().from(getClientProperties()).build()) {
   +      var ingestParams = new TestIngest.IngestParams(getClientProperties(), 
tableName);
   +      ingestParams.createTable = false;
   +      ingestParams.rows = 10000;
   +      ingestParams.cols = 10;
   +      var ntc = new 
NewTableConfiguration().setProperties(Map.of(Property.TABLE_SCAN_MAXMEM.getKey(),
 "1M"));
   +      client.tableOperations().create(tableName, ntc);
   +      TestIngest.ingest(client, ingestParams);
   +      client.tableOperations().flush(tableName, null, null, true);
   +
   +      var scanOpts = new ScanTraceClient.Options(tableName);
   +      var scanResults = run(scanOpts);
   +
   +      System.out.println("results : "+scanResults);
   +
   +      while(true) {
   +        var spanData = collector.take();
   +        if(spanData.traceId.equals(scanResults.traceId1) || 
spanData.traceId.equals(scanResults.traceId2)) {
   +          if(spanData.name.contains("scan-batch")) {
   +            System.out.println(spanData);
   +          }
   +        }
   +      }
   +
   +    }
   +  }
   +
      @Test
      public void test() throws Exception {
        var names = getUniqueNames(7);
   
   ```
   
   
   **Expected behavior**
   
   The trace data should be closer to the configured limits, or we should 
understand why there is a difference.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to