[ 
https://issues.apache.org/jira/browse/DRILL-8235?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17568790#comment-17568790
 ] 

ASF GitHub Bot commented on DRILL-8235:
---------------------------------------

cgivre commented on code in PR #2585:
URL: https://github.com/apache/drill/pull/2585#discussion_r925114706


##########
contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsGroupScan.java:
##########
@@ -0,0 +1,369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.googlesheets;
+
+import com.fasterxml.jackson.annotation.JacksonInject;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonTypeName;
+import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.exec.metastore.MetadataProviderManager;
+import org.apache.drill.exec.physical.base.AbstractGroupScan;
+import org.apache.drill.exec.physical.base.GroupScan;
+import org.apache.drill.exec.physical.base.PhysicalOperator;
+import org.apache.drill.exec.physical.base.ScanStats;
+import org.apache.drill.exec.physical.base.SubScan;
+import org.apache.drill.exec.planner.logical.DrillScanRel;
+import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint;
+import org.apache.drill.exec.record.metadata.TupleMetadata;
+import org.apache.drill.exec.store.StoragePluginRegistry;
+import org.apache.drill.exec.store.base.filter.ExprNode;
+import org.apache.drill.exec.util.Utilities;
+import org.apache.drill.metastore.metadata.TableMetadata;
+import org.apache.drill.metastore.metadata.TableMetadataProvider;
+import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+@JsonTypeName("googlesheets-group-scan")
+public class GoogleSheetsGroupScan extends AbstractGroupScan {
+
+  private final GoogleSheetsScanSpec scanSpec;
+  private final GoogleSheetsStoragePluginConfig config;
+  private final List<SchemaPath> columns;
+  private final String pluginName;
+  private final Map<String, ExprNode.ColRelOpConstNode> filters;
+  private final ScanStats scanStats;
+  private final double filterSelectivity;
+  private final int maxRecords;
+  private final GoogleSheetsStoragePlugin plugin;
+  private int hashCode;
+  private MetadataProviderManager metadataProviderManager;
+
+  // Initial Constructor
+  public GoogleSheetsGroupScan(String userName,
+                               GoogleSheetsScanSpec scanSpec,
+                               GoogleSheetsStoragePlugin plugin,
+                               MetadataProviderManager 
metadataProviderManager) {
+    super(userName);
+    this.scanSpec = scanSpec;
+    this.config = scanSpec.getConfig();
+    this.columns = ALL_COLUMNS;
+    this.pluginName = plugin.getName();
+    this.filters = null;
+    this.filterSelectivity = 0.0;
+    this.maxRecords = -1;
+    this.scanStats = computeScanStats();
+    this.plugin = plugin;
+    this.metadataProviderManager = metadataProviderManager;
+  }
+
+  // Copy Constructor
+  public GoogleSheetsGroupScan(GoogleSheetsGroupScan that) {
+    super(that);
+    this.scanSpec = that.scanSpec;
+    this.config = that.config;
+    this.columns = that.columns;
+    this.filters = that.filters;
+    this.pluginName = that.pluginName;
+    this.filterSelectivity = that.filterSelectivity;
+    this.scanStats = that.scanStats;
+    this.maxRecords = that.maxRecords;
+    this.plugin = that.plugin;
+    this.metadataProviderManager = that.metadataProviderManager;
+    this.hashCode = hashCode();
+  }
+
+  /**
+   * Constructor for applying a limit.
+   * @param that The previous group scan without the limit.
+   * @param maxRecords  The desired limit, pushed down from Calcite
+   */
+  public GoogleSheetsGroupScan(GoogleSheetsGroupScan that, int maxRecords) {
+    super(that);
+    this.scanSpec = that.scanSpec;
+    this.config = that.config;
+    this.columns = that.columns;
+    this.pluginName = that.pluginName;
+    this.filters = that.filters;
+    this.filterSelectivity = that.filterSelectivity;
+    this.maxRecords = maxRecords;
+    this.plugin = that.plugin;
+    this.metadataProviderManager = that.metadataProviderManager;
+    this.scanStats = computeScanStats();
+  }
+
+  /**
+   * Constructor for applying columns (Projection pushdown).
+   * @param that The previous GroupScan, without the columns
+   * @param columns The list of columns to push down
+   */
+  public GoogleSheetsGroupScan(GoogleSheetsGroupScan that, List<SchemaPath> 
columns) {
+    super(that);
+    this.scanSpec = that.scanSpec;
+    this.config = scanSpec.getConfig();
+    this.columns = columns;
+    this.filters = that.filters;
+    this.pluginName = that.pluginName;
+    this.filterSelectivity = that.filterSelectivity;
+    this.maxRecords = that.maxRecords;
+    this.plugin = that.plugin;
+    this.metadataProviderManager = that.metadataProviderManager;
+    this.scanStats = computeScanStats();
+  }
+
+  /**
+   * Constructor for applying a filter
+   * @param that Previous group scan w/o filters
+   * @param filters The list of filters
+   * @param filterSelectivity  The filter selectivity
+   */
+  public GoogleSheetsGroupScan(GoogleSheetsGroupScan that,
+                               Map<String, ExprNode.ColRelOpConstNode> filters,
+                               double filterSelectivity) {
+    super(that);
+    this.scanSpec = that.scanSpec;
+    this.config = that.config;
+    this.columns = that.columns;
+    this.filters = filters;
+    this.pluginName = that.pluginName;
+    this.filterSelectivity = filterSelectivity;
+    this.maxRecords = that.maxRecords;
+    this.plugin = that.plugin;
+    this.metadataProviderManager = that.metadataProviderManager;
+    this.scanStats = computeScanStats();
+  }
+
+  @JsonCreator
+  public GoogleSheetsGroupScan(
+    @JsonProperty("userName") String userName,
+    @JsonProperty("scanSpec") GoogleSheetsScanSpec scanSpec,
+    @JsonProperty("columns") List<SchemaPath> columns,
+    @JsonProperty("filters") Map<String, ExprNode.ColRelOpConstNode> filters,
+    @JsonProperty("filterSelectivity") double selectivity,
+    @JsonProperty("maxRecords") int maxRecords,
+    @JacksonInject StoragePluginRegistry plugins
+  ) {
+    super(userName);
+    this.scanSpec = scanSpec;
+    this.config = scanSpec.getConfig();
+    this.columns = columns;
+    this.filters = filters;
+    this.filterSelectivity = selectivity;
+    this.maxRecords = maxRecords;
+    this.scanStats = computeScanStats();
+    this.plugin = plugins.resolve(config, GoogleSheetsStoragePlugin.class);
+    this.pluginName = plugin.getName();
+  }
+
+  @JsonProperty("scanSpec")
+  public GoogleSheetsScanSpec scanSpec() {
+    return scanSpec;
+  }
+
+  @JsonProperty("config")
+  public GoogleSheetsStoragePluginConfig config() {
+    return config;
+  }
+
+  @JsonProperty("columns")
+  public List<SchemaPath> columns() {
+    return columns;
+  }
+
+  @JsonProperty("filters")
+  public Map<String, ExprNode.ColRelOpConstNode> filters() {
+    return filters;
+  }
+
+  @JsonProperty("maxRecords")
+  public int maxRecords() {
+    return maxRecords;
+  }
+
+  @Override
+  public void applyAssignments(List<DrillbitEndpoint> endpoints) {
+
+  }
+
+  public TupleMetadata getSchema() {
+    if (metadataProviderManager == null) {
+      return null;
+    }
+    try {
+      return metadataProviderManager.getSchemaProvider().read().getSchema();
+    } catch (IOException | NullPointerException e) {
+      return null;
+    }
+  }
+
+  @Override
+  public TableMetadata getTableMetadata() {
+    if (getMetadataProvider() == null) {
+      return null;
+    }
+    return getMetadataProvider().getTableMetadata();
+  }
+
+  @Override
+  public TableMetadataProvider getMetadataProvider() {
+    if (metadataProviderManager == null) {
+      return null;
+    }
+    return metadataProviderManager.getTableMetadataProvider();
+  }
+
+  @Override
+  @JsonIgnore
+  public boolean canPushdownProjects(List<SchemaPath> columns) {
+    return true;
+  }
+
+  @JsonIgnore
+  public boolean allowsFilters() {
+    return true;
+  }
+
+  @Override
+  public SubScan getSpecificScan(int minorFragmentId) {
+    return new GoogleSheetsSubScan(userName, config, scanSpec, columns, 
filters, maxRecords, getSchema());
+  }
+
+  @Override
+  public int getMaxParallelizationWidth() {
+    return 1;
+  }
+
+  @Override
+  public GroupScan clone(List<SchemaPath> columns) {
+    return new GoogleSheetsGroupScan(this, columns);
+  }
+
+  @Override
+  public boolean supportsLimitPushdown() {
+    return true;
+  }
+
+  @Override
+  public GroupScan applyLimit(int maxRecords) {
+    if (maxRecords == this.maxRecords) {
+      return null;
+    }
+    return new GoogleSheetsGroupScan(this, maxRecords);
+  }
+
+  @Override
+  public String getDigest() {
+    return toString();
+  }
+
+  @Override
+  public ScanStats getScanStats() {
+
+    // Since this class is immutable, compute stats once and cache
+    // them. If the scan changes (adding columns, adding filters), we
+    // get a new scan without cached stats.
+    return scanStats;
+  }
+
+  private ScanStats computeScanStats() {
+
+    // If this config allows filters, then make the default
+    // cost very high to force the planner to choose the version
+    // with filters.
+    if (!hasFilters()) {
+      return new ScanStats(ScanStats.GroupScanProperty.ESTIMATED_TOTAL_COST,
+        1E9, 1E112, 1E12);
+    }
+
+    // No good estimates at all, just make up something.
+    double estRowCount = 10_000;

Review Comment:
   Added TODO. 





> Add Storage Plugin for Google Sheets
> ------------------------------------
>
>                 Key: DRILL-8235
>                 URL: https://issues.apache.org/jira/browse/DRILL-8235
>             Project: Apache Drill
>          Issue Type: Improvement
>          Components: Storage - Other
>    Affects Versions: 1.20.1
>            Reporter: Charles Givre
>            Assignee: Charles Givre
>            Priority: Major
>             Fix For: 2.0.0
>
>
> Google Sheets is a very commonly used data source among business users.  
> Presto and other query engines do include integrations with Google Sheets and 
> so it would be useful for Drill to add this functionality. 
> The proposed plugin supports both reading and writing to Google Sheets. 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to