rdblue commented on a change in pull request #24832: [SPARK-27845][SQL] DataSourceV2: InsertTable URL: https://github.com/apache/spark/pull/24832#discussion_r300795416
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolution.scala ########## @@ -118,6 +122,95 @@ case class DataSourceResolution( if newColumns.forall(_.name.size == 1) => // only top-level adds are supported using AlterTableAddColumnsCommand AlterTableAddColumnsCommand(table, newColumns.map(convertToStructField)) + + case i @ InsertTableStatement(UnresolvedRelation(CatalogObjectIdentifier(Some(catalog), ident)), + _, _, _, _) if i.query.resolved => + loadTable(catalog, ident) + .map(DataSourceV2Relation.create) + .map(table => { + // ifPartitionNotExists is append with validation, but validation is not supported + if (i.ifPartitionNotExists) { + throw new AnalysisException( + s"Cannot write, IF NOT EXISTS is not supported for table: ${table.table.name}") + } + + val staticPartitions = i.partition.filter(_._2.isDefined).mapValues(_.get) + + val resolver = conf.resolver + + // add any static value as a literal column + val staticPartitionProjectList = { + // check that the data column counts match + val numColumns = table.output.size + if (numColumns > staticPartitions.size + i.query.output.size) { + throw new AnalysisException(s"Cannot write: too many columns") + } else if (numColumns < staticPartitions.size + i.query.output.size) { + throw new AnalysisException(s"Cannot write: not enough columns") + } + + val staticNames = staticPartitions.keySet + + // for each static name, find the column name it will replace and check for unknowns. + val outputNameToStaticName = staticNames.map(staticName => + table.output.find(col => resolver(col.name, staticName)) match { + case Some(attr) => + attr.name -> staticName + case _ => + throw new AnalysisException( + s"Cannot add static value for unknown column: $staticName") + }).toMap + + // for each output column, add the static value as a literal + // or use the next input column + val queryColumns = i.query.output.iterator + table.output.map { col => + outputNameToStaticName.get(col.name).flatMap(staticPartitions.get) match { + case Some(staticValue) => + Alias(Cast(Literal(staticValue), col.dataType), col.name)() + case _ => + queryColumns.next + } + } + } + + val dynamicPartitionOverwrite = table.table.partitioning.size > 0 && + staticPartitions.size < table.table.partitioning.size && + conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC + + val query = + if (staticPartitions.isEmpty) { + i.query + } else { + Project(staticPartitionProjectList, i.query) + } + + val deleteExpr = + if (staticPartitions.isEmpty) { + Literal(true) + } else { + staticPartitions.map { case (name, value) => + query.output.find(col => resolver(col.name, name)) match { + case Some(attr) => + EqualTo(attr, Cast(Literal(value), attr.dataType)) + case None => + throw new AnalysisException(s"Unknown static partition column: $name") Review comment: This should be tested. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org