Shekharrajak commented on code in PR #3068:
URL: https://github.com/apache/datafusion-comet/pull/3068#discussion_r2680063777
##########
spark/src/main/scala/org/apache/spark/sql/comet/CometNativeWriteExec.scala:
##########
@@ -21,59 +21,35 @@ package org.apache.spark.sql.comet
import java.io.ByteArrayOutputStream
-import scala.jdk.CollectionConverters._
-
-import org.apache.hadoop.fs.Path
-import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext, TaskAttemptID,
TaskID, TaskType}
-import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
-import org.apache.spark.internal.io.FileCommitProtocol
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
import org.apache.spark.sql.vectorized.ColumnarBatch
-import org.apache.spark.util.Utils
import org.apache.comet.CometExecIterator
import org.apache.comet.serde.OperatorOuterClass.Operator
/**
- * Comet physical operator for native Parquet write operations with
FileCommitProtocol support.
- *
- * This operator writes data to Parquet files using the native Comet engine.
It integrates with
- * Spark's FileCommitProtocol to provide atomic writes with proper staging and
commit semantics.
+ * Comet physical operator for native Parquet write operations.
*
- * The implementation includes support for Spark's file commit protocol
through work_dir, job_id,
- * and task_attempt_id parameters that can be set in the operator. When
work_dir is set, files are
- * written to a temporary location that can be atomically committed later.
+ * This operator writes data to Parquet files using the native Comet engine.
Files are written
+ * directly to the output path. The commit protocol is handled by Spark's
+ * InsertIntoHadoopFsRelationCommand which manages the FileCommitProtocol
separately.
*
* @param nativeOp
- * The native operator representing the write operation (template, will be
modified per task)
+ * The native operator representing the write operation
* @param child
* The child operator providing the data to write
* @param outputPath
* The path where the Parquet file will be written
- * @param committer
- * FileCommitProtocol for atomic writes. If None, files are written directly.
- * @param jobTrackerID
- * Unique identifier for this write job
*/
-case class CometNativeWriteExec(
- nativeOp: Operator,
- child: SparkPlan,
- outputPath: String,
- committer: Option[FileCommitProtocol] = None,
- jobTrackerID: String = Utils.createTempDir().getName)
+case class CometNativeWriteExec(nativeOp: Operator, child: SparkPlan,
outputPath: String)
Review Comment:
This is now a basic execution operator that delegates to the native writer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]