//ForEachPartFunction.java:
import org.apache.spark.api.java.function.ForeachPartitionFunction;
import org.apache.spark.sql.Row;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class ForEachPartFunction implements ForeachPartitionFunction{
public void
OK — there are at least two ways to do it:
// Read every CSV under C:/input_data into a single DataFrame.
// Note: in the Java API, `read` is a method — spark.read() — and Dataset
// should be parameterized as Dataset<Row> to avoid raw-type warnings.
Dataset<Row> df = spark.read().csv("file:///C:/input_data/*.csv");
// Option 1: Dataset API — pass a ForeachPartitionFunction<Row> implementation.
df.foreachPartition(new ForEachPartFunction());
// Option 2: drop to the RDD API — pass a VoidFunction<Iterator<Row>> implementation.
df.toJavaRDD().foreachPartition(new Void_java_func());
where ForEachPartFunction and Void_java_func are defined below:
//
What would be a Java equivalent of the Scala code below?
def void_function_in_scala(ipartition: Iterator[Row]): Unit ={
var df_rows=ArrayBuffer[String]()
for(irow<-ipartition){
df_rows+=irow.toString
}
val df = spark.read.csv("file:///C:/input_data/*.csv")