FTP is definitely not supported. Copy the files to distributed storage first, then read them from there.
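For example, a minimal sketch of that workaround, assuming commons-net is on the classpath; the HDFS target path and the remote file name below are made up for illustration (your URL points at a directory):

import org.apache.commons.net.ftp.{FTP, FTPClient}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.IOUtils

// 1) Copy the file from the FTP server into HDFS
//    (commons-net + the Hadoop FileSystem API).
val ftp = new FTPClient()
ftp.connect("10.3.87.51", 21)
ftp.login("ftpuser", "ftpuser")
ftp.enterLocalPassiveMode()
ftp.setFileType(FTP.BINARY_FILE_TYPE)

val fs  = FileSystem.get(sparkSession.sparkContext.hadoopConfiguration)
val dst = new Path("hdfs:///tmp/sparkftp/data.csv")     // hypothetical HDFS target
val in  = ftp.retrieveFileStream("/sparkftp/data.csv")  // hypothetical remote file
IOUtils.copyBytes(in, fs.create(dst, true), 4096, true) // closes both streams
ftp.completePendingCommand()
ftp.logout()
ftp.disconnect()

// 2) Read from HDFS exactly as you already do.
val DF = sparkSession.read.format("csv")
  .schema(schemas)
  .option("header", "true")
  .load("hdfs:///tmp/sparkftp/data.csv")

For a whole directory, list the remote files with ftp.listFiles(...) and copy each one; a plain shell step (wget/curl + hdfs dfs -put) before the Spark job works just as well.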
On Sun, Aug 8, 2021, 10:18 PM igyu <i...@21cn.com> wrote:

> val ftpUrl = "ftp://ftpuser:ftpuser@10.3.87.51:21/sparkftp/"
>
> val schemas = StructType(List(
>   new StructField("name", DataTypes.StringType, true),
>   new StructField("age", DataTypes.IntegerType, true),
>   new StructField("remk", DataTypes.StringType, true)))
>
> val DF = sparkSession.read.format("csv")
>   .schema(schemas)
>   .option("header","true")
>   .load(ftpUrl)
> //  .filter("created<=1602864000")
>
> DF.printSchema()
> DF.show()
>
> I get the error:
>
> Exception in thread "main" java.lang.IllegalArgumentException: Illegal pattern component: XXX
>   at org.apache.commons.lang3.time.FastDatePrinter.parsePattern(FastDatePrinter.java:282)
>   at org.apache.commons.lang3.time.FastDatePrinter.init(FastDatePrinter.java:149)
>   at org.apache.commons.lang3.time.FastDatePrinter.<init>(FastDatePrinter.java:142)
>   at org.apache.commons.lang3.time.FastDateFormat.<init>(FastDateFormat.java:384)
>   at org.apache.commons.lang3.time.FastDateFormat.<init>(FastDateFormat.java:369)
>   at org.apache.commons.lang3.time.FastDateFormat$1.createInstance(FastDateFormat.java:91)
>   at org.apache.commons.lang3.time.FastDateFormat$1.createInstance(FastDateFormat.java:88)
>   at org.apache.commons.lang3.time.FormatCache.getInstance(FormatCache.java:82)
>   at org.apache.commons.lang3.time.FastDateFormat.getInstance(FastDateFormat.java:165)
>   at org.apache.spark.sql.execution.datasources.csv.CSVOptions.<init>(CSVOptions.scala:139)
>   at org.apache.spark.sql.execution.datasources.csv.CSVOptions.<init>(CSVOptions.scala:41)
>   at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.buildReader(CSVFileFormat.scala:105)
>   at org.apache.spark.sql.execution.datasources.FileFormat$class.buildReaderWithPartitionValues(FileFormat.scala:129)
>   at org.apache.spark.sql.execution.datasources.TextBasedFileFormat.buildReaderWithPartitionValues(FileFormat.scala:165)
>   at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD$lzycompute(DataSourceScanExec.scala:312)
>   at org.apache.spark.sql.execution.FileSourceScanExec.inputRDD(DataSourceScanExec.scala:310)
>   at org.apache.spark.sql.execution.FileSourceScanExec.inputRDDs(DataSourceScanExec.scala:330)
>   at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:41)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:615)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
>   at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:339)
>   at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
>   at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3383)
>   at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
>   at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
>   at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
>   at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
>   at org.apache.spark.sql.Dataset.head(Dataset.scala:2544)
>   at org.apache.spark.sql.Dataset.take(Dataset.scala:2758)
>   at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
>   at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:745)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:704)
>   at org.apache.spark.sql.Dataset.show(Dataset.scala:713)
>   at com.join.ftp.reader.FtpReader.readFrom(FtpReader.scala:40)
>   at com.join.synctool$.main(synctool.scala:41)
>   at com.join.synctool.main(synctool.scala)
> 21/08/09 11:15:08 INFO SparkContext: Invoking stop() from shutdown hook
>
> ------------------------------
> igyu