lshg opened a new issue #2490: URL: https://github.com/apache/hudi/issues/2490
package com.gjr.recommend import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.{SparkConf, SparkContext} object DWDTenderLog { def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[2]").set("spark.executor.memory", "512m") val sc: SparkContext = new SparkContext(conf) val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate() val hc = new HiveContext(sc) hc.setConf("spark.sql.crossJoin.enabled","true"); val tenderLog: Array[Row] = hc.sql( """ | SELECT |projectid, |provinceid, |typeId, |tender_tag |FROM |( |SELECT |projectid, |provinceid, |typeId, |antistop |FROM |app.dwd_recommend_tender_ds |WHERE |createTime >= 1608280608479 AND createTime <= 1611628847000 |AND antistop != '' |GROUP BY |projectid, |provinceid, |typeId, |antistop |) AS a lateral VIEW explode (split(antistop, "#")) table_tmp AS tender_tag """.stripMargin).collect() println(tenderLog.toBuffer) sc.stop() } } 0 [main] INFO org.apache.spark.SparkContext - Running Spark version 2.4.7 346 [main] INFO org.apache.spark.SparkContext - Submitted application: DWDTenderLog$ 390 [main] INFO org.apache.spark.SecurityManager - Changing view acls to: lsh 390 [main] INFO org.apache.spark.SecurityManager - Changing modify acls to: lsh 390 [main] INFO org.apache.spark.SecurityManager - Changing view acls groups to: 390 [main] INFO org.apache.spark.SecurityManager - Changing modify acls groups to: 391 [main] INFO org.apache.spark.SecurityManager - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(lsh); groups with view permissions: Set(); users with modify permissions: Set(lsh); groups with modify permissions: Set() 2533 [main] INFO org.apache.spark.util.Utils - Successfully started service 'sparkDriver' on port 54347. 
2575 [main] INFO org.apache.spark.SparkEnv - Registering MapOutputTracker 2588 [main] INFO org.apache.spark.SparkEnv - Registering BlockManagerMaster 2589 [main] INFO org.apache.spark.storage.BlockManagerMasterEndpoint - Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information 2590 [main] INFO org.apache.spark.storage.BlockManagerMasterEndpoint - BlockManagerMasterEndpoint up 2596 [main] INFO org.apache.spark.storage.DiskBlockManager - Created local directory at C:\Users\lsh\AppData\Local\Temp\blockmgr-d134fb11-0552-4b4b-8f20-ea7e04fd086d 2609 [main] INFO org.apache.spark.storage.memory.MemoryStore - MemoryStore started with capacity 1979.1 MB 2619 [main] INFO org.apache.spark.SparkEnv - Registering OutputCommitCoordinator 2675 [main] INFO org.spark_project.jetty.util.log - Logging initialized @23630ms 2720 [main] INFO org.spark_project.jetty.server.Server - jetty-9.3.z-SNAPSHOT, build timestamp: 2019-02-16T00:53:49+08:00, git hash: eb70b240169fcf1abbd86af36482d1c49826fa0b 2731 [main] INFO org.spark_project.jetty.server.Server - Started @23687ms 2747 [main] INFO org.spark_project.jetty.server.AbstractConnector - Started ServerConnector@4d63b624{HTTP/1.1,[http/1.1]}{0.0.0.0:4040} 2747 [main] INFO org.apache.spark.util.Utils - Successfully started service 'SparkUI' on port 4040. 
2767 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@27eb3298{/jobs,null,AVAILABLE,@Spark} 2768 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@1b58ff9e{/jobs/json,null,AVAILABLE,@Spark} 2768 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@2f66e802{/jobs/job,null,AVAILABLE,@Spark} 2769 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@76318a7d{/jobs/job/json,null,AVAILABLE,@Spark} 2770 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@2a492f2a{/stages,null,AVAILABLE,@Spark} 2770 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@3277e499{/stages/json,null,AVAILABLE,@Spark} 2771 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@585811a4{/stages/stage,null,AVAILABLE,@Spark} 2772 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@4c4d362a{/stages/stage/json,null,AVAILABLE,@Spark} 2773 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@5400db36{/stages/pool,null,AVAILABLE,@Spark} 2773 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@76b74e9c{/stages/pool/json,null,AVAILABLE,@Spark} 2774 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@2d72f75e{/storage,null,AVAILABLE,@Spark} 2775 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@8ab78bc{/storage/json,null,AVAILABLE,@Spark} 2776 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started 
o.s.j.s.ServletContextHandler@5aa0dbf4{/storage/rdd,null,AVAILABLE,@Spark} 2776 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@16afbd92{/storage/rdd/json,null,AVAILABLE,@Spark} 2777 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@2c5d601e{/environment,null,AVAILABLE,@Spark} 2777 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@7fe083b1{/environment/json,null,AVAILABLE,@Spark} 2777 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@23c388c2{/executors,null,AVAILABLE,@Spark} 2778 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@486be205{/executors/json,null,AVAILABLE,@Spark} 2778 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@f713686{/executors/threadDump,null,AVAILABLE,@Spark} 2778 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@74f7d1d2{/executors/threadDump/json,null,AVAILABLE,@Spark} 2783 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@4b4dd216{/static,null,AVAILABLE,@Spark} 2784 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@54afd745{/,null,AVAILABLE,@Spark} 2786 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@677dbd89{/api,null,AVAILABLE,@Spark} 2787 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@2ca47471{/jobs/job/kill,null,AVAILABLE,@Spark} 2788 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@5a021cb9{/stages/stage/kill,null,AVAILABLE,@Spark} 2790 [main] INFO 
org.apache.spark.ui.SparkUI - Bound SparkUI to 0.0.0.0, and started at http://DESKTOP-E6TA5L3:4040 2873 [main] INFO org.apache.spark.executor.Executor - Starting executor ID driver on host localhost 2921 [main] INFO org.apache.spark.util.Utils - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 54366. 2922 [main] INFO org.apache.spark.network.netty.NettyBlockTransferService - Server created on DESKTOP-E6TA5L3:54366 2923 [main] INFO org.apache.spark.storage.BlockManager - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy 2939 [main] INFO org.apache.spark.storage.BlockManagerMaster - Registering BlockManager BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None) 2942 [dispatcher-event-loop-0] INFO org.apache.spark.storage.BlockManagerMasterEndpoint - Registering block manager DESKTOP-E6TA5L3:54366 with 1979.1 MB RAM, BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None) 2945 [main] INFO org.apache.spark.storage.BlockManagerMaster - Registered BlockManager BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None) 2945 [main] INFO org.apache.spark.storage.BlockManager - Initialized BlockManager: BlockManagerId(driver, DESKTOP-E6TA5L3, 54366, None) 3086 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@466d49f0{/metrics/json,null,AVAILABLE,@Spark} 3109 [main] WARN org.apache.spark.SparkContext - Using an existing SparkContext; some configuration may not take effect. 3244 [main] INFO org.apache.spark.sql.internal.SharedState - loading hive config file: file:/D:/GJR_PROJECT/tt/tenderRecommend/target/classes/hive-site.xml 3279 [main] INFO org.apache.spark.sql.internal.SharedState - spark.sql.warehouse.dir is not set, but hive.metastore.warehouse.dir is set. Setting spark.sql.warehouse.dir to the value of hive.metastore.warehouse.dir ('/user/hive/warehouse'). 
3279 [main] INFO org.apache.spark.sql.internal.SharedState - Warehouse path is '/user/hive/warehouse'. 3285 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@796065aa{/SQL,null,AVAILABLE,@Spark} 3286 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@28a6301f{/SQL/json,null,AVAILABLE,@Spark} 3286 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@1436a7ab{/SQL/execution,null,AVAILABLE,@Spark} 3286 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@3b7b05a8{/SQL/execution/json,null,AVAILABLE,@Spark} 3287 [main] INFO org.spark_project.jetty.server.handler.ContextHandler - Started o.s.j.s.ServletContextHandler@336365bc{/static/sql,null,AVAILABLE,@Spark} 3707 [main] INFO org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef - Registered StateStoreCoordinator endpoint 4072 [main] INFO org.apache.spark.sql.hive.HiveUtils - Initializing HiveMetastoreConnection version 1.2.1 using Spark classes. 4479 [main] WARN org.apache.hadoop.hive.conf.HiveConf - HiveConf of name hive.server2.webui.port does not exist 4479 [main] WARN org.apache.hadoop.hive.conf.HiveConf - HiveConf of name hive.server2.webui.host does not exist 4584 [main] INFO hive.metastore - Trying to connect to metastore with URI thrift://t1:9083 4759 [main] INFO hive.metastore - Connected to metastore. 
8437 [main] INFO org.apache.hadoop.hive.ql.session.SessionState - Created local directory: C:/Users/lsh/AppData/Local/Temp/99f90c54-0932-45d6-924a-b4cdd357db61_resources 8477 [main] INFO org.apache.hadoop.hive.ql.session.SessionState - Created HDFS directory: /user/hive/tmp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61 8496 [main] INFO org.apache.hadoop.hive.ql.session.SessionState - Created local directory: C:/Users/lsh/AppData/Local/Temp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61 8534 [main] INFO org.apache.hadoop.hive.ql.session.SessionState - Created HDFS directory: /user/hive/tmp/lsh/99f90c54-0932-45d6-924a-b4cdd357db61/_tmp_space.db 8560 [main] INFO org.apache.spark.sql.hive.client.HiveClientImpl - Warehouse location for Hive client (version 1.2.2) is /user/hive/warehouse 10106 [main] INFO org.apache.spark.sql.execution.datasources.FileSourceStrategy - Pruning directories with: 10108 [main] INFO org.apache.spark.sql.execution.datasources.FileSourceStrategy - Post-Scan Filters: isnotnull(createTime#25L),isnotnull(antistop#8),(createTime#25L >= 1608280608479),(createTime#25L <= 1611628847000),NOT (antistop#8 = ) 10110 [main] INFO org.apache.spark.sql.execution.datasources.FileSourceStrategy - Output Data Schema: struct<projectId: int, antistop: string, provinceId: int, typeId: int, createTime: bigint ... 3 more fields> 10118 [main] INFO org.apache.spark.sql.execution.FileSourceScanExec - Pushed Filters: IsNotNull(createTime),IsNotNull(antistop),GreaterThanOrEqual(createTime,1608280608479),LessThanOrEqual(createTime,1611628847000),Not(EqualTo(antistop,)) 10165 [main] WARN org.apache.spark.util.Utils - Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf. 11262 [main] INFO org.apache.spark.sql.execution.datasources.PrunedInMemoryFileIndex - It took 738 ms to list leaf files for 10 paths. 
11568 [Spark Context Cleaner] INFO org.apache.spark.ContextCleaner - Cleaned accumulator 0 11593 [main] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 223.244 ms 11751 [main] INFO org.apache.spark.sql.execution.aggregate.HashAggregateExec - spark.sql.codegen.aggregate.map.twolevel.enabled is set to true, but current version of codegened fast hashmap does not support this aggregate. 11842 [main] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 52.0291 ms 11843 [main] INFO org.apache.spark.sql.execution.aggregate.HashAggregateExec - spark.sql.codegen.aggregate.map.twolevel.enabled is set to true, but current version of codegened fast hashmap does not support this aggregate. 11906 [main] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 38.8988 ms 11968 [main] INFO org.apache.spark.storage.memory.MemoryStore - Block broadcast_0 stored as values in memory (estimated size 242.1 KB, free 1978.9 MB) 12016 [main] INFO org.apache.spark.storage.memory.MemoryStore - Block broadcast_0_piece0 stored as bytes in memory (estimated size 23.6 KB, free 1978.8 MB) 12018 [dispatcher-event-loop-1] INFO org.apache.spark.storage.BlockManagerInfo - Added broadcast_0_piece0 in memory on DESKTOP-E6TA5L3:54366 (size: 23.6 KB, free: 1979.1 MB) 12020 [main] INFO org.apache.spark.SparkContext - Created broadcast 0 from collect at DWDTenderLog.scala:54 12024 [main] INFO org.apache.spark.sql.execution.FileSourceScanExec - Planning scan with bin packing, max size: 49910044 bytes, open cost is considered as scanning 4194304 bytes. 
12175 [main] INFO org.apache.spark.SparkContext - Starting job: collect at DWDTenderLog.scala:54 12189 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Registering RDD 2 (collect at DWDTenderLog.scala:54) as input to shuffle 0 12191 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Got job 0 (collect at DWDTenderLog.scala:54) with 200 output partitions 12191 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Final stage: ResultStage 1 (collect at DWDTenderLog.scala:54) 12192 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Parents of final stage: List(ShuffleMapStage 0) 12194 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Missing parents: List(ShuffleMapStage 0) 12198 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Submitting ShuffleMapStage 0 (MapPartitionsRDD[2] at collect at DWDTenderLog.scala:54), which has no missing parents 12214 [dag-scheduler-event-loop] INFO org.apache.spark.storage.memory.MemoryStore - Block broadcast_1 stored as values in memory (estimated size 25.4 KB, free 1978.8 MB) 12217 [dag-scheduler-event-loop] INFO org.apache.spark.storage.memory.MemoryStore - Block broadcast_1_piece0 stored as bytes in memory (estimated size 10.8 KB, free 1978.8 MB) 12218 [dispatcher-event-loop-0] INFO org.apache.spark.storage.BlockManagerInfo - Added broadcast_1_piece0 in memory on DESKTOP-E6TA5L3:54366 (size: 10.8 KB, free: 1979.1 MB) 12218 [dag-scheduler-event-loop] INFO org.apache.spark.SparkContext - Created broadcast 1 from broadcast at DAGScheduler.scala:1184 12227 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - Submitting 2 missing tasks from ShuffleMapStage 0 (MapPartitionsRDD[2] at collect at DWDTenderLog.scala:54) (first 15 tasks are for partitions Vector(0, 1)) 12228 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.TaskSchedulerImpl - Adding task set 0.0 with 2 tasks 
12253 [dispatcher-event-loop-1] INFO org.apache.spark.scheduler.TaskSetManager - Starting task 0.0 in stage 0.0 (TID 0, localhost, executor driver, partition 0, ANY, 10221 bytes) 12254 [dispatcher-event-loop-1] INFO org.apache.spark.scheduler.TaskSetManager - Starting task 1.0 in stage 0.0 (TID 1, localhost, executor driver, partition 1, ANY, 10288 bytes) 12260 [Executor task launch worker for task 1] INFO org.apache.spark.executor.Executor - Running task 1.0 in stage 0.0 (TID 1) 12260 [Executor task launch worker for task 0] INFO org.apache.spark.executor.Executor - Running task 0.0 in stage 0.0 (TID 0) 12341 [Executor task launch worker for task 1] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 20.998 ms 12361 [Executor task launch worker for task 0] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 7.7712 ms 12375 [Executor task launch worker for task 0] INFO org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator - Code generated in 10.5504 ms 12388 [Executor task launch worker for task 1] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536800-8517888_20210126111108.parquet, range: 0-791226, partition values: [2021-01-26] 12388 [Executor task launch worker for task 0] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/23/bf784cc0-1918-4ddd-8145-9de3f840c558-0_0-1553263-3646025_20210124000006.parquet, range: 0-1673011, partition values: [2021-01-23] 14219 [Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, 
Binary{""})) 14219 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 15102 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 15262 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 15594 [Executor task launch worker for task 0] INFO org.apache.hadoop.io.compress.zlib.ZlibFactory - Successfully loaded & initialized native-zlib library 15594 [Executor task launch worker for task 0] INFO org.apache.hadoop.io.compress.CodecPool - Got brand-new decompressor [.gz] 16218 [Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 16439 [Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 16943 [Executor task launch worker for task 1] INFO org.apache.hadoop.io.compress.CodecPool - Got brand-new decompressor [.gz] 18239 [Executor task launch worker for task 0] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/24/7a98c96a-fdeb-47a3-98a3-e2c4b4d6ec81-0_0-2634501-6263163_20210125000002.parquet, range: 0-1538381, partition values: [2021-01-24] 18679 [Executor task launch worker for task 1] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536707-8517665_20210126111054.parquet, range: 0-791122, partition values: [2021-01-26] 20217 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 20400 [Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 21685 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 21988 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 22055 [Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 22148 
[Executor task launch worker for task 1] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 24301 [Executor task launch worker for task 1] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet, range: 0-791045, partition values: [2021-01-26] 24639 [Executor task launch worker for task 0] INFO org.apache.spark.sql.execution.datasources.FileScanRDD - Reading File path: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/25/3baf7087-95d2-4836-9a4c-3f5b4ced568c-0_0-3188990-7636378_20210125235941.parquet, range: 0-994567, partition values: [2021-01-25] 25837 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) 26843 [Executor task launch worker for task 1] ERROR org.apache.spark.executor.Executor - Exception in task 1.0 in stage 0.0 (TID 1) java.io.FileNotFoundException: File does not exist: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. 
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 26859 [task-result-getter-0] WARN org.apache.spark.scheduler.TaskSetManager - Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): java.io.FileNotFoundException: File does not exist: 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 26861 [task-result-getter-0] ERROR org.apache.spark.scheduler.TaskSetManager - Task 1 in stage 0.0 failed 1 times; aborting job 26866 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.TaskSchedulerImpl - Cancelling stage 0 26867 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.TaskSchedulerImpl - Killing all running tasks in stage 0: Stage cancelled 26869 [dispatcher-event-loop-0] INFO org.apache.spark.executor.Executor - Executor is trying to kill task 0.0 in stage 0.0 (TID 0), reason: Stage cancelled 26869 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.TaskSchedulerImpl - Stage 0 was cancelled 26870 [dag-scheduler-event-loop] INFO org.apache.spark.scheduler.DAGScheduler - ShuffleMapStage 0 (collect at DWDTenderLog.scala:54) failed in 14.660 s due to Job aborted due to stage failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): java.io.FileNotFoundException: File does not exist: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. 
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: 26873 [main] INFO org.apache.spark.scheduler.DAGScheduler - Job 0 failed: collect at DWDTenderLog.scala:54, took 14.697048 s 26879 [Thread-1] INFO org.apache.spark.SparkContext - Invoking stop() from 
shutdown hook 26885 [Thread-1] INFO org.spark_project.jetty.server.AbstractConnector - Stopped Spark@4d63b624{HTTP/1.1,[http/1.1]}{0.0.0.0:4040} 26887 [Thread-1] INFO org.apache.spark.ui.SparkUI - Stopped Spark web UI at http://DESKTOP-E6TA5L3:4040 26896 [dispatcher-event-loop-1] INFO org.apache.spark.MapOutputTrackerMasterEndpoint - MapOutputTrackerMasterEndpoint stopped! Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 0.0 failed 1 times, most recent failure: Lost task 1.0 in stage 0.0 (TID 1, localhost, executor driver): java.io.FileNotFoundException: File does not exist: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. 
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1925) at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1913) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1912) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1912) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:948) at scala.Option.foreach(Option.scala:257) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2146) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2095) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2084) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126) at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:385) at org.apache.spark.rdd.RDD.collect(RDD.scala:989) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3389) at 
org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788) at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2788) at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370) at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75) at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3369) at org.apache.spark.sql.Dataset.collect(Dataset.scala:2788) at com.gjr.recommend.DWDTenderLog$.main(DWDTenderLog.scala:54) at com.gjr.recommend.DWDTenderLog.main(DWDTenderLog.scala) Caused by: java.io.FileNotFoundException: File does not exist: hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3536622-8517450_20210126111049.parquet It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. 
at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.scan_nextBatch_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithKeys_0$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55) at org.apache.spark.scheduler.Task.run(Task.scala:123) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 26908 [Thread-1] INFO org.apache.spark.storage.memory.MemoryStore - MemoryStore cleared 26908 [Thread-1] INFO org.apache.spark.storage.BlockManager - BlockManager stopped 26910 [Thread-1] INFO 
org.apache.spark.storage.BlockManagerMaster - BlockManagerMaster stopped 26911 [dispatcher-event-loop-0] INFO org.apache.spark.scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint - OutputCommitCoordinator stopped! 26915 [Thread-1] INFO org.apache.spark.SparkContext - Successfully stopped SparkContext 26916 [Thread-1] INFO org.apache.spark.util.ShutdownHookManager - Shutdown hook called 26916 [Thread-1] INFO org.apache.spark.util.ShutdownHookManager - Deleting directory C:\Users\lsh\AppData\Local\Temp\spark-b9c30c48-0f2d-46ff-adb2-44e702a4dc5a 26919 [Executor task launch worker for task 0] INFO org.apache.parquet.filter2.compat.FilterCompat - Filtering using predicate: and(and(and(and(noteq(createTime, null), noteq(antistop, null)), gteq(createTime, 1608280608479)), lteq(createTime, 1611628847000)), noteq(antistop, Binary{""})) Process finished with exit code 1 hdfs data~~~~~~~~~~~~~~~~~~~~~~~~~ [root@t1 ~]# hdfs dfs -ls hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/ SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/usr/local/modules/hadoop-2.8.5/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/usr/local/software/apache-tez-0.9.2-bin/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. 
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory] Found 11 items -rw-r--r-- 3 root supergroup 93 2021-01-26 00:00 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/.hoodie_partition_metadata -rw-r--r-- 3 root supergroup 781714 2021-01-26 10:53 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3526941-8492659_20210126105334.parquet -rw-r--r-- 3 root supergroup 781786 2021-01-26 10:53 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527031-8492879_20210126105340.parquet -rw-r--r-- 3 root supergroup 781872 2021-01-26 10:53 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527113-8493091_20210126105354.parquet -rw-r--r-- 3 root supergroup 781938 2021-01-26 10:54 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527261-8493515_20210126105409.parquet -rw-r--r-- 3 root supergroup 782011 2021-01-26 10:54 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527346-8493730_20210126105414.parquet -rw-r--r-- 3 root supergroup 782106 2021-01-26 10:54 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527439-8493953_20210126105420.parquet -rw-r--r-- 3 root supergroup 782214 2021-01-26 10:54 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527532-8494176_20210126105434.parquet -rw-r--r-- 3 root supergroup 782287 2021-01-26 10:54 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527617-8494391_20210126105444.parquet -rw-r--r-- 3 root supergroup 782368 2021-01-26 10:54 
hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527710-8494614_20210126105454.parquet -rw-r--r-- 3 root supergroup 782465 2021-01-26 10:55 hdfs://gongjiangren/data/app/dwd_recommend_tender_ds/partitionpath=2021/01/26/a685bd51-614f-48b0-a360-f09f28baae84-0_0-3527795-8494829_20210126105500.parquet ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org