Hi,all
两个 job 都从同一个 Kafka 读数据,一份写入 HDFS,一份写入 Hive,都是分钟分区,并发都是 200。运行一段时间后发现,写 Hive 的任务要落后写 HDFS 的任务很多,而且 Hive 任务对应的 HDFS 路径下,某一分区内文件的写入时间跨度甚至长达 2 个小时。大家遇到过这种情况吗?
附上对应ddl
hive:
-- Hive-dialect DDL for the Hive-backed sink table: three data columns,
-- partitioned by day / hour / minute and stored as Snappy-compressed Parquet.
-- The sink.partition-commit.* properties request a `_SUCCESS` marker file
-- when a partition is committed.
-- NOTE(review): no sink.partition-commit.trigger / delay is set here, so the
-- connector defaults apply -- confirm that is intended for minute partitions.
CREATE EXTERNAL TABLE hive_table (
    log_timestamp BIGINT,
    ip            STRING,
    `raw`         STRING
)
PARTITIONED BY (
    `day`    STRING,
    `hour`   STRING,
    `minute` STRING
)
STORED AS PARQUET
TBLPROPERTIES (
    'parquet.compression' = 'SNAPPY',
    'sink.partition-commit.policy.kind' = 'success-file',
    'sink.partition-commit.success-file.name' = '_SUCCESS'
);

Hdfs:

-- Flink SQL DDL for the filesystem-connector sink table: writes
-- Snappy-compressed Parquet files under the given HDFS path, partitioned by
-- day / hour / minute. The sink.partition-commit.* options request a
-- `_SUCCESS` marker file when a partition is committed.
-- NOTE(review): no sink.partition-commit.trigger / delay is set here, so the
-- connector defaults apply -- confirm that is intended for minute partitions.
CREATE TABLE hdfs_table (
    log_timestamp BIGINT,
    ip STRING,
    `raw` STRING,
    -- Partition columns; they are declared as regular columns and then
    -- referenced by name in PARTITIONED BY.
    `day` STRING,
    `hour` STRING,
    `minute` STRING
) PARTITIONED BY (`day`, `hour`, `minute`) WITH (
    'connector' = 'filesystem',
    'path' = 'hdfs://xx/test.db/hdfs_table',
    'format' = 'parquet',
    'parquet.compression' = 'SNAPPY',
    -- String literals must be delimited by ASCII apostrophes on both sides.
    'sink.partition-commit.policy.kind' = 'success-file',
    'sink.partition-commit.success-file.name' = '_SUCCESS'
);


实际hdfs文件对比:

-rw-r--r--   3 hadoop hadoop    1514862 2020-11-26 09:26 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-150-824
-rw-r--r--   3 hadoop hadoop   10798011 2020-11-26 09:34 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-151-830
-rw-r--r--   3 hadoop hadoop    4002618 2020-11-26 09:35 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-151-831
-rw-r--r--   3 hadoop hadoop    8057522 2020-11-26 09:51 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-152-844
-rw-r--r--   3 hadoop hadoop    6675744 2020-11-26 09:52 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-152-845
-rw-r--r--   3 hadoop hadoop    4062571 2020-11-26 09:51 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-153-844
-rw-r--r--   3 hadoop hadoop   10247973 2020-11-26 09:52 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-153-845
-rw-r--r--   3 hadoop hadoop     483029 2020-11-26 09:53 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-153-846
-rw-r--r--   3 hadoop hadoop    9440221 2020-11-26 09:16 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-154-816
-rw-r--r--   3 hadoop hadoop    5346956 2020-11-26 09:17 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-154-817
-rw-r--r--   3 hadoop hadoop    4940718 2020-11-26 09:51 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-155-844
-rw-r--r--   3 hadoop hadoop    9687410 2020-11-26 09:52 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-155-845
-rw-r--r--   3 hadoop hadoop      51998 2020-11-26 09:53 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-155-846
-rw-r--r--   3 hadoop hadoop       3518 2020-11-26 09:37 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-156-833
-rw-r--r--   3 hadoop hadoop   13801987 2020-11-26 09:39 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-156-834
-rw-r--r--   3 hadoop hadoop     963288 2020-11-26 09:40 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-156-835
-rw-r--r--   3 hadoop hadoop    6036601 2020-11-26 09:27 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-157-825
-rw-r--r--   3 hadoop hadoop    8864235 2020-11-26 09:29 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-157-826
-rw-r--r--   3 hadoop hadoop   10865872 2020-11-26 09:37 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-158-833
-rw-r--r--   3 hadoop hadoop    4031077 2020-11-26 09:39 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-158-834
-rw-r--r--   3 hadoop hadoop     228350 2020-11-26 09:09 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-159-811
-rw-r--r--   3 hadoop hadoop   14661395 2020-11-26 09:11 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-159-812
-rw-r--r--   3 hadoop hadoop    5451995 2020-11-26 09:29 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-160-826
-rw-r--r--   3 hadoop hadoop    9149301 2020-11-26 09:30 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-160-827
-rw-r--r--   3 hadoop hadoop    4731543 2020-11-26 09:30 
hdfs://xxx/test.db/hive_table/day=2020-11-26/hour=08/minute=59/part-17bacc1b-cf96-463b-9cb7-aa318a05936c-161-827




-rw-r--r--   3 hadoop hadoop    5950562 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-17-1288
-rw-r--r--   3 hadoop hadoop    8922364 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-170-1287
-rw-r--r--   3 hadoop hadoop    5898257 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-170-1288
-rw-r--r--   3 hadoop hadoop    8848292 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-171-1287
-rw-r--r--   3 hadoop hadoop    5893106 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-171-1288
-rw-r--r--   3 hadoop hadoop    8905617 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-172-1287
-rw-r--r--   3 hadoop hadoop    5800338 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-172-1288
-rw-r--r--   3 hadoop hadoop    8914099 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-173-1287
-rw-r--r--   3 hadoop hadoop    5773258 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-173-1288
-rw-r--r--   3 hadoop hadoop    8950742 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-174-1287
-rw-r--r--   3 hadoop hadoop    5829613 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-174-1288
-rw-r--r--   3 hadoop hadoop    8808161 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-175-1287
-rw-r--r--   3 hadoop hadoop    5910085 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-175-1288
-rw-r--r--   3 hadoop hadoop    8871508 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-176-1287
-rw-r--r--   3 hadoop hadoop    5896191 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-176-1288
-rw-r--r--   3 hadoop hadoop    8855378 2020-11-26 08:59 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-177-1287
-rw-r--r--   3 hadoop hadoop    5857271 2020-11-26 09:00 
hdfs://xxx/test.db/hdfs_table/day=2020-11-26/hour=08/minute=59/part-633fa7d2-8d80-4e4d-ab90-f8b77c792b40-177-1288

回复