[ https://issues.apache.org/jira/browse/HADOOP-16942?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17067701#comment-17067701 ]
vijayant soni commented on HADOOP-16942: ---------------------------------------- As per https://issues.apache.org/jira/browse/HADOOP-13811 moving to the V2 list API should resolve this. There are many delete markers still created in Hadoop 3.2.1. We haven't run the app which throws the above error in Hadoop 3.2.1, but we do see many delete markers at folder level in Hadoop 3 (details in description). > S3A creating folder level delete markers > ---------------------------------------- > > Key: HADOOP-16942 > URL: https://issues.apache.org/jira/browse/HADOOP-16942 > Project: Hadoop Common > Issue Type: Task > Components: fs/s3 > Affects Versions: 3.2.1 > Reporter: vijayant soni > Priority: Minor > > Using S3A URL scheme while writing out data from Spark to S3 is creating many > folder level delete markers. > Writing the same with S3 URL scheme, does not create any delete markers at > all. > > Spark - 2.4.4 > Hadoop - 3.2.1 > EMR version - 6.0.0 > {code:scala} > spark-shell > Welcome to > ____ __ > / __/__ ___ _____/ /__ > _\ \/ _ \/ _ `/ __/ '_/ > /___/ .__/\_,_/_/ /_/\_\ version 2.4.4 > /_/ > > Using Scala version 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_242) > Type in expressions to have them evaluated. > Type :help for more information. 
> scala> val df = spark.sql("select 1 as a") > df: org.apache.spark.sql.DataFrame = [a: int] > scala> df.show(false) > +---+ > > |a | > +---+ > |1 | > +---+ > scala> // Writing to S3 using s3 > scala> > df.write.mode(org.apache.spark.sql.SaveMode.Overwrite).save("s3://stage-dwh/tmp/vijayant/s3/") > > > scala> // Writing to S3 using s3a > scala> > df.write.mode(org.apache.spark.sql.SaveMode.Overwrite).save("s3a://stage-dwh/tmp/vijayant/s3a/") > > > scala> > {code} > Getting delete markers from `s3` write > {code:bash} > aws s3api list-object-versions --bucket stage-dwh --prefix tmp/vijayant/s3 > { > "Versions": [ > { > "LastModified": "2020-03-26T12:57:54.000Z", > "VersionId": "h7_SIsHYoC.1il2s4qporAFnVbLgiLN5", > "ETag": "\"d41d8cd98f00b204e9800998ecf8427e\"", > "StorageClass": "STANDARD", > "Key": "tmp/vijayant/s3/_SUCCESS", > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "Size": 0 > }, > { > "LastModified": "2020-03-26T12:57:54.000Z", > "VersionId": "pOALzyzpBR7glCEk3cqPOR.u8QCIcLnC", > "ETag": "\"26e70a1e26c709e3e8498acd49cfaaa3-1\"", > "StorageClass": "STANDARD", > "Key": > "tmp/vijayant/s3/part-00000-9af16781-7944-497d-9b19-f31ab1e5f850-c000.snappy.parquet", > > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "Size": 384 > } > ] > } > {code} > Getting delete markers from `s3a` write > {code:bash} > aws s3api list-object-versions --bucket stage-dwh --prefix tmp/vijayant/s3a > { > "DeleteMarkers": [ > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": "Jd8PHlUK3TbVJY2RWQxi74a6.2Gp2mUL", > "Key": "tmp/vijayant/s3a/", > "LastModified": "2020-03-26T13:00:14.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "j2llpkiXzEaomJr5xLhQ9xTmfoq_8dOy", > "Key": "tmp/vijayant/s3a/", > "LastModified": "2020-03-26T13:00:13.000Z" > }, > { > "Owner": { > 
"DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "ry6BqTUGvyY3U.eqFfgg2hJ2BBMxVcwH", > "Key": "tmp/vijayant/s3a/", > "LastModified": "2020-03-26T13:00:13.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "xG2oNiCpovqWCSZxaWiqtL.E7znE7AmR", > "Key": "tmp/vijayant/s3a/", > "LastModified": "2020-03-26T13:00:12.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "OedjkEU1VeWl0ZZouur.13dufhYa7JXm", > "Key": "tmp/vijayant/s3a/", > "LastModified": "2020-03-26T13:00:01.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": "Nn8O947GGwqCePelc9VL9O2sWsmSsy2i", > "Key": "tmp/vijayant/s3a/_temporary/", > "LastModified": "2020-03-26T13:00:13.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "7DcZ4b3pmoIi_TuzoRsykdtzyUGDLUo9", > "Key": "tmp/vijayant/s3a/_temporary/", > "LastModified": "2020-03-26T13:00:12.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "eFahQrYnglWeRHZHTod6IszSoNE3jPCH", > "Key": "tmp/vijayant/s3a/_temporary/", > "LastModified": "2020-03-26T13:00:01.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": "m4mGr.QA3sO0pQb_tuZEZX6OVIeprgwl", > "Key": "tmp/vijayant/s3a/_temporary/0/", > "LastModified": "2020-03-26T13:00:13.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "2TBn8RzdMKzEqn6cP8O_CI9OdZkhvv53", > "Key": "tmp/vijayant/s3a/_temporary/0/", > "LastModified": "2020-03-26T13:00:12.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": 
"NXVubIX_eu9RYLDWpD4JH91VK08OmHwu", > "Key": "tmp/vijayant/s3a/_temporary/0/_temporary/", > "LastModified": "2020-03-26T13:00:13.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "FtubGslxkfMiT5uxuuEorWsg0OIvXmzY", > "Key": "tmp/vijayant/s3a/_temporary/0/_temporary/", > "LastModified": "2020-03-26T13:00:12.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": "0tv9I0s1mvurxP4KX_Zgqr7P8OQ5bIs7", > "Key": > "tmp/vijayant/s3a/_temporary/0/_temporary/attempt_20200326130000_0002_m_000000_2/", > > "LastModified": "2020-03-26T13:00:14.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "VersionId": "y058RX4xXC.a_ltup_OxdI7S5o288h38", > "Key": > "tmp/vijayant/s3a/_temporary/0/_temporary/attempt_20200326130000_0002_m_000000_2/", > > "LastModified": "2020-03-26T13:00:12.000Z" > }, > { > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "VersionId": "T54GB8P1SVmWUu_9lXXogf970cYZOszE", > "Key": > "tmp/vijayant/s3a/_temporary/0/_temporary/attempt_20200326130000_0002_m_000000_2/part-00000-8ecb77ed-8279-4256-9ef1-5ea352318c1a-c000.snappy.parquet", > > "LastModified": "2020-03-26T13:00:13.000Z" > } > ], > "Versions": [ > { > "LastModified": "2020-03-26T13:00:14.000Z", > "VersionId": "3HSpCqBQyrVoh9X1tTfskNEiQIet7f_0", > "ETag": "\"d41d8cd98f00b204e9800998ecf8427e\"", > "StorageClass": "STANDARD", > "Key": "tmp/vijayant/s3a/_SUCCESS", > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "Size": 0 > }, > { > "LastModified": "2020-03-26T13:00:00.000Z", > "VersionId": "zB.ELKr2RcK9RgdSgx5wwj55YPlZTWD0", > "ETag": "\"d41d8cd98f00b204e9800998ecf8427e\"", > "StorageClass": "STANDARD", > "Key": "tmp/vijayant/s3a/_temporary/0/", > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > 
"IsLatest": false, > "Size": 0 > }, > { > "LastModified": "2020-03-26T13:00:13.000Z", > "VersionId": "XoIDfWRP0Y6DySn_FVkh3z.LCSCv1H4x", > "ETag": "\"d41d8cd98f00b204e9800998ecf8427e\"", > "StorageClass": "STANDARD", > "Key": > "tmp/vijayant/s3a/_temporary/0/_temporary/attempt_20200326130000_0002_m_000000_2/", > > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "Size": 0 > }, > { > "LastModified": "2020-03-26T13:00:12.000Z", > "VersionId": "OL24nTI4C0DJFur6ZfXeWFH1N_eo.SIl", > "ETag": "\"1c1179f44b770f1d661f06b9324c27da\"", > "StorageClass": "STANDARD", > "Key": > "tmp/vijayant/s3a/_temporary/0/_temporary/attempt_20200326130000_0002_m_000000_2/part-00000-8ecb77ed-8279-4256-9ef1-5ea352318c1a-c000.snappy.parquet", > > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": false, > "Size": 384 > }, > { > "LastModified": "2020-03-26T13:00:13.000Z", > "VersionId": "EscpHQeMrYBhDGdmnH5TPEDLpLUwZzBS", > "ETag": "\"1c1179f44b770f1d661f06b9324c27da\"", > "StorageClass": "STANDARD", > "Key": > "tmp/vijayant/s3a/part-00000-8ecb77ed-8279-4256-9ef1-5ea352318c1a-c000.snappy.parquet", > > "Owner": { > "DisplayName": "<display-name>", > "ID": "<owner-id>" > }, > "IsLatest": true, > "Size": 384 > } > ] > } > {code} > This in turn makes listing objects slow and we have even noticed timeouts due > to too many delete markers. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org