petartushev commented on issue #11997:
URL: https://github.com/apache/iceberg/issues/11997#issuecomment-2649285762
@timoha upgraded the hadoop dependencies and my `pom.xml` file now looks
like this:
```
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core
-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql
-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.12</artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.12 </artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql-kafka-0-10_2.12</artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-3.5_2.12</artifactId>
<version>1.7.0</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-core -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-core</artifactId>
<version>1.7.0</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/org.apache.iceberg/iceberg-aws-bundle -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-aws-bundle</artifactId>
<version>1.7.0</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>3.5.1</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/org.apache.spark/spark-hadoop-cloud -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hadoop-cloud_2.12</artifactId>
<version>3.5.3</version>
</dependency>
<!--
https://mvnrepository.com/artifact/org.apache.spark/spark-common-utils -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-common-utils_2.12</artifactId>
<version>3.5.3</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/org.apache.hive/hive-metastore -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>3.1.3</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
</exclusion>
</exclusions>
</dependency>
<!--
https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.18.2</version>
<scope>provided</scope>
</dependency>
<!-- AWS SDK Core (required by the S3 SDK) -->
<!--
https://mvnrepository.com/artifact/software.amazon.awssdk/url-connection-client
-->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>url-connection-client</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.4.1</version> <!-- Ensure the version is compatible
with your Flink version -->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>3.4.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.4.1</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/s3 -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/sts
-->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>sts</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/glue
-->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>glue</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/software.amazon.awssdk/protocol-core -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>protocol-core</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/utils
-->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>utils</artifactId>
<version>2.30.16</version>
</dependency>
<!--
https://mvnrepository.com/artifact/software.amazon.awssdk/aws-core -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>aws-core</artifactId>
<version>2.30.16</version>
</dependency>
<!--
https://mvnrepository.com/artifact/software.amazon.awssdk/dynamodb -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>dynamodb</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/software.amazon.awssdk/kms
-->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>kms</artifactId>
<version>2.30.16</version>
<scope>provided</scope>
</dependency>
<!--
https://mvnrepository.com/artifact/io.github.cdimascio/dotenv-java -->
<dependency>
<groupId>io.github.cdimascio</groupId>
<artifactId>dotenv-java</artifactId>
<version>3.0.2</version>
</dependency>
</dependencies>
```
Also I set your suggested options and my original issue is solved (I
presume), but now I'm getting an error with this stacktrace:
```
Caused by: software.amazon.awssdk.services.s3.model.S3Exception: The AWS
Access Key Id you provided does not exist in our records. (Service: S3, Status
Code: 403, Request ID: 6ZZMT4KJZXHSNQSG, Extended Request ID:
4crEDh1G8LkL0TgG3BvoUK9WOIXHSzl46Xa9iAVF8D7F78fpbm0WEJkPFufDl3yrHLuLIXFZ8hKewnjR+dby3+gUgiorE+Fr)
at
software.amazon.awssdk.protocols.xml.internal.unmarshall.AwsXmlPredicatedResponseHandler.handleErrorResponse(AwsXmlPredicatedResponseHandler.java:156)
at
software.amazon.awssdk.protocols.xml.internal.unmarshall.AwsXmlPredicatedResponseHandler.handleResponse(AwsXmlPredicatedResponseHandler.java:108)
at
software.amazon.awssdk.protocols.xml.internal.unmarshall.AwsXmlPredicatedResponseHandler.handle(AwsXmlPredicatedResponseHandler.java:85)
at
software.amazon.awssdk.protocols.xml.internal.unmarshall.AwsXmlPredicatedResponseHandler.handle(AwsXmlPredicatedResponseHandler.java:43)
at
software.amazon.awssdk.awscore.client.handler.AwsSyncClientHandler$Crc32ValidationResponseHandler.handle(AwsSyncClientHandler.java:93)
at
software.amazon.awssdk.core.internal.handler.BaseClientHandler.lambda$successTransformationResponseHandler$7(BaseClientHandler.java:279)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.HandleResponseStage.execute(HandleResponseStage.java:50)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.HandleResponseStage.execute(HandleResponseStage.java:38)
at
software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptTimeoutTrackingStage.execute(ApiCallAttemptTimeoutTrackingStage.java:74)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptTimeoutTrackingStage.execute(ApiCallAttemptTimeoutTrackingStage.java:43)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.TimeoutExceptionHandlingStage.execute(TimeoutExceptionHandlingStage.java:79)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.TimeoutExceptionHandlingStage.execute(TimeoutExceptionHandlingStage.java:41)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptMetricCollectionStage.execute(ApiCallAttemptMetricCollectionStage.java:55)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallAttemptMetricCollectionStage.execute(ApiCallAttemptMetricCollectionStage.java:39)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.RetryableStage2.executeRequest(RetryableStage2.java:93)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.RetryableStage2.execute(RetryableStage2.java:56)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.RetryableStage2.execute(RetryableStage2.java:36)
at
software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
at
software.amazon.awssdk.core.internal.http.StreamManagingStage.execute(StreamManagingStage.java:53)
at
software.amazon.awssdk.core.internal.http.StreamManagingStage.execute(StreamManagingStage.java:35)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.executeWithTimer(ApiCallTimeoutTrackingStage.java:82)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.execute(ApiCallTimeoutTrackingStage.java:62)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallTimeoutTrackingStage.execute(ApiCallTimeoutTrackingStage.java:43)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallMetricCollectionStage.execute(ApiCallMetricCollectionStage.java:50)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ApiCallMetricCollectionStage.execute(ApiCallMetricCollectionStage.java:32)
at
software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
at
software.amazon.awssdk.core.internal.http.pipeline.RequestPipelineBuilder$ComposingRequestPipelineStage.execute(RequestPipelineBuilder.java:206)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ExecutionFailureExceptionReportingStage.execute(ExecutionFailureExceptionReportingStage.java:37)
at
software.amazon.awssdk.core.internal.http.pipeline.stages.ExecutionFailureExceptionReportingStage.execute(ExecutionFailureExceptionReportingStage.java:26)
at
software.amazon.awssdk.core.internal.http.AmazonSyncHttpClient$RequestExecutionBuilderImpl.execute(AmazonSyncHttpClient.java:210)
at
software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.invoke(BaseSyncClientHandler.java:103)
at
software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.doExecute(BaseSyncClientHandler.java:173)
at
software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.lambda$execute$1(BaseSyncClientHandler.java:80)
at
software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.measureApiCallSuccess(BaseSyncClientHandler.java:182)
at
software.amazon.awssdk.core.internal.handler.BaseSyncClientHandler.execute(BaseSyncClientHandler.java:74)
at
software.amazon.awssdk.core.client.handler.SdkSyncClientHandler.execute(SdkSyncClientHandler.java:45)
at
software.amazon.awssdk.awscore.client.handler.AwsSyncClientHandler.execute(AwsSyncClientHandler.java:53)
at
software.amazon.awssdk.services.s3.DefaultS3Client.putObject(DefaultS3Client.java:10595)
at
software.amazon.awssdk.services.s3.DelegatingS3Client.lambda$putObject$83(DelegatingS3Client.java:8604)
at
software.amazon.awssdk.services.s3.internal.crossregion.S3CrossRegionSyncClient.invokeOperation(S3CrossRegionSyncClient.java:74)
at
software.amazon.awssdk.services.s3.DelegatingS3Client.putObject(DelegatingS3Client.java:8604)
at
org.apache.iceberg.aws.s3.S3OutputStream.completeUploads(S3OutputStream.java:444)
at
org.apache.iceberg.aws.s3.S3OutputStream.close(S3OutputStream.java:270)
at
org.apache.iceberg.aws.s3.S3OutputStream.close(S3OutputStream.java:256)
at
org.apache.iceberg.shaded.org.apache.parquet.io.DelegatingPositionOutputStream.close(DelegatingPositionOutputStream.java:38)
at
org.apache.iceberg.shaded.org.apache.parquet.hadoop.ParquetFileWriter.end(ParquetFileWriter.java:1204)
at
org.apache.iceberg.parquet.ParquetWriter.close(ParquetWriter.java:257)
at org.apache.iceberg.io.DataWriter.close(DataWriter.java:82)
at
org.apache.iceberg.io.RollingFileWriter.closeCurrentWriter(RollingFileWriter.java:122)
at
org.apache.iceberg.io.RollingFileWriter.close(RollingFileWriter.java:147)
at
org.apache.iceberg.io.RollingDataWriter.close(RollingDataWriter.java:32)
at
org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.close(SparkWrite.java:747)
at
org.apache.iceberg.spark.source.SparkWrite$UnpartitionedDataWriter.commit(SparkWrite.java:729)
at
org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.$anonfun$run$5(WriteToDataSourceV2Exec.scala:475)
at
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
at
org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run(WriteToDataSourceV2Exec.scala:491)
at
org.apache.spark.sql.execution.datasources.v2.WritingSparkTask.run$(WriteToDataSourceV2Exec.scala:430)
at
org.apache.spark.sql.execution.datasources.v2.DataWritingSparkTask$.run(WriteToDataSourceV2Exec.scala:496)
at
org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.$anonfun$writeWithV2$2(WriteToDataSourceV2Exec.scala:393)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at
org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
```
I created my own keys in the minio client, and I also tried with the root
keys specified in `docker-compose.yml`, but the latest issue persists.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]