aicam commented on code in PR #4136:
URL: https://github.com/apache/texera/pull/4136#discussion_r2641208253
##########
common/workflow-core/src/main/scala/org/apache/texera/service/util/S3StorageClient.scala:
##########
@@ -259,4 +262,79 @@ object S3StorageClient {
DeleteObjectRequest.builder().bucket(bucketName).key(objectKey).build()
)
}
+ def uploadPart(
Review Comment:
Add a comment to this function and correct its formatting.
##########
common/workflow-core/src/main/scala/org/apache/texera/service/util/S3StorageClient.scala:
##########
@@ -259,4 +262,79 @@ object S3StorageClient {
DeleteObjectRequest.builder().bucket(bucketName).key(objectKey).build()
)
}
+ def uploadPart(
+ bucket: String,
+ key: String,
+ uploadId: String,
+ partNumber: Int,
+ inputStream: InputStream,
+ contentLength: Option[Long]
+ ): Unit = {
+ val body: RequestBody = contentLength match {
Review Comment:
We need streaming here; this just reads all the bytes at once.
##########
file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala:
##########
@@ -639,139 +643,103 @@ class DatasetResource {
@QueryParam("type") operationType: String,
@QueryParam("ownerEmail") ownerEmail: String,
@QueryParam("datasetName") datasetName: String,
- @QueryParam("filePath") encodedUrl: String,
- @QueryParam("uploadId") uploadId: Optional[String],
+ @QueryParam("filePath") filePath: String,
@QueryParam("numParts") numParts: Optional[Integer],
- payload: Map[
- String,
- Any
- ], // Expecting {"parts": [...], "physicalAddress": "s3://bucket/path"}
@Auth user: SessionUser
): Response = {
val uid = user.getUid
+ val dataset: Dataset = getDatasetBy(ownerEmail, datasetName)
+
+ operationType.toLowerCase match {
+ case "init" => initMultipartUpload(dataset.getDid, filePath, numParts,
uid)
+ case "finish" => finishMultipartUpload(dataset.getDid, filePath, uid)
+ case "abort" => abortMultipartUpload(dataset.getDid, filePath, uid)
+ case _ =>
+ throw new BadRequestException("Invalid type parameter. Use 'init',
'finish', or 'abort'.")
+ }
+ }
- withTransaction(context) { ctx =>
- val dataset = context
- .select(DATASET.fields: _*)
- .from(DATASET)
- .leftJoin(USER)
- .on(USER.UID.eq(DATASET.OWNER_UID))
- .where(USER.EMAIL.eq(ownerEmail))
- .and(DATASET.NAME.eq(datasetName))
- .fetchOneInto(classOf[Dataset])
- if (dataset == null || !userHasWriteAccess(ctx, dataset.getDid, uid)) {
- throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
- }
+ @POST
+ @RolesAllowed(Array("REGULAR", "ADMIN"))
+ @Path("/multipart-upload/part")
+ @Consumes(Array(MediaType.APPLICATION_OCTET_STREAM))
+ def uploadPart(
+ @QueryParam("ownerEmail") ownerEmail: String,
Review Comment:
Why do we need ownerEmail here? We already have the user ID from their token,
so it can be fetched. Please move these query parameters to the request body as JSON.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]