Copilot commented on code in PR #4004:
URL: https://github.com/apache/texera/pull/4004#discussion_r2464472200


##########
frontend/src/app/dashboard/service/user/dataset/dataset.service.ts:
##########
@@ -227,15 +236,47 @@ export class DatasetService {
               totalTime: 0,
             });
 
-            // Keep track of all uploaded parts
-            const uploadedParts: { PartNumber: number; ETag: string }[] = [];
+            // Use expand to recursively fetch URL batches on-demand
+            const totalBatches = Math.ceil(partCount / urlBatchSize);
+
+            const initialState = {
+              urls: presignedUrls,
+              nextBatch: 2, // First batch generates by lakefs

Review Comment:
   Corrected grammar in comment: 'First batch generates by lakefs' should be 
'First batch generated by lakefs'.
   ```suggestion
                 nextBatch: 2, // First batch generated by lakefs
   ```



##########
file-service/src/main/scala/org/apache/texera/service/util/S3StorageClient.scala:
##########
@@ -139,4 +159,60 @@ object S3StorageClient {
       s3Client.deleteObjects(deleteObjectsRequest)
     }
   }
+
+  /**
+    * Generates presigned URLs for multipart upload parts.
+    *
+    * @param physicalAddress The S3 object URI
+    * @param s3UploadId The multipart upload ID
+    * @param partNumbers List of part numbers to sign
+    * @param expiryHours URL expiration time in hours (default: 2)
+    * @return Map of part number to presigned URL
+    */
+  def presignUploadParts(
+      physicalAddress: String,
+      s3UploadId: String,
+      partNumbers: List[Int],
+      expiryHours: Int = 2
+  ): Map[Int, String] = {
+
+    val (bucket, key) = extractFromUri(physicalAddress)
+    partNumbers.map { partNumber =>
+      val presignRequest = UploadPartPresignRequest
+        .builder()
+        .signatureDuration(Duration.ofHours(expiryHours.toLong))
+        .uploadPartRequest(
+          UploadPartRequest
+            .builder()
+            .bucket(bucket)
+            .key(key)
+            .uploadId(s3UploadId)
+            .partNumber(partNumber)
+            .build()
+        )
+        .build()
+
+      partNumber -> 
s3Presigner.presignUploadPart(presignRequest).url().toString
+    }.toMap
+  }
+
+  private def extractFromUri(physicalAddress: String): (String, String) = {
+    val uri = new URI(physicalAddress)
+    val path = uri.getPath.stripPrefix("/")
+
+    if (uri.getScheme == "s3") {
+      // S3 URI format (e.g., "s3://my-bucket/path/to/key"); The host is the 
bucket name.

Review Comment:
   The semicolon after the example should be replaced with a period for proper 
punctuation. The sentence 'The host is the bucket name.' is a separate sentence 
and should follow standard sentence separation.
   ```suggestion
         // S3 URI format (e.g., "s3://my-bucket/path/to/key"). The host is the 
bucket name.
   ```



##########
file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala:
##########
@@ -678,16 +678,55 @@ class DatasetResource {
             filePath,
             numPartsValue
           )
+
+          val urlsMap = 
presignedResponse.getPresignedUrls.asScala.toList.zipWithIndex.map {
+            case (url, idx) => (idx + 1) -> url
+          }.toMap
+
           Response
             .ok(
               Map(
                 "uploadId" -> presignedResponse.getUploadId,
-                "presignedUrls" -> presignedResponse.getPresignedUrls,
+                "presignedUrls" -> urlsMap,
                 "physicalAddress" -> presignedResponse.getPhysicalAddress
               )
             )
             .build()
 
+        case "sign" =>
+          val pendingParts = payload.get("pendingParts") match {
+            case Some(rawList: List[_]) =>
+              rawList.map {
+                case i: Int    => i
+                case d: Double => d.toInt
+                case s: String => s.toInt
+              }
+            case _ => throw new BadRequestException("pendingParts required")

Review Comment:
   The error message 'pendingParts required' is ambiguous. Consider making it 
more descriptive, e.g., 'pendingParts is required and must be a list'.
   ```suggestion
               case _ => throw new BadRequestException("pendingParts is 
required and must be a list of integers")
   ```



##########
file-service/src/main/scala/org/apache/texera/service/resource/DatasetResource.scala:
##########
@@ -678,16 +678,55 @@ class DatasetResource {
             filePath,
             numPartsValue
           )
+
+          val urlsMap = 
presignedResponse.getPresignedUrls.asScala.toList.zipWithIndex.map {
+            case (url, idx) => (idx + 1) -> url
+          }.toMap
+
           Response
             .ok(
               Map(
                 "uploadId" -> presignedResponse.getUploadId,
-                "presignedUrls" -> presignedResponse.getPresignedUrls,
+                "presignedUrls" -> urlsMap,
                 "physicalAddress" -> presignedResponse.getPhysicalAddress
               )
             )
             .build()
 
+        case "sign" =>
+          val pendingParts = payload.get("pendingParts") match {
+            case Some(rawList: List[_]) =>
+              rawList.map {
+                case i: Int    => i
+                case d: Double => d.toInt
+                case s: String => s.toInt
+              }
+            case _ => throw new BadRequestException("pendingParts required")
+          }
+
+          val physicalAddressValue = payload.get("physicalAddress") match {
+            case Some(addr: String) => addr
+            case _                  => throw new 
BadRequestException("physicalAddress required")

Review Comment:
   The error message 'physicalAddress required' is inconsistent with similar 
messages. Consider using 'physicalAddress is required and must be a string' for 
clarity and consistency.
   ```suggestion
               case _                  => throw new 
BadRequestException("physicalAddress is required and must be a string")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to