zeroshade commented on code in PR #916:
URL: https://github.com/apache/iceberg-go/pull/916#discussion_r3102013031


##########
table/orphan_cleanup.go:
##########
@@ -333,37 +325,15 @@ func makeFileWalkFunc(fn func(path string, info 
stdfs.FileInfo) error, pathTrans
        }
 }
 
-func walkDirectory(fsys iceio.IO, root string, fn func(path string, info 
stdfs.FileInfo) error) error {
-       switch v := fsys.(type) {
-       case iceio.LocalFS:
-               cleanRoot := strings.TrimPrefix(root, "file://")
-               if cleanRoot == "" {
-                       cleanRoot = "."
-               }
-
-               return filepath.WalkDir(cleanRoot, makeFileWalkFunc(fn, 
func(path string) string {
-                       return path
-               }))
-
-       default:
-               // For blob storage: direct field access since we know the 
structure
-               bucket := getBucketName(v)
-
-               parsed, err := url.Parse(root)
-               if err != nil {
-                       return fmt.Errorf("invalid URL %s: %w", root, err)
-               }
-
-               walkPath := strings.TrimPrefix(parsed.Path, "/")
-               if walkPath == "" {
-                       walkPath = "."
-               }
-
-               // URL transform - reconstruct full URL path
-               return stdfs.WalkDir(bucket, walkPath, makeFileWalkFunc(fn, 
func(path string) string {
-                       return parsed.Scheme + "://" + parsed.Host + "/" + path
-               }))
+func walkDirectory(fsys io.IO, root string, fn func(path string, info 
stdfs.FileInfo) error) error {

Review Comment:
   We should either land atomically with #917 or simply do #917 first and then 
incorporate it with #916 afterwards. I don't want to have the reflection-based 
fallback if we can avoid it.



##########
table/orphan_cleanup.go:
##########
@@ -308,14 +308,29 @@ func (t Table) scanFiles(fs iceio.IO, location string, 
cfg *orphanCleanupConfig)
        return allFiles, totalSize, nil
 }
 
-// getBucket gets the Bucket field from blob storage - absolute minimal 
approach
-func getBucketName(fsys iceio.IO) stdfs.FS {
-       v := reflect.ValueOf(fsys).Elem() // We know it's a pointer to struct
+// getBucketFS extracts the underlying Bucket (fs.FS) from blob-backed IO
+// implementations via reflection. This is a temporary fallback until all
+// cloud backends implement ListableIO (#917).
+func getBucketFS(fsys io.IO) (stdfs.FS, error) {
+       v := reflect.ValueOf(fsys)
+       if v.Kind() == reflect.Ptr {
+               v = v.Elem()
+       }
+
+       f := v.FieldByName("Bucket")
+       if !f.IsValid() {
+               return nil, fmt.Errorf("no bucket field found on %T", fsys)
+       }
+
+       bucket, ok := f.Interface().(stdfs.FS)
+       if !ok {
+               return nil, fmt.Errorf("bucket field on %T is not an fs.FS", 
fsys)
+       }
 
-       return v.FieldByName("Bucket").Interface().(stdfs.FS)
+       return bucket, nil
 }

Review Comment:
   instead of this, can we just add the bucket implementations in 
io/<cloud....> etc? Either that, or let's get the work in #917 done and merged 
first. Then come back to this. I don't like the intermediate here where we're 
peering behind the interface like this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to