This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new bf37edab37 get subfolder names transform hardening, doc updates. fixes #6771 (#6773)
bf37edab37 is described below

commit bf37edab3713195211ec7e5700d3e42f7470e7ea
Author: Bart Maertens <[email protected]>
AuthorDate: Fri Mar 13 08:52:17 2026 +0000

    get subfolder names transform hardening, doc updates. fixes #6771 (#6773)
---
 .../apache/hop/core/fileinput/FileInputList.java   | 18 ++++++------
 .../pages/pipeline/transforms/getsubfolders.adoc   | 32 ++++++++++++++++++++--
 .../transforms/getsubfolders/GetSubFolders.java    | 32 ++++++++++++++++++----
 .../getsubfolders/GetSubFoldersMeta.java           | 27 +++++++++++++++++-
 4 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java b/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
index 4de6c55849..175e137b1f 100644
--- a/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
+++ b/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
@@ -341,10 +341,7 @@ public class FileInputList {
       }
 
       try (FileObject directoryFileObject = HopVfs.getFileObject(oneFile, variables)) {
-        // Find all folder names in this directory
-        //
-        if (directoryFileObject != null
-            && directoryFileObject.getType() == FileType.FOLDER) { // it's a directory
+        if (directoryFileObject != null && directoryFileObject.getType() == FileType.FOLDER) {
           FileObject[] fileObjects =
               directoryFileObject.findFiles(
                   new AllFileSelector() {
@@ -371,18 +368,19 @@ public class FileInputList {
               }
             }
           }
-          if (Utils.isEmpty(fileObjects) && oneRequired) {
-            fileInputList.addNonAccessibleFile(directoryFileObject);
-          }
 
-          // Sort the list: quicksort, only for regular files
           fileInputList.sortFiles();
-        } else {
-          if (oneRequired && (directoryFileObject == null || !directoryFileObject.exists())) {
+        } else if (oneRequired) {
+          if (directoryFileObject == null || !directoryFileObject.exists()) {
             fileInputList.addNonExistantFile(directoryFileObject);
+          } else {
+            fileInputList.addNonAccessibleFile(directoryFileObject);
           }
         }
       } catch (Exception e) {
+        if (oneRequired) {
+          fileInputList.addNonAccessibleFile(new NonAccessibleFileObject(oneFile));
+        }
         log.logError(Const.getStackTracker(e));
       }
       // Ignore
diff --git a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
index 9914db1d6d..56683f40a5 100644
--- a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
+++ b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
@@ -25,8 +25,11 @@ under the License.
 |
 == Description
 
-The Get Subfolder Names transform gets subfolder names from a directory.
+The `Get Subfolder Names` transform recursively traverses one or more directories and outputs a row for every subfolder found.
+This includes all nested subfolders at any depth, not just the immediate children.
 
+The input directories can be configured statically in the transform's Folder tab, or provided dynamically from a field in a previous transform.
+Each subfolder produces one output row with metadata fields describing the folder.
 |
 == Supported Engines
 [%noheader,cols="2,1a",frame=none, role="table-supported-engines"]
@@ -63,4 +66,29 @@ The Get Subfolder Names transform gets subfolder names from a directory.
 |Include rownum in output?|Allows the row number to be added to the output.
 |Rownum fieldname|The field which contains the row number.
 |Limit|Limits the output rows.
-|===
\ No newline at end of file
+|===
+
+
+== Output fields
+[options="header"]
+|===
+|Field|Type|Description
+|`folderName`|String|Full path of the subfolder
+|`short_folderName`|String|Base name of the subfolder (last path segment)
+|`path`|String|Full path of the subfolder's parent directory
+|`ishidden`|Boolean|Whether the subfolder is hidden
+|`isreadable`|Boolean|Whether the subfolder is readable
+|`iswriteable`|Boolean|Whether the subfolder is writeable
+|`lastmodifiedtime`|Date|Last modified timestamp of the subfolder
+|`uri`|String|Full URI of the subfolder
+|`rooturi`|String|Root URI of the file system
+|`children`|Integer|Number of direct children (files and folders) in the subfolder
+|===
+
+== Cloud storage (VFS)
+This transform works with any file system supported through Apache Hop's VFS integration, including local file systems, S3, Azure Blob Storage, Google Cloud Storage, Google Drive, Dropbox and others.
+When working with cloud storage, be aware of the following:
+
+* The input directory must point to an actual folder, not a scheme root. For example, use `s3://my-bucket/my-folder` rather than `s3://` or `s3://my-bucket`.
+* Some metadata fields (`ishidden`, `isreadable`, `iswriteable`, `lastmodifiedtime`, `children`) may not be available for all cloud storage providers. When a value can't be determined, the field will be `null`. For example, `lastmodifiedtime` may not be available for virtual folders on S3 or MinIO.
+* The transform traverses the full folder tree recursively. On cloud storage with deeply nested or very large folder structures, this may result in a high number of API calls and slow performance.
diff --git a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
index 3bca9561e5..46b3059816 100644
--- a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
+++ b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
@@ -122,16 +122,19 @@ public class GetSubFolders extends BaseTransform<GetSubFoldersMeta, GetSubFolder
         extraData[outputIndex++] = HopVfs.getFilename(data.file.getParent());
 
         // is hidden?
-        extraData[outputIndex++] = data.file.isHidden();
+        extraData[outputIndex++] = getFileAttributeSafe(() -> data.file.isHidden(), "isHidden");
 
         // is readable?
-        extraData[outputIndex++] = data.file.isReadable();
+        extraData[outputIndex++] = getFileAttributeSafe(() -> data.file.isReadable(), "isReadable");
 
         // is writeable?
-        extraData[outputIndex++] = data.file.isWriteable();
+        extraData[outputIndex++] =
+            getFileAttributeSafe(() -> data.file.isWriteable(), "isWriteable");
 
         // last modified time
-        extraData[outputIndex++] = new Date(data.file.getContent().getLastModifiedTime());
+        extraData[outputIndex++] =
+            getFileAttributeSafe(
+                () -> new Date(data.file.getContent().getLastModifiedTime()), "lastModifiedTime");
 
         // uri
         extraData[outputIndex++] = data.file.getName().getURI();
@@ -140,7 +143,8 @@ public class GetSubFolders extends BaseTransform<GetSubFoldersMeta, GetSubFolder
         extraData[outputIndex++] = data.file.getName().getRootURI();
 
         // nr of child files
-        extraData[outputIndex++] = (long) data.file.getChildren().length;
+        extraData[outputIndex++] =
+            getFileAttributeSafe(() -> (long) data.file.getChildren().length, "childrenCount");
 
         // See if we need to add the row number to the row...
         if (meta.isIncludeRowNumber() && !Utils.isEmpty(meta.getRowNumberField())) {
@@ -230,6 +234,24 @@ public class GetSubFolders extends BaseTransform<GetSubFoldersMeta, GetSubFolder
     }
   }
 
+  private <T> T getFileAttributeSafe(FileAttributeSupplier<T> supplier, String attributeName) {
+    try {
+      return supplier.get();
+    } catch (Exception e) {
+      if (isDebug()) {
+        logDebug(
+            "Could not determine ''{0}'' for {1}: {2}",
+            attributeName, data.file.getName().getFriendlyURI(), e.getMessage());
+      }
+      return null;
+    }
+  }
+
+  @FunctionalInterface
+  private interface FileAttributeSupplier<T> {
+    T get() throws Exception;
+  }
+
   @Override
   public boolean init() {
     if (super.init()) {
diff --git a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
index dee9a669fa..17d6a10f89 100644
--- a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
+++ b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
@@ -79,6 +79,10 @@ public class GetSubFoldersMeta extends BaseTransformMeta<GetSubFolders, GetSubFo
   @HopMetadataProperty(key = "limit")
   private long rowLimit;
 
+  /** The name of the children count output field. Null means legacy default ("childrens"). */
+  @HopMetadataProperty(key = "children_field")
+  private String childrenFieldName;
+
   public GetSubFoldersMeta() {
     super();
     files = new ArrayList<>();
@@ -91,6 +95,7 @@ public class GetSubFoldersMeta extends BaseTransformMeta<GetSubFolders, GetSubFo
     this.dynamicFolderNameField = m.dynamicFolderNameField;
     this.folderNameDynamic = m.folderNameDynamic;
     this.rowLimit = m.rowLimit;
+    this.childrenFieldName = m.childrenFieldName;
     m.files.forEach(f -> this.files.add(new GSFile(f)));
   }
 
@@ -105,6 +110,7 @@ public class GetSubFoldersMeta extends BaseTransformMeta<GetSubFolders, GetSubFo
     includeRowNumber = false;
     rowNumberField = "";
     dynamicFolderNameField = "";
+    childrenFieldName = "children";
   }
 
   @Override
@@ -168,7 +174,8 @@ public class GetSubFoldersMeta extends BaseTransformMeta<GetSubFolders, GetSubFo
     row.addValueMeta(rootUriValueMeta);
 
     // children
-    IValueMeta childrenValueMeta = new ValueMetaInteger(variables.resolve("childrens"));
+    String resolvedChildrenField = childrenFieldName != null ? childrenFieldName : "childrens";
+    IValueMeta childrenValueMeta = new ValueMetaInteger(variables.resolve(resolvedChildrenField));
     childrenValueMeta.setLength(IValueMeta.DEFAULT_INTEGER_LENGTH, 0);
     childrenValueMeta.setOrigin(name);
     row.addValueMeta(childrenValueMeta);
@@ -483,4 +490,22 @@ public class GetSubFoldersMeta extends BaseTransformMeta<GetSubFolders, GetSubFo
   public void setRowLimit(long rowLimit) {
     this.rowLimit = rowLimit;
   }
+
+  /**
+   * Gets childrenFieldName
+   *
+   * @return value of childrenFieldName
+   */
+  public String getChildrenFieldName() {
+    return childrenFieldName;
+  }
+
+  /**
+   * Sets childrenFieldName
+   *
+   * @param childrenFieldName value of childrenFieldName
+   */
+  public void setChildrenFieldName(String childrenFieldName) {
+    this.childrenFieldName = childrenFieldName;
+  }
 }

Reply via email to