This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new bf37edab37 get subfolder names transform hardening, doc updates. fixes
#6771 (#6773)
bf37edab37 is described below
commit bf37edab3713195211ec7e5700d3e42f7470e7ea
Author: Bart Maertens <[email protected]>
AuthorDate: Fri Mar 13 08:52:17 2026 +0000
get subfolder names transform hardening, doc updates. fixes #6771 (#6773)
---
.../apache/hop/core/fileinput/FileInputList.java | 18 ++++++------
.../pages/pipeline/transforms/getsubfolders.adoc | 32 ++++++++++++++++++++--
.../transforms/getsubfolders/GetSubFolders.java | 32 ++++++++++++++++++----
.../getsubfolders/GetSubFoldersMeta.java | 27 +++++++++++++++++-
4 files changed, 91 insertions(+), 18 deletions(-)
diff --git
a/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
b/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
index 4de6c55849..175e137b1f 100644
--- a/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
+++ b/core/src/main/java/org/apache/hop/core/fileinput/FileInputList.java
@@ -341,10 +341,7 @@ public class FileInputList {
}
try (FileObject directoryFileObject = HopVfs.getFileObject(oneFile,
variables)) {
- // Find all folder names in this directory
- //
- if (directoryFileObject != null
- && directoryFileObject.getType() == FileType.FOLDER) { // it's a
directory
+ if (directoryFileObject != null && directoryFileObject.getType() ==
FileType.FOLDER) {
FileObject[] fileObjects =
directoryFileObject.findFiles(
new AllFileSelector() {
@@ -371,18 +368,19 @@ public class FileInputList {
}
}
}
- if (Utils.isEmpty(fileObjects) && oneRequired) {
- fileInputList.addNonAccessibleFile(directoryFileObject);
- }
- // Sort the list: quicksort, only for regular files
fileInputList.sortFiles();
- } else {
- if (oneRequired && (directoryFileObject == null ||
!directoryFileObject.exists())) {
+ } else if (oneRequired) {
+ if (directoryFileObject == null || !directoryFileObject.exists()) {
fileInputList.addNonExistantFile(directoryFileObject);
+ } else {
+ fileInputList.addNonAccessibleFile(directoryFileObject);
}
}
} catch (Exception e) {
+ if (oneRequired) {
+ fileInputList.addNonAccessibleFile(new
NonAccessibleFileObject(oneFile));
+ }
log.logError(Const.getStackTracker(e));
}
// Ignore
diff --git
a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
index 9914db1d6d..56683f40a5 100644
---
a/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
+++
b/docs/hop-user-manual/modules/ROOT/pages/pipeline/transforms/getsubfolders.adoc
@@ -25,8 +25,11 @@ under the License.
|
== Description
-The Get Subfolder Names transform gets subfolder names from a directory.
+The `Get Subfolder Names` transform recursively traverses one or more
directories and outputs a row for every subfolder found.
+This includes all nested subfolders at any depth, not just the immediate
children.
+The input directories can be configured statically in the transform's Folder
tab, or provided dynamically from a field in a previous transform.
+Each subfolder produces one output row with metadata fields describing the
folder.
|
== Supported Engines
[%noheader,cols="2,1a",frame=none, role="table-supported-engines"]
@@ -63,4 +66,29 @@ The Get Subfolder Names transform gets subfolder names from
a directory.
|Include rownum in output?|Allows the row number to be added to the output.
|Rownum fieldname|The field which contains the row number.
|Limit|Limits the output rows.
-|===
\ No newline at end of file
+|===
+
+
+== Output fields
+[options="header"]
+|===
+|Field|Type|Description
+|`folderName`|String|Full path of the subfolder
+|`short_folderName`|String|Base name of the subfolder (last path segment)
+|`path`|String|Full path of the subfolder's parent directory
+|`ishidden`|Boolean|Whether the subfolder is hidden
+|`isreadable`|Boolean|Whether the subfolder is readable
+|`iswriteable`|Boolean|Whether the subfolder is writeable
+|`lastmodifiedtime`|Date|Last modified timestamp of the subfolder
+|`uri`|String|Full URI of the subfolder
+|`rooturi`|String|Root URI of the file system
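+|`children`|Integer|Number of direct children (files and folders) in the
subfolder. When no children field name is stored in the transform metadata, the legacy name `childrens` is used instead.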
+|===
+
+== Cloud storage (VFS)
+This transform works with any file system supported through Apache Hop's VFS
integration, including local file systems, S3, Azure Blob Storage, Google Cloud
Storage, Google Drive, Dropbox and others.
+When working with cloud storage, be aware of the following:
+
+* The input directory must point to an actual folder, not a scheme root. For
example, use `s3://my-bucket/my-folder` rather than `s3://` or `s3://my-bucket`.
+* Some metadata fields (`ishidden`, `isreadable`, `iswriteable`,
`lastmodifiedtime`, `children`) may not be available for all cloud storage
providers. When a value can't be determined, the field will be `null`. For
example, `lastmodifiedtime` may not be available for virtual folders on S3 or
MinIO.
+* The transform traverses the full folder tree recursively. On cloud storage
with deeply nested or very large folder structures, this may result in a high
number of API calls and slow performance.
diff --git
a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
index 3bca9561e5..46b3059816 100644
---
a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
+++
b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFolders.java
@@ -122,16 +122,19 @@ public class GetSubFolders extends
BaseTransform<GetSubFoldersMeta, GetSubFolder
extraData[outputIndex++] = HopVfs.getFilename(data.file.getParent());
// is hidden?
- extraData[outputIndex++] = data.file.isHidden();
+ extraData[outputIndex++] = getFileAttributeSafe(() ->
data.file.isHidden(), "isHidden");
// is readable?
- extraData[outputIndex++] = data.file.isReadable();
+ extraData[outputIndex++] = getFileAttributeSafe(() ->
data.file.isReadable(), "isReadable");
// is writeable?
- extraData[outputIndex++] = data.file.isWriteable();
+ extraData[outputIndex++] =
+ getFileAttributeSafe(() -> data.file.isWriteable(), "isWriteable");
// last modified time
- extraData[outputIndex++] = new
Date(data.file.getContent().getLastModifiedTime());
+ extraData[outputIndex++] =
+ getFileAttributeSafe(
+ () -> new Date(data.file.getContent().getLastModifiedTime()),
"lastModifiedTime");
// uri
extraData[outputIndex++] = data.file.getName().getURI();
@@ -140,7 +143,8 @@ public class GetSubFolders extends
BaseTransform<GetSubFoldersMeta, GetSubFolder
extraData[outputIndex++] = data.file.getName().getRootURI();
// nr of child files
- extraData[outputIndex++] = (long) data.file.getChildren().length;
+ extraData[outputIndex++] =
+ getFileAttributeSafe(() -> (long) data.file.getChildren().length,
"childrenCount");
// See if we need to add the row number to the row...
if (meta.isIncludeRowNumber() &&
!Utils.isEmpty(meta.getRowNumberField())) {
@@ -230,6 +234,24 @@ public class GetSubFolders extends
BaseTransform<GetSubFoldersMeta, GetSubFolder
}
}
+ private <T> T getFileAttributeSafe(FileAttributeSupplier<T> supplier, String
attributeName) {
+ try { // best-effort read of a single attribute of data.file
+ return supplier.get(); // the supplier is allowed to throw any Exception
+ } catch (Exception e) { // any failure is treated as "attribute could not be determined"
+ if (isDebug()) { // reported at debug level only; message text, no stack trace
+ logDebug(
+ "Could not determine ''{0}'' for {1}: {2}",
+ attributeName, data.file.getName().getFriendlyURI(),
e.getMessage());
+ }
+ return null; // callers store this null directly in the output row for the attribute
+ }
+ }
+
+ @FunctionalInterface
+ private interface FileAttributeSupplier<T> { // supplier shape used by getFileAttributeSafe
+ T get() throws Exception; // unlike java.util.function.Supplier, may throw a checked exception
+ }
+
@Override
public boolean init() {
if (super.init()) {
diff --git
a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
index dee9a669fa..17d6a10f89 100644
---
a/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
+++
b/plugins/transforms/getsubfolders/src/main/java/org/apache/hop/pipeline/transforms/getsubfolders/GetSubFoldersMeta.java
@@ -79,6 +79,10 @@ public class GetSubFoldersMeta extends
BaseTransformMeta<GetSubFolders, GetSubFo
@HopMetadataProperty(key = "limit")
private long rowLimit;
+ /** The name of the children count output field. Null means legacy default
("childrens"). */
+ @HopMetadataProperty(key = "children_field")
+ private String childrenFieldName;
+
public GetSubFoldersMeta() {
super();
files = new ArrayList<>();
@@ -91,6 +95,7 @@ public class GetSubFoldersMeta extends
BaseTransformMeta<GetSubFolders, GetSubFo
this.dynamicFolderNameField = m.dynamicFolderNameField;
this.folderNameDynamic = m.folderNameDynamic;
this.rowLimit = m.rowLimit;
+ this.childrenFieldName = m.childrenFieldName;
m.files.forEach(f -> this.files.add(new GSFile(f)));
}
@@ -105,6 +110,7 @@ public class GetSubFoldersMeta extends
BaseTransformMeta<GetSubFolders, GetSubFo
includeRowNumber = false;
rowNumberField = "";
dynamicFolderNameField = "";
+ childrenFieldName = "children";
}
@Override
@@ -168,7 +174,8 @@ public class GetSubFoldersMeta extends
BaseTransformMeta<GetSubFolders, GetSubFo
row.addValueMeta(rootUriValueMeta);
// children
- IValueMeta childrenValueMeta = new
ValueMetaInteger(variables.resolve("childrens"));
+ String resolvedChildrenField = childrenFieldName != null ?
childrenFieldName : "childrens";
+ IValueMeta childrenValueMeta = new
ValueMetaInteger(variables.resolve(resolvedChildrenField));
childrenValueMeta.setLength(IValueMeta.DEFAULT_INTEGER_LENGTH, 0);
childrenValueMeta.setOrigin(name);
row.addValueMeta(childrenValueMeta);
@@ -483,4 +490,22 @@ public class GetSubFoldersMeta extends
BaseTransformMeta<GetSubFolders, GetSubFo
public void setRowLimit(long rowLimit) {
this.rowLimit = rowLimit;
}
+
+ /**
+ * Gets the name of the output field that holds the children count.
+ *
+ * @return the configured field name; null means the legacy default ("childrens") is used
+ */
+ public String getChildrenFieldName() {
+ return childrenFieldName;
+ }
+
+ /**
+ * Sets the name of the output field that holds the children count.
+ *
+ * @param childrenFieldName the field name to use; null selects the legacy default ("childrens")
+ */
+ public void setChildrenFieldName(String childrenFieldName) {
+ this.childrenFieldName = childrenFieldName;
+ }
}