This is an automated email from the ASF dual-hosted git repository.

turcsanyi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 7340bb8153 NIFI-11924 Closing FileSystem after using in 
HDFSExternalResourceProvider
7340bb8153 is described below

commit 7340bb81535d8095205f198f8363c7c70cc5d356
Author: Bence Simon <bsi...@apache.org>
AuthorDate: Wed Aug 9 13:36:26 2023 +0200

    NIFI-11924 Closing FileSystem after using in HDFSExternalResourceProvider
    
    This closes #7588.
    
    Signed-off-by: Peter Turcsanyi <turcsa...@apache.org>
---
 .../processors/hadoop/AbstractHadoopProcessor.java |  15 +--
 .../nifi/processors/hadoop/HDFSResourceHelper.java |  72 ++++++++++++++
 .../hadoop/HDFSExternalResourceProvider.java       |  28 ++++--
 .../resource/hadoop/HDFSResourceInputStream.java   | 106 +++++++++++++++++++++
 4 files changed, 199 insertions(+), 22 deletions(-)

diff --git 
a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java
 
b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java
index f594ed2549..960e7a5b7c 100644
--- 
a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java
+++ 
b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java
@@ -376,20 +376,7 @@ public abstract class AbstractHadoopProcessor extends 
AbstractProcessor implemen
         if (resources != null) {
             // Attempt to close the FileSystem
             final FileSystem fileSystem = resources.getFileSystem();
-            try {
-                interruptStatisticsThread(fileSystem);
-            } catch (Exception e) {
-                getLogger().warn("Error stopping FileSystem statistics thread: 
" + e.getMessage());
-                getLogger().debug("", e);
-            } finally {
-                if (fileSystem != null) {
-                    try {
-                        fileSystem.close();
-                    } catch (IOException e) {
-                        getLogger().warn("Error close FileSystem: " + 
e.getMessage(), e);
-                    }
-                }
-            }
+            HDFSResourceHelper.closeFileSystem(fileSystem);
         }
 
         // Clear out the reference to the resources
diff --git 
a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java
 
b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java
new file mode 100644
index 0000000000..18650ac33e
--- /dev/null
+++ 
b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.hadoop;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+public final class HDFSResourceHelper {
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(HDFSResourceHelper.class);
+
+    private HDFSResourceHelper() {
+        // Not to be instantiated
+    }
+
+    public static void closeFileSystem(final FileSystem fileSystem) {
+        try {
+            interruptStatisticsThread(fileSystem);
+        } catch (Exception e) {
+            LOGGER.warn("Error stopping FileSystem statistics thread: " + 
e.getMessage());
+            LOGGER.debug("", e);
+        } finally {
+            if (fileSystem != null) {
+                try {
+                    fileSystem.close();
+                } catch (IOException e) {
+                    LOGGER.warn("Error close FileSystem: " + e.getMessage(), 
e);
+                }
+            }
+        }
+    }
+
+    private static void interruptStatisticsThread(final FileSystem fileSystem) 
throws NoSuchFieldException, IllegalAccessException {
+        final Field statsField = 
FileSystem.class.getDeclaredField("statistics");
+        statsField.setAccessible(true);
+
+        final Object statsObj = statsField.get(fileSystem);
+        if (statsObj instanceof FileSystem.Statistics) {
+            final FileSystem.Statistics statistics = (FileSystem.Statistics) 
statsObj;
+
+            final Field statsThreadField = 
statistics.getClass().getDeclaredField("STATS_DATA_CLEANER");
+            statsThreadField.setAccessible(true);
+
+            final Object statsThreadObj = statsThreadField.get(statistics);
+            if (statsThreadObj instanceof Thread) {
+                final Thread statsThread = (Thread) statsThreadObj;
+                try {
+                    statsThread.interrupt();
+                } catch (Exception e) {
+                    LOGGER.warn("Error interrupting thread: " + 
e.getMessage(), e);
+                }
+            }
+        }
+    }
+}
diff --git 
a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java
 
b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java
index 901e2cdaca..38d659d792 100644
--- 
a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java
+++ 
b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java
@@ -31,6 +31,7 @@ import 
org.apache.nifi.flow.resource.ExternalResourceProviderInitializationConte
 import org.apache.nifi.flow.resource.ImmutableExternalResourceDescriptor;
 import org.apache.nifi.hadoop.SecurityUtil;
 import org.apache.nifi.processors.hadoop.ExtendedConfiguration;
+import org.apache.nifi.processors.hadoop.HDFSResourceHelper;
 import org.apache.nifi.processors.hadoop.HdfsResources;
 import org.apache.nifi.security.krb.KerberosKeytabUser;
 import org.apache.nifi.security.krb.KerberosPasswordUser;
@@ -51,6 +52,11 @@ import java.util.List;
 import java.util.Objects;
 import java.util.stream.Collectors;
 
+// Implementation considerations: the public methods are individual steps orchestrated by clients. Because of this, there is no direct dependency
+// or connection between {@code listResources} and {@code fetchExternalResource}: both are self-sufficient actions. Consequently they do not share
+// a {@code FileSystem} instance; instead, every method is responsible for acquiring and closing its own. This comes with a minimal overhead, but due to
+// the nature of the service the method calls are relatively rare. Alternatively, a provider could keep a FileSystem instance alive during its
+// lifecycle, but that is considered a more error-prone approach as it requires logic regularly checking the state of the maintained instance.
 @RequiresInstanceClassLoading(cloneAncestorResources = true)
 public class HDFSExternalResourceProvider implements ExternalResourceProvider {
     private static final Logger LOGGER = 
LoggerFactory.getLogger(HDFSExternalResourceProvider.class);
@@ -104,7 +110,6 @@ public class HDFSExternalResourceProvider implements 
ExternalResourceProvider {
         final HdfsResources hdfsResources = getHdfsResources();
 
         try {
-
             final FileStatus[] fileStatuses = 
hdfsResources.getUserGroupInformation()
                     .doAs((PrivilegedExceptionAction<FileStatus[]>) () -> 
hdfsResources.getFileSystem().listStatus(sourceDirectory));
 
@@ -122,6 +127,8 @@ public class HDFSExternalResourceProvider implements 
ExternalResourceProvider {
         } catch (final InterruptedException e) {
             Thread.currentThread().interrupt();
             throw new IOException("Provider cannot list resources", e);
+        } finally {
+            HDFSResourceHelper.closeFileSystem(hdfsResources.getFileSystem());
         }
     }
 
@@ -140,13 +147,18 @@ public class HDFSExternalResourceProvider implements 
ExternalResourceProvider {
         final HdfsResources hdfsResources = getHdfsResources();
 
         try {
-            return 
hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>)
 () -> {
-                if (!hdfsResources.getFileSystem().exists(path)) {
-                    throw new IOException("Cannot find file in HDFS at 
location " + location);
-                }
-
-                return hdfsResources.getFileSystem().open(path, 
BUFFER_SIZE_DEFAULT);
-            });
+            final FSDataInputStream fsDataInputStream =
+                    
hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>)
 () -> {
+                        if (!hdfsResources.getFileSystem().exists(path)) {
+                            throw new IOException("Cannot find file in HDFS at 
location " + location);
+                        }
+
+                        return hdfsResources.getFileSystem().open(path, 
BUFFER_SIZE_DEFAULT);
+                    });
+            // The acquired InputStream is used by the client and for this 
reason the FileSystem cannot be closed here.
+            // The closing of the file system is delegated to the decorator 
(HDFSResourceInputStream) which will close
+            // it when the decorated input stream is closed.
+            return new HDFSResourceInputStream(hdfsResources.getFileSystem(), 
fsDataInputStream);
         } catch (final InterruptedException e) {
             Thread.currentThread().interrupt();
             throw new IOException("Error during acquiring file", e);
diff --git 
a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java
 
b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java
new file mode 100644
index 0000000000..c8108b660b
--- /dev/null
+++ 
b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.flow.resource.hadoop;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.nifi.processors.hadoop.HDFSResourceHelper;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+final class HDFSResourceInputStream extends InputStream {
+    private final FileSystem fileSystem;
+    private final FSDataInputStream inputStream;
+
+    HDFSResourceInputStream(final FileSystem fileSystem, final 
FSDataInputStream inputStream) {
+        this.fileSystem = fileSystem;
+        this.inputStream = inputStream;
+    }
+
+    @Override
+    public int read() throws IOException {
+        return inputStream.read();
+    }
+
+    @Override
+    public int read(final byte[] b) throws IOException {
+        return inputStream.read(b);
+    }
+
+    @Override
+    public int read(final byte[] b, final int off, final int len) throws 
IOException {
+        return inputStream.read(b, off, len);
+    }
+
+    @Override
+    public byte[] readAllBytes() throws IOException {
+        return inputStream.readAllBytes();
+    }
+
+    @Override
+    public byte[] readNBytes(final int len) throws IOException {
+        return inputStream.readNBytes(len);
+    }
+
+    @Override
+    public int readNBytes(final byte[] b, final int off, final int len) throws 
IOException {
+        return inputStream.readNBytes(b, off, len);
+    }
+
+    @Override
+    public long skip(final long n) throws IOException {
+        return inputStream.skip(n);
+    }
+
+    @Override
+    public void skipNBytes(final long n) throws IOException {
+        inputStream.skipNBytes(n);
+    }
+
+    @Override
+    public int available() throws IOException {
+        return inputStream.available();
+    }
+
+    @Override
+    public void close() throws IOException {
+        inputStream.close();
+        HDFSResourceHelper.closeFileSystem(fileSystem);
+    }
+
+    @Override
+    public synchronized void mark(final int readlimit) {
+        inputStream.mark(readlimit);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        inputStream.reset();
+    }
+
+    @Override
+    public boolean markSupported() {
+        return inputStream.markSupported();
+    }
+
+    @Override
+    public long transferTo(final OutputStream out) throws IOException {
+        return inputStream.transferTo(out);
+    }
+}

Reply via email to