This is an automated email from the ASF dual-hosted git repository. turcsanyi pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push: new 7340bb8153 NIFI-11924 Closing FileSystem after using in HDFSExternalResourceProvider 7340bb8153 is described below commit 7340bb81535d8095205f198f8363c7c70cc5d356 Author: Bence Simon <bsi...@apache.org> AuthorDate: Wed Aug 9 13:36:26 2023 +0200 NIFI-11924 Closing FileSystem after using in HDFSExternalResourceProvider This closes #7588. Signed-off-by: Peter Turcsanyi <turcsa...@apache.org> --- .../processors/hadoop/AbstractHadoopProcessor.java | 15 +-- .../nifi/processors/hadoop/HDFSResourceHelper.java | 72 ++++++++++++++ .../hadoop/HDFSExternalResourceProvider.java | 28 ++++-- .../resource/hadoop/HDFSResourceInputStream.java | 106 +++++++++++++++++++++ 4 files changed, 199 insertions(+), 22 deletions(-) diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java index f594ed2549..960e7a5b7c 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractHadoopProcessor.java @@ -376,20 +376,7 @@ public abstract class AbstractHadoopProcessor extends AbstractProcessor implemen if (resources != null) { // Attempt to close the FileSystem final FileSystem fileSystem = resources.getFileSystem(); - try { - interruptStatisticsThread(fileSystem); - } catch (Exception e) { - getLogger().warn("Error stopping FileSystem statistics thread: " + e.getMessage()); - getLogger().debug("", e); - } finally { - if (fileSystem != null) { - try { - fileSystem.close(); - } catch (IOException e) { - getLogger().warn("Error close FileSystem: " + e.getMessage(), e); - } - } - } + 
HDFSResourceHelper.closeFileSystem(fileSystem); } // Clear out the reference to the resources diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java new file mode 100644 index 0000000000..18650ac33e --- /dev/null +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-hadoop-utils/src/main/java/org/apache/nifi/processors/hadoop/HDFSResourceHelper.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.nifi.processors.hadoop;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+
+/**
+ * Static helpers for releasing Hadoop {@link FileSystem} instances.
+ */
+public final class HDFSResourceHelper {
+    private static final Logger LOGGER = LoggerFactory.getLogger(HDFSResourceHelper.class);
+
+    private HDFSResourceHelper() {
+        // Not to be instantiated
+    }
+
+    /**
+     * Closes the given file system on a best-effort basis: first tries to interrupt the statistics
+     * cleaner thread reachable from it, then calls {@link FileSystem#close()}. A failure in either
+     * step is logged and not propagated to the caller.
+     *
+     * @param fileSystem the file system to close; {@code null} is accepted and results in a no-op
+     */
+    public static void closeFileSystem(final FileSystem fileSystem) {
+        if (fileSystem == null) {
+            // Nothing to release. Without this guard the reflective field read below would be
+            // invoked with a null target, fail, and be reported as a misleading warning.
+            return;
+        }
+
+        try {
+            interruptStatisticsThread(fileSystem);
+        } catch (Exception e) {
+            LOGGER.warn("Error stopping FileSystem statistics thread: " + e.getMessage());
+            LOGGER.debug("", e);
+        } finally {
+            // Always attempt the close, even when the statistics thread could not be interrupted.
+            try {
+                fileSystem.close();
+            } catch (IOException e) {
+                LOGGER.warn("Error close FileSystem: " + e.getMessage(), e);
+            }
+        }
+    }
+
+    /**
+     * Reflectively reads the {@code statistics} field declared on {@link FileSystem} and, when it holds
+     * a {@link FileSystem.Statistics}, reads that object's {@code STATS_DATA_CLEANER} field and
+     * interrupts it if it is a {@link Thread}.
+     *
+     * @param fileSystem the file system whose statistics field is inspected
+     * @throws NoSuchFieldException if either of the two fields cannot be found by name
+     * @throws IllegalAccessException if either of the two fields cannot be read
+     */
+    private static void interruptStatisticsThread(final FileSystem fileSystem) throws NoSuchFieldException, IllegalAccessException {
+        final Field statsField = FileSystem.class.getDeclaredField("statistics");
+        statsField.setAccessible(true);
+
+        final Object statsObj = statsField.get(fileSystem);
+        if (statsObj instanceof FileSystem.Statistics) {
+            final FileSystem.Statistics statistics = (FileSystem.Statistics) statsObj;
+
+            final Field statsThreadField = statistics.getClass().getDeclaredField("STATS_DATA_CLEANER");
+            statsThreadField.setAccessible(true);
+
+            final Object statsThreadObj = statsThreadField.get(statistics);
+            if (statsThreadObj instanceof Thread) {
+                final Thread statsThread = (Thread) statsThreadObj;
+                try {
+                    statsThread.interrupt();
+                } catch (Exception e) {
+                    LOGGER.warn("Error interrupting thread: " + e.getMessage(), e);
+                }
+            }
+        }
+    }
+}
diff --git a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java
b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java index 901e2cdaca..38d659d792 100644 --- a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java +++ b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSExternalResourceProvider.java @@ -31,6 +31,7 @@ import org.apache.nifi.flow.resource.ExternalResourceProviderInitializationConte import org.apache.nifi.flow.resource.ImmutableExternalResourceDescriptor; import org.apache.nifi.hadoop.SecurityUtil; import org.apache.nifi.processors.hadoop.ExtendedConfiguration; +import org.apache.nifi.processors.hadoop.HDFSResourceHelper; import org.apache.nifi.processors.hadoop.HdfsResources; import org.apache.nifi.security.krb.KerberosKeytabUser; import org.apache.nifi.security.krb.KerberosPasswordUser; @@ -51,6 +52,11 @@ import java.util.List; import java.util.Objects; import java.util.stream.Collectors; +// Implementation considerations: The public methods are considered as steps orchestrated by clients. As of this, there is no direct dependency +// or connection between {@code listResources} and {@code fetchExternalResource}: both are self-sufficing actions. As of this they do not share +// a {@code FileSystem} instance but every method is responsible for collecting and maintaining one. This comes with a minimal overhead but due to +// the nature of the service the method calls are relatively rare. Alternatively a provider could have a FileService instance maintained during its +// lifecycle but that is considered a more error-prone approach as it comes with logic regularly checking for the state of the maintained instance. 
@RequiresInstanceClassLoading(cloneAncestorResources = true) public class HDFSExternalResourceProvider implements ExternalResourceProvider { private static final Logger LOGGER = LoggerFactory.getLogger(HDFSExternalResourceProvider.class); @@ -104,7 +110,6 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider { final HdfsResources hdfsResources = getHdfsResources(); try { - final FileStatus[] fileStatuses = hdfsResources.getUserGroupInformation() .doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfsResources.getFileSystem().listStatus(sourceDirectory)); @@ -122,6 +127,8 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider { } catch (final InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Provider cannot list resources", e); + } finally { + HDFSResourceHelper.closeFileSystem(hdfsResources.getFileSystem()); } } @@ -140,13 +147,18 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider { final HdfsResources hdfsResources = getHdfsResources(); try { - return hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> { - if (!hdfsResources.getFileSystem().exists(path)) { - throw new IOException("Cannot find file in HDFS at location " + location); - } - - return hdfsResources.getFileSystem().open(path, BUFFER_SIZE_DEFAULT); - }); + final FSDataInputStream fsDataInputStream = + hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> { + if (!hdfsResources.getFileSystem().exists(path)) { + throw new IOException("Cannot find file in HDFS at location " + location); + } + + return hdfsResources.getFileSystem().open(path, BUFFER_SIZE_DEFAULT); + }); + // The acquired InputStream is used by the client and for this reason the FileSystem cannot be closed here. 
+ // The closing of the file system is delegated to the decorator (HDFSResourceInputStream) which will close + // it when the decorated input stream is closed. + return new HDFSResourceInputStream(hdfsResources.getFileSystem(), fsDataInputStream); } catch (final InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Error during acquiring file", e); diff --git a/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java new file mode 100644 index 0000000000..c8108b660b --- /dev/null +++ b/nifi-nar-bundles/nifi-hadoop-bundle/nifi-hdfs-processors/src/main/java/org/apache/nifi/flow/resource/hadoop/HDFSResourceInputStream.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.nifi.flow.resource.hadoop;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.nifi.processors.hadoop.HDFSResourceHelper;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Decorator around an {@link FSDataInputStream} that also holds the {@link FileSystem} passed in with it.
+ * Every read, skip, and mark operation is delegated to the wrapped stream; {@link #close()} closes the
+ * wrapped stream and then hands the file system to {@link HDFSResourceHelper#closeFileSystem(FileSystem)}.
+ */
+final class HDFSResourceInputStream extends InputStream {
+    private final FileSystem fileSystem;
+    private final FSDataInputStream inputStream;
+
+    /**
+     * @param fileSystem the file system to release when this stream is closed
+     * @param inputStream the stream all operations are delegated to
+     */
+    HDFSResourceInputStream(final FileSystem fileSystem, final FSDataInputStream inputStream) {
+        this.fileSystem = fileSystem;
+        this.inputStream = inputStream;
+    }
+
+    @Override
+    public int read() throws IOException {
+        return inputStream.read();
+    }
+
+    @Override
+    public int read(final byte[] b) throws IOException {
+        return inputStream.read(b);
+    }
+
+    @Override
+    public int read(final byte[] b, final int off, final int len) throws IOException {
+        return inputStream.read(b, off, len);
+    }
+
+    @Override
+    public byte[] readAllBytes() throws IOException {
+        return inputStream.readAllBytes();
+    }
+
+    @Override
+    public byte[] readNBytes(final int len) throws IOException {
+        return inputStream.readNBytes(len);
+    }
+
+    @Override
+    public int readNBytes(final byte[] b, final int off, final int len) throws IOException {
+        return inputStream.readNBytes(b, off, len);
+    }
+
+    @Override
+    public long skip(final long n) throws IOException {
+        return inputStream.skip(n);
+    }
+
+    @Override
+    public void skipNBytes(final long n) throws IOException {
+        inputStream.skipNBytes(n);
+    }
+
+    @Override
+    public int available() throws IOException {
+        return inputStream.available();
+    }
+
+    /**
+     * Closes the wrapped stream and then releases the file system.
+     *
+     * @throws IOException if closing the wrapped stream fails; the file system is still released
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            inputStream.close();
+        } finally {
+            // Release the file system even when closing the stream throws; otherwise it would leak.
+            HDFSResourceHelper.closeFileSystem(fileSystem);
+        }
+    }
+
+    @Override
+    public synchronized void mark(final int readlimit) {
+        inputStream.mark(readlimit);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        inputStream.reset();
+    }
+
+    @Override
+    public boolean markSupported() {
+        return inputStream.markSupported();
+    }
+
+    @Override
+    public long transferTo(final OutputStream out) throws IOException {
+        return inputStream.transferTo(out);
+    }
+}