From 6f937ccef54afdcebaa52a036591523c67ac9d41 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 3 Sep 2020 13:58:17 +1200
Subject: [PATCH v3 1/3] Skip unnecessary stat() calls in walkdir().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some kernels can tell us the type of a "dirent", so we can avoid a call
to stat() or lstat() in many cases.  In order to be able to apply this
change to both frontend and backend versions of walkdir(), define a new
function get_dirent_type() in a new translation unit file_utils_febe.c.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Juan José Santamaría Flecha <juanjo.santamaria@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BFzxupGGN4GpUdbzZN%2Btn6FQPHo8w0Q%2BAPH5Wz8RG%2Bww%40mail.gmail.com
---
 src/backend/storage/file/fd.c    | 33 ++++++-----
 src/common/Makefile              |  1 +
 src/common/file_utils.c          | 32 +++++------
 src/common/file_utils_febe.c     | 99 ++++++++++++++++++++++++++++++++
 src/include/common/file_utils.h  | 22 ++++++-
 src/tools/msvc/Mkvcbuild.pm      |  2 +-
 src/tools/pgindent/typedefs.list |  1 +
 7 files changed, 154 insertions(+), 36 deletions(-)
 create mode 100644 src/common/file_utils_febe.c

diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f376a97ed6..bd72a87ee3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -89,6 +89,7 @@
 #include "access/xlog.h"
 #include "catalog/pg_tablespace.h"
 #include "common/file_perm.h"
+#include "common/file_utils.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "portability/mem.h"
@@ -3340,8 +3341,6 @@ walkdir(const char *path,
 	while ((de = ReadDirExtended(dir, path, elevel)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -3351,23 +3350,23 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks, elevel))
 		{
-			ereport(elevel,
-					(errcode_for_file_access(),
-					 errmsg("could not stat file \"%s\": %m", subpath)));
-			continue;
-		}
+			case PGFILETYPE_REG:
+				(*action) (subpath, false, elevel);
+				break;
+			case PGFILETYPE_DIR:
+				walkdir(subpath, action, false, elevel);
+				break;
+			default:
 
-		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false, elevel);
-		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action, false, elevel);
+				/*
+				 * Errors are already reported directly by get_dirent_type(),
+				 * and any remaining symlinks and unknown file types are
+				 * ignored.
+				 */
+				break;
+		}
 	}
 
 	FreeDir(dir);				/* we ignore any error here */
diff --git a/src/common/Makefile b/src/common/Makefile
index 16619e4ba8..aac92aabe1 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -56,6 +56,7 @@ OBJS_COMMON = \
 	exec.o \
 	f2s.o \
 	file_perm.o \
+	file_utils_febe.o \
 	hashfn.o \
 	ip.o \
 	jsonapi.o \
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index a2faafdf13..e24f31dd3b 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -2,7 +2,7 @@
  *
  * File-processing utility routines.
  *
- * Assorted utility functions to work on files.
+ * Assorted utility functions to work on files, frontend only.
  *
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
@@ -167,8 +167,6 @@ walkdir(const char *path,
 	while (errno = 0, (de = readdir(dir)) != NULL)
 	{
 		char		subpath[MAXPGPATH * 2];
-		struct stat fst;
-		int			sret;
 
 		if (strcmp(de->d_name, ".") == 0 ||
 			strcmp(de->d_name, "..") == 0)
@@ -176,21 +174,23 @@ walkdir(const char *path,
 
 		snprintf(subpath, sizeof(subpath), "%s/%s", path, de->d_name);
 
-		if (process_symlinks)
-			sret = stat(subpath, &fst);
-		else
-			sret = lstat(subpath, &fst);
-
-		if (sret < 0)
+		switch (get_dirent_type(subpath, de, process_symlinks, PG_LOG_ERROR))
 		{
-			pg_log_error("could not stat file \"%s\": %m", subpath);
-			continue;
+			case PGFILETYPE_REG:
+				(*action) (subpath, false);
+				break;
+			case PGFILETYPE_DIR:
+				walkdir(subpath, action, false);
+				break;
+			default:
+
+				/*
+				 * Errors are already reported directly by get_dirent_type(),
+				 * and any remaining symlinks and unknown file types are
+				 * ignored.
+				 */
+				break;
 		}
-
-		if (S_ISREG(fst.st_mode))
-			(*action) (subpath, false);
-		else if (S_ISDIR(fst.st_mode))
-			walkdir(subpath, action, false);
 	}
 
 	if (errno)
diff --git a/src/common/file_utils_febe.c b/src/common/file_utils_febe.c
new file mode 100644
index 0000000000..3c00ab7c4d
--- /dev/null
+++ b/src/common/file_utils_febe.c
@@ -0,0 +1,99 @@
+/*-------------------------------------------------------------------------
+ *
+ * File-processing utility routines.
+ *
+ * Assorted utility functions to work on files, frontend and backend.
+ *
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/common/file_utils_febe.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef FRONTEND
+#include "postgres_fe.h"
+#else
+#include "postgres.h"
+#endif
+
+#include <dirent.h>
+#include <sys/stat.h>
+
+#include "common/file_utils.h"
+#ifdef FRONTEND
+#include "common/logging.h"
+#else
+#include "utils/elog.h"
+#endif
+
+/*
+ * Return the type of a directory entry.
+ *
+ * "path" is the full path of the entry described by "de".  If
+ * look_through_symlinks is true, a symlink is reported as the type of its
+ * target; otherwise it is reported as PGFILETYPE_LNK.
+ *
+ * In frontend code, elevel should be a level from logging.h; in backend code
+ * it should be an error level from elog.h.  If stat() or lstat() fails, the
+ * failure is reported at that level and PGFILETYPE_ERROR is returned.  An
+ * entry that is not a regular file, directory or symlink is reported as
+ * PGFILETYPE_UNKNOWN.
+ */
+PGFileType
+get_dirent_type(const char *path,
+				const struct dirent *de,
+				bool look_through_symlinks,
+				int elevel)
+{
+	struct stat fst;
+	int			sret;
+
+	/*
+	 * We want to know the type of a directory entry.  Some systems tell us
+	 * that directly in the dirent struct, but that's a BSD/GNU extension.
+	 * Even when the interface is present, sometimes the type is unknown,
+	 * depending on the filesystem in use or in some cases options used at
+	 * filesystem creation time.
+	 */
+#if defined(DT_UNKNOWN) && defined(DT_REG) && defined(DT_DIR) && defined(DT_LNK)
+	if (de->d_type == DT_REG)
+		return PGFILETYPE_REG;
+	if (de->d_type == DT_DIR)
+		return PGFILETYPE_DIR;
+	if (de->d_type == DT_LNK && !look_through_symlinks)
+		return PGFILETYPE_LNK;
+#endif
+
+	/* Otherwise fall back to asking the filesystem. */
+	if (look_through_symlinks)
+		sret = stat(path, &fst);
+	else
+		sret = lstat(path, &fst);
+
+	if (sret < 0)
+	{
+#ifdef FRONTEND
+		pg_log_generic(elevel, "could not stat file \"%s\": %m", path);
+#else
+		ereport(elevel,
+				(errcode_for_file_access(),
+				 errmsg("could not stat file \"%s\": %m", path)));
+#endif
+		return PGFILETYPE_ERROR;
+	}
+
+	if (S_ISREG(fst.st_mode))
+		return PGFILETYPE_REG;
+	if (S_ISDIR(fst.st_mode))
+		return PGFILETYPE_DIR;
+#ifdef S_ISLNK
+	if (S_ISLNK(fst.st_mode))
+		return PGFILETYPE_LNK;
+#endif
+
+	/* Neither a regular file, a directory, nor a symlink. */
+	return PGFILETYPE_UNKNOWN;
+}
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index a7add75efa..16c7e7e249 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -1,6 +1,4 @@
 /*-------------------------------------------------------------------------
- *
- * File-processing utility routines for frontend code
  *
  * Assorted utility functions to work on files.
  *
@@ -15,10 +13,30 @@
 #ifndef FILE_UTILS_H
 #define FILE_UTILS_H
 
+#include <dirent.h>
+
+typedef enum PGFileType
+{
+	PGFILETYPE_ERROR,			/* stat()/lstat() failed; already reported */
+	PGFILETYPE_UNKNOWN,			/* none of the types below */
+	PGFILETYPE_REG,				/* regular file */
+	PGFILETYPE_DIR,				/* directory */
+	PGFILETYPE_LNK				/* symbolic link */
+} PGFileType;
+
+/* Functions defined in file_utils_febe.c for both FE and BE code. */
+extern PGFileType get_dirent_type(const char *path,
+								  const struct dirent *de,
+								  bool look_through_symlinks,
+								  int elevel);
+
+/* Functions defined in file_utils.c only for FE code. */
+#ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
 extern void fsync_pgdata(const char *pg_data, int serverVersion);
 extern void fsync_dir_recurse(const char *dir);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
+#endif
 
 #endif							/* FILE_UTILS_H */
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 20da7985c1..a64127a196 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -121,7 +121,7 @@ sub mkvcbuild
 	our @pgcommonallfiles = qw(
 	  archive.c base64.c checksum_helper.c
 	  config_info.c controldata_utils.c d2s.c encnames.c exec.c
-	  f2s.c file_perm.c hashfn.c ip.c jsonapi.c
+	  f2s.c file_perm.c file_utils_febe.c hashfn.c ip.c jsonapi.c
 	  keywords.c kwlookup.c link-canary.c md5.c
 	  pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c
 	  saslprep.c scram-common.c string.c stringinfo.c unicode_norm.c username.c
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3d990463ce..b4d40dda16 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1514,6 +1514,7 @@ PGEventResultCopy
 PGEventResultCreate
 PGEventResultDestroy
 PGFInfoFunction
+PGFileType
 PGFunction
 PGLZ_HistEntry
 PGLZ_Strategy
-- 
2.20.1

