Hello List,

It looks like after crond call start_jobs() -> vfork() children,
there might be an incosistent state between these two objects:
1. crond's file descriptors table.
2. glibc syslog()'s internal static variable 'LogFile'.

For example, crond opens a config file and got fd=4 for it,
which is a regular file for parsing jobs.
When load_crontab() is parsing an invalid token (a line with a prefix '<'),
ParseField() -> bb_error_msg() would call syslog() for logging but failed,
because syslog() call sendto() with wrong fd=4, which is indicated by 
the glibc syslog.c static varlable 'LogFile=4', and returned ENOTSOCK;
then fd=4 is closed, a DGRAM socket is created with the same fd=4,
this time syslog() is able to connect it and logging successfully.
The 'LogFile' in glibc syslog.c:
https://elixir.bootlin.com/glibc/glibc-2.21/source/misc/syslog.c#L64

And here is the problem: while crond goes back to load_crontab()'s loop, 
config_read() -> ... -> read() would be blocked on fd=4; crond is blocked.
Now fd=4 is a DGRAM socket connected to /dev/log for daemon logging, 
not a regular file for parsing jobs any more as earlier mentioned.

This patch adds 'LOG_NDELAY' into crond's openlog(), so the socket fd to
/dev/log is ready in file descriptors table in the first place.
Without 'LOG_NDELAY' the socket might be delay opened until the first 
syslog() call, and this might a bit hard for troubleshooting with the 
upcoming and interleaving vfork() matrix.

TL;DR

Let me explain a bit more details what it looks like with an example :)

An invalid and mis-configured line with a prefix '<' is added to 
config file, in order to reproduce this issue, like this:
< 1 * * * /etc/init.d/flush_memory.sh >/dev/null 2>&1

The crond is launched as daemon when system bootup like this:
/usr/sbin/crond -l 9 -c /tmp/cron/crontabs

Before applied this patch, strace show crond is blocked while 'read(4,'
because syslog() close fd=4 and re-create a socket fd for it:

read(4, "# m h dom m dow cmd\n0-59/20 3 * * * /sbin/adjust_time\n0 1 * * * 
/etc/i"..., 4096) = 2293
 > /lib/libc-2.21.so(__read_nocancel+0x7) [0xdb570]
 > /lib/libc-2.21.so(_IO_file_underflow+0x150) [0x73c90]
 > /lib/libc-2.21.so(getdelim+0x2a0) [0x69740]
 > /bin/busybox(get_line_with_continuation+0x2e) [0xa55f9]
sendto(4, "<75>Dec  8 16:45:00 crond[20557]: user admin: parse error at <\n", 
63, MSG_NOSIGNAL, NULL, 0) = -1 ENOTSOCK (Socket operation on non-socket)
 > /lib/libc-2.21.so(__send+0x1d) [0xe92fd]
 > /lib/libc-2.21.so(__vsyslog_chk+0x26b) [0xe3abb]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
close(4)                                = 0
 > /lib/libc-2.21.so(__close_nocancel+0x7) [0xdbb90]
 > /lib/libc-2.21.so(closelog_internal.part.0+0xf) [0xe382f]
 > /lib/libc-2.21.so(__vsyslog_chk+0x493) [0xe3ce3]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
 > /lib/libc-2.21.so(socket+0x7) [0xe94c7]
 > /lib/libc-2.21.so(openlog_internal+0x1a0) [0xe37f0]
 > /lib/libc-2.21.so(__vsyslog_chk+0x4aa) [0xe3cfa]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
connect(4, {sa_family=AF_UNIX, sun_path="/dev/log"}, 110) = 0
 > /lib/libc-2.21.so(__connect_nocancel+0x7) [0xe9050]
 > /lib/libc-2.21.so(openlog_internal+0x62) [0xe36b2]
 > /lib/libc-2.21.so(__vsyslog_chk+0x4aa) [0xe3cfa]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
sendto(4, "<75>Dec  8 16:45:00 crond[20557]: user admin: parse error at <\n", 
63, MSG_NOSIGNAL, NULL, 0) = 63
 > /lib/libc-2.21.so(__send+0x1d) [0xe92fd]
 > /lib/libc-2.21.so(__vsyslog_chk+0x4d2) [0xe3d22]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
read(4, 


And /proc/${crond}/stack show that read() is blocked by a socket:
# cat /proc/19693/stack
[<0>] __skb_wait_for_more_packets+0x105/0x160
[<0>] unix_dgram_recvmsg+0xd2/0x540
[<0>] sock_read_iter+0x8d/0xf0
[<0>] new_sync_read+0x169/0x180
[<0>] vfs_read+0x16f/0x210
[<0>] ksys_read+0xaa/0xc0
[<0>] do_syscall_64+0x2d/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

After applied this patch, crond would get the fd=3 for '/dev/log' in the 
first place, while the invalid line is parsing, syslog() would log message 
with correct fd=3, not overwrite the regular file fd=5, strace show that:

socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 3
 > /lib/libc-2.21.so(socket+0x7) [0xe94c7]
 > /lib/libc-2.21.so(openlog_internal+0x1a0) [0xe37f0]
 > /lib/libc-2.21.so(openlog+0x51) [0xe3f31]
 > /bin/busybox(crond_main+0xb2) [0xf879]
connect(3, {sa_family=AF_UNIX, sun_path="/dev/log"}, 110) = 0
 > /lib/libc-2.21.so(__connect_nocancel+0x7) [0xe9050]
 > /lib/libc-2.21.so(openlog_internal+0x62) [0xe36b2]
 > /lib/libc-2.21.so(openlog+0x51) [0xe3f31]
 > /bin/busybox(crond_main+0xb2) [0xf879]
chdir("/tmp/cron/crontabs")             = 0
 > /lib/libc-2.21.so(chdir+0x7) [0xdbd37]
 > /bin/busybox(xchdir+0x16) [0x99f4]
<... cut dummy ...>
read(5, "# m h dom m dow cmd\n0-59/20 3 * * * /sbin/adjust_time\n0 1 * * * 
/etc/i"..., 4096) = 1967
... <cut localtime loading dummy> ...
sendto(3, "<75>Dec  8 15:37:18 crond[23384]: user admin: parse error at <\n", 
63, MSG_NOSIGNAL, NULL, 0) = 63
 > /lib/libc-2.21.so(__send+0x1d) [0xe92fd]
 > /lib/libc-2.21.so(__vsyslog_chk+0x26b) [0xe3abb]
 > /lib/libc-2.21.so(syslog+0x8f) [0xe3e2f]
 > /bin/busybox(bb_verror_msg+0x27c) [0x8711]
read(5, "", 4096)                       = 0
 > /lib/libc-2.21.so(__read_nocancel+0x7) [0xdb570]
 > /lib/libc-2.21.so(_IO_file_underflow+0x150) [0x73c90]
 > /lib/libc-2.21.so(getdelim+0x2a0) [0x69740]
 > /bin/busybox(get_line_with_continuation+0x2e) [0xa55f9]
close(5)                                = 0
 > /lib/libc-2.21.so(__close_nocancel+0x7) [0xdbb90]
 > /lib/libc-2.21.so(_IO_file_close_it+0x120) [0x73440]
 > /lib/libc-2.21.so(fclose+0x1a2) [0x680f2]
 > /bin/busybox(config_close+0x32) [0xa5597]


--
Regards,
Jones Syue | 薛懷宗
QNAP Systems, Inc.
From 4d255e93caf814efddad30e15ccf4e81a17c5e52 Mon Sep 17 00:00:00 2001
From: Jones Syue <joness...@qnap.com>
Date: Fri, 8 Dec 2023 17:44:00 +0800
Subject: [PATCH] crond: add LOG_NDELAY to openlog

It looks like after crond call start_jobs() -> vfork() children,
there might be an incosistent state between these two objects:
1. crond's file descriptors table.
2. glibc syslog()'s internal static variable 'LogFile'.

For example, crond opens a config file and got fd=4 for it,
which is a regular file for parsing jobs.
When load_crontab() is parsing an invalid token (a line with a prefix '<'),
ParseField() -> bb_error_msg() would call syslog() for logging but failed,
because syslog() call sendto() with wrong fd=4, which is indicated by
the glibc syslog.c static varlable 'LogFile=4', and returned ENOTSOCK;
then fd=4 is closed, a DGRAM socket is created with the same fd=4,
this time syslog() is able to connect it and logging successfully.
The 'LogFile' in glibc syslog.c:
https://elixir.bootlin.com/glibc/glibc-2.21/source/misc/syslog.c#L64

And here is the problem: while crond goes back to load_crontab()'s loop,
config_read() -> ... -> read() would be blocked on fd=4; crond is blocked.
Now fd=4 is a DGRAM socket connected to /dev/log for daemon logging,
not a regular file for parsing jobs any more as earlier mentioned.

This patch adds 'LOG_NDELAY' into crond's openlog(), so the socket fd to
/dev/log is ready in file descriptors table in the first place.
Without 'LOG_NDELAY' the socket might be delay opened until the first
syslog() call, and this might a bit hard for troubleshooting with the
upcoming and interleaving vfork() matrix.

Signed-off-by: Jones Syue <joness...@qnap.com>
---
 miscutils/crond.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/miscutils/crond.c b/miscutils/crond.c
index 3bbb5ac..2b7ac54 100644
--- a/miscutils/crond.c
+++ b/miscutils/crond.c
@@ -1048,7 +1048,7 @@ int crond_main(int argc UNUSED_PARAM, char **argv)
 
        if (!(opts & OPT_d) && G.log_filename == NULL) {
                /* logging to syslog */
-               openlog(applet_name, LOG_CONS | LOG_PID, LOG_CRON);
+               openlog(applet_name, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_CRON);
                logmode = LOGMODE_SYSLOG;
        }
 
-- 
2.1.4

_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to