On Sat, Jul 4, 2020 at 11:15 AM Max Englander <max.englan...@gmail.com> wrote: > > In environments where the preservation of audit events and predictable > usage of system memory are prioritized, admins may use a combination of > --backlog_wait_time and -b options at the risk of degraded performance > resulting from backlog waiting. In some cases, this risk may be > preferred to lost events or unbounded memory usage. Ideally, this risk > can be mitigated by making adjustments when backlog waiting is detected. > > However, detection can be difficult using the currently available > metrics. For example, an admin attempting to debug degraded performance > may falsely believe a full backlog indicates backlog waiting. It may > turn out the backlog frequently fills up but drains quickly. > > To make it easier to reliably track degraded performance to backlog > waiting, this patch makes the following changes: > > Add a new field backlog_wait_time_total to the audit status reply. > Initialize this field to zero. Add to this field the total time spent > by the current task on scheduled timeouts while the backlog limit is > exceeded. Reset field to zero upon request via AUDIT_SET. > > Tested on Ubuntu 18.04 using complementary changes to the > audit-userspace and audit-testsuite: > - https://github.com/linux-audit/audit-userspace/pull/134 > - https://github.com/linux-audit/audit-testsuite/pull/97 > > Signed-off-by: Max Englander <max.englan...@gmail.com> > --- > Patch changelogs between v1 and v2: > - Instead of printing a warning when backlog waiting occurs, add > duration of backlog waiting to cumulative sum, and report this > sum in audit status reply. > > Patch changelogs between v2 and v3: > - Rename backlog_wait_sum to backlog_wait_time_actual. > - Drop unneeded and unwanted header flags > AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_SUM and > AUDIT_VERSION_BACKLOG_WAIT_SUM. > - Increment backlog_wait_time_actual counter after every call to > schedule_timeout rather than once after enqueuing (or losing) an > audit record. > - Add support for resetting backlog_wait_time_actual counter to zero > upon request via AUDIT_SET. > > include/uapi/linux/audit.h | 18 +++++++++++------- > kernel/audit.c | 35 +++++++++++++++++++++++++---------- > 2 files changed, 36 insertions(+), 17 deletions(-)
This looks okay to me, thanks for the fixes Max. Steve, does the associated userspace patch look okay to you? > diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h > index a534d71e689a..92d72965ad44 100644 > --- a/include/uapi/linux/audit.h > +++ b/include/uapi/linux/audit.h > @@ -332,14 +332,15 @@ enum { > }; > > /* Status symbols */ > - /* Mask values */ > -#define AUDIT_STATUS_ENABLED 0x0001 > -#define AUDIT_STATUS_FAILURE 0x0002 > -#define AUDIT_STATUS_PID 0x0004 > + /* Mask values */ > +#define AUDIT_STATUS_ENABLED 0x0001 > +#define AUDIT_STATUS_FAILURE 0x0002 > +#define AUDIT_STATUS_PID 0x0004 > #define AUDIT_STATUS_RATE_LIMIT 0x0008 > -#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 > -#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 > -#define AUDIT_STATUS_LOST 0x0040 > +#define AUDIT_STATUS_BACKLOG_LIMIT 0x0010 > +#define AUDIT_STATUS_BACKLOG_WAIT_TIME 0x0020 > +#define AUDIT_STATUS_LOST 0x0040 > +#define AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL 0x0080 > > #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 > #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 > @@ -466,6 +467,9 @@ struct audit_status { > __u32 feature_bitmap; /* bitmap of kernel audit features */ > }; > __u32 backlog_wait_time;/* message queue wait timeout */ > + __u32 backlog_wait_time_actual;/* time spent waiting while > + * message limit exceeded > + */ > }; > > struct audit_features { > diff --git a/kernel/audit.c b/kernel/audit.c > index 87f31bf1f0a0..33c640fdacf7 100644 > --- a/kernel/audit.c > +++ b/kernel/audit.c > @@ -136,6 +136,11 @@ u32 audit_sig_sid = 0; > */ > static atomic_t audit_lost = ATOMIC_INIT(0); > > +/* Monotonically increasing sum of time the kernel has spent > + * waiting while the backlog limit is exceeded. > + */ > +static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0); > + > /* Hash for inode-based rules */ > struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; > > @@ -1193,17 +1198,18 @@ static int audit_receive_msg(struct sk_buff *skb, > struct nlmsghdr *nlh) > case AUDIT_GET: { > struct audit_status s; > memset(&s, 0, sizeof(s)); > - s.enabled = audit_enabled; > - s.failure = audit_failure; > + s.enabled = audit_enabled; > + s.failure = audit_failure; > /* NOTE: use pid_vnr() so the PID is relative to the current > * namespace */ > - s.pid = auditd_pid_vnr(); > - s.rate_limit = audit_rate_limit; > - s.backlog_limit = audit_backlog_limit; > - s.lost = atomic_read(&audit_lost); > - s.backlog = skb_queue_len(&audit_queue); > - s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; > - s.backlog_wait_time = audit_backlog_wait_time; > + s.pid = auditd_pid_vnr(); > + s.rate_limit = audit_rate_limit; > + s.backlog_limit = audit_backlog_limit; > + s.lost = atomic_read(&audit_lost); > + s.backlog = skb_queue_len(&audit_queue); > + s.feature_bitmap = AUDIT_FEATURE_BITMAP_ALL; > + s.backlog_wait_time = audit_backlog_wait_time; > + s.backlog_wait_time_actual = > atomic_read(&audit_backlog_wait_time_actual); > audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); > break; > } > @@ -1307,6 +1313,12 @@ static int audit_receive_msg(struct sk_buff *skb, > struct nlmsghdr *nlh) > audit_log_config_change("lost", 0, lost, 1); > return lost; > } > + if (s.mask == AUDIT_STATUS_BACKLOG_WAIT_TIME_ACTUAL) { > + u32 actual = > atomic_xchg(&audit_backlog_wait_time_actual, 0); > + > + audit_log_config_change("backlog_wait_time_actual", > 0, actual, 1); > + return actual; > + } > break; > } > case AUDIT_GET_FEATURE: > @@ -1778,12 +1790,15 @@ struct audit_buffer *audit_log_start(struct > audit_context *ctx, gfp_t gfp_mask, > /* sleep if we are allowed and we haven't exhausted > our > * backlog wait limit */ > if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) > { > + long rtime = stime; > + > DECLARE_WAITQUEUE(wait, current); > > add_wait_queue_exclusive(&audit_backlog_wait, > &wait); > set_current_state(TASK_UNINTERRUPTIBLE); > - stime = schedule_timeout(stime); > + stime = schedule_timeout(rtime); > + atomic_add(rtime - stime, > &audit_backlog_wait_time_actual); > remove_wait_queue(&audit_backlog_wait, &wait); > } else { > if (audit_rate_check() && printk_ratelimit()) > -- > 2.17.1 > -- paul moore www.paul-moore.com -- Linux-audit mailing list Linux-audit@redhat.com https://www.redhat.com/mailman/listinfo/linux-audit