URL: https://github.com/SSSD/sssd/pull/5712
Author: justin-stephenson
Title: #5712: Health and Support Analyzer - Add request log parsing utility
Action: synchronized

To pull the PR as a Git branch:
git remote add ghsssd https://github.com/SSSD/sssd
git fetch ghsssd pull/5712/head:pr5712
git checkout pr5712
From 9ab3187cea5a9e88feb8ebb8bb9afd351f5f0906 Mon Sep 17 00:00:00 2001
From: Justin Stephenson <jstep...@redhat.com>
Date: Fri, 11 Jun 2021 11:04:59 -0400
Subject: [PATCH 1/2] TOOLS: Add sss_analyze utility

Add a log parsing tool which can be used to track requests across
responder and backend logs.
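
As a usage sketch (option names as defined by the parser added below;
the installed command name and location may differ):

sss_analyze.py request --list
sss_analyze.py request --cid 1 --cachereq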
---
 Makefile.am                           |   2 +-
 configure.ac                          |   1 +
 contrib/sssd.spec.in                  |   3 +
 src/tools/analyzer/Makefile.am        |  11 ++
 src/tools/analyzer/module_request.py  | 179 ++++++++++++++++++++++++++
 src/tools/analyzer/source_files.py    |  58 +++++++++
 src/tools/analyzer/source_journald.py |  31 +++++
 src/tools/analyzer/sss_analyze.py     |  51 ++++++++
 8 files changed, 335 insertions(+), 1 deletion(-)
 create mode 100644 src/tools/analyzer/Makefile.am
 create mode 100644 src/tools/analyzer/module_request.py
 create mode 100644 src/tools/analyzer/source_files.py
 create mode 100644 src/tools/analyzer/source_journald.py
 create mode 100755 src/tools/analyzer/sss_analyze.py

diff --git a/Makefile.am b/Makefile.am
index 1dd06d74c6..37b3a28a8e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,7 @@ SUBDIRS += src/man
 endif
 
 SUBDIRS += . src/tests/cwrap src/tests/intg src/tests/test_CA \
-             src/tests/test_ECC_CA
+             src/tests/test_ECC_CA src/tools/analyzer
 
 # Some old versions of automake don't define builddir
 builddir ?= .
diff --git a/configure.ac b/configure.ac
index e98487cae0..3b595d25ee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -538,6 +538,7 @@ AC_CONFIG_FILES([Makefile contrib/sssd.spec src/examples/rwtab src/doxy.config
                  src/lib/sifp/sss_simpleifp.doxy
                  src/config/setup.py
                  src/systemtap/sssd.stp
+                 src/tools/analyzer/Makefile
                  src/config/SSSDConfig/__init__.py])
 AC_CONFIG_FILES([sbus_generate.sh], [chmod +x sbus_generate.sh])
 AC_OUTPUT
diff --git a/contrib/sssd.spec.in b/contrib/sssd.spec.in
index e2877b64b1..01a18400e4 100644
--- a/contrib/sssd.spec.in
+++ b/contrib/sssd.spec.in
@@ -199,6 +199,8 @@ Requires: sssd-common = %{version}-%{release}
 Requires: python3-sss = %{version}-%{release}
 Requires: python3-sssdconfig = %{version}-%{release}
 Requires: libsss_certmap = %{version}-%{release}
+# required by sss_analyze
+Requires: python3-systemd
 Recommends: sssd-dbus
 
 %description tools
@@ -843,6 +845,7 @@ done
 %{_sbindir}/sss_debuglevel
 %{_sbindir}/sss_seed
 %{_sbindir}/sssctl
+%{python3_sitelib}/sssd/
 %{_mandir}/man8/sss_obfuscate.8*
 %{_mandir}/man8/sss_override.8*
 %{_mandir}/man8/sss_debuglevel.8*
diff --git a/src/tools/analyzer/Makefile.am b/src/tools/analyzer/Makefile.am
new file mode 100644
index 0000000000..921e7d4335
--- /dev/null
+++ b/src/tools/analyzer/Makefile.am
@@ -0,0 +1,11 @@
+pkgpythondir = $(python3dir)/sssd
+
+dist_pkgpython_SCRIPTS = \
+    sss_analyze.py \
+    $(NULL)
+
+dist_pkgpython_DATA = \
+    module_request.py \
+    source_files.py \
+    source_journald.py \
+    $(NULL)
diff --git a/src/tools/analyzer/module_request.py b/src/tools/analyzer/module_request.py
new file mode 100644
index 0000000000..a333191195
--- /dev/null
+++ b/src/tools/analyzer/module_request.py
@@ -0,0 +1,179 @@
+import re
+
+import source_files
+
+
+class Analyzer:
+    """ Request tracking module """
+
+    consumed_logs = []
+    done = ""
+
+    def add_options(self, parser):
+        subparser = parser.add_subparsers(dest='subcommand', metavar=None,
+                                          required=True)
+        request_parser = subparser.add_parser('request', help='Track requests'
+                                              ' across SSSD components')
+        request_parser.add_argument("--list",
+                                    help="List recent client requests",
+                                    action="store_true")
+        request_parser.add_argument("--merge",
+                                    help="Merge logs together, formatted by"
+                                    " timestamp", action="store_true")
+        request_parser.add_argument("--cid",
+                                    help="Print logs related to the "
+                                    "provided Client ID", nargs=1)
+        request_parser.add_argument("--pam",
+                                    help="Use with --cid to track PAM related "
+                                    "requests", action="store_true")
+        request_parser.add_argument("--cachereq",
+                                    help="Include cache request related logs",
+                                    action="store_true")
+
+    # retrieve the list of associated REQ_TRACE IDs
+    def get_linked_ids(self, client_id, source, pattern, regex):
+        linked_ids = []
+        # compile both expressions once, outside of the line loop
+        re_obj = re.compile(pattern)
+        cr_re = re.compile(regex)
+        for line in source:
+            if re_obj.search(line):
+                # exclude backtrace logs
+                if line.startswith('   *  '):
+                    continue
+                match = cr_re.search(line)
+                if match:
+                    linked_ids.append(match.group(0))
+        return linked_ids
+
+    def consume_line(self, line, source, consume):
+        found_results = True
+        if consume:
+            # strip only the trailing newline; journald lines have none
+            self.consumed_logs.append(line.rstrip('\n'))
+        else:
+            # the files source includes a trailing newline
+            if isinstance(source, source_files.Reader):
+                print(line, end='')
+            else:
+                print(line)
+        return found_results
+
+    # iterate through the source and search for any of the patterns in
+    # the list; each match is either consumed or printed
+    def pattern_match(self, patterns, source, consume):
+        found_results = False
+        for line in source:
+            for pattern in patterns:
+                re_obj = re.compile(pattern)
+                if re_obj.search(line):
+                    # exclude backtrace logs
+                    if line.startswith('   *  '):
+                        continue
+                    found_results = self.consume_line(line, source, consume)
+        return found_results
+
+    def execute(self, source, options):
+        if options['list']:
+            if options['merge']:
+                print("--merge has no effect with --list")
+            self.list_requests(source, options['pam'])
+        elif options['cid']:
+            cid = options['cid'][0]
+            if not cid:
+                print("sss_analyze request: error: argument"
+                      " --cid: expected 1 argument")
+            else:
+                self.track_request(source, options)
+
+    def parse_matched_line(self, line):
+        # exclude backtrace logs
+        if line.startswith('   *  '):
+            return
+        # extract the cache request (CR) number from the matched line
+        fields = line.split("[")
+        cr_field = fields[2].split(":")[1]
+        cr = cr_field[5:]
+        if "refreshed" in line:
+            return
+        if re.search("cache_req_send", line):
+            plugin = line.split('\'')[1]
+            print("   - " + plugin)
+        elif re.search("cache_req_process_input", line):
+            name = line.rsplit('[')[-1]
+            if cr not in self.done:
+                print("       - " + name[:-2])
+                self.done = cr
+        elif re.search("cache_req_search_send", line):
+            id = line.rsplit()[-1]
+            if cr not in self.done:
+                print("       - " + id)
+                self.done = cr
+        else:
+            ts = line.split(")")[0]
+            fields = line.split("[")
+            cid = fields[3][5:-1]
+            cmd = fields[4][4:-1]
+            print(ts + ": " + "CID #" + cid + ": " + cmd)
+
+    def list_requests(self, source, pam):
+        component = source.Component.NSS
+        resp = "nss"
+        patterns = [r'\[cmd']
+        patterns.append("(cache_req_send|cache_req_process_input|"
+                        "cache_req_search_send)")
+        consume = True
+        if pam:
+            component = source.Component.PAM
+            resp = "pam"
+
+        print(f"******** Listing {resp} client requests ********")
+        source.set_component(component)
+        self.done = ""
+        # print a summary line for each matching client request
+        for line in source:
+            for pattern in patterns:
+                re_obj = re.compile(pattern)
+                if re_obj.search(line):
+                    self.parse_matched_line(line)
+
+    def track_request(self, source, options):
+        merge = options['merge']
+        client_id = options['cid'][0]
+        component = source.Component.NSS
+        resp = "nss"
+        pattern = [rf'REQ_TRACE.*\[CID #{client_id}\]']
+
+        if options['pam']:
+            component = source.Component.PAM
+            resp = "pam"
+
+        print(f"******** Checking {resp} responder for Client ID"
+              f" {client_id} ********")
+        source.set_component(component)
+        if options['cachereq']:
+            cr_id_regex = 'CR #[0-9]+'
+            cr_ids = self.get_linked_ids(client_id, source,
+                                         pattern[0], cr_id_regex)
+            pattern.extend(rf'{id}\:' for id in cr_ids)
+
+        resp_results = self.pattern_match(pattern, source, merge)
+
+        print(f"******** Checking Backend for Client ID {client_id} ********")
+        pattern = [rf'REQ_TRACE.*\[sssd.{resp} CID #{client_id}\]']
+        source.set_component(source.Component.BE)
+
+        be_id_regex = r'\[RID#[0-9]+\]'
+        be_ids = self.get_linked_ids(client_id, source,
+                                     pattern[0], be_id_regex)
+        pattern.clear()
+        pattern.extend(f'\\{id}' for id in be_ids)
+        be_results = self.pattern_match(pattern, source, merge)
+
+        if merge:
+            # sort by date/timestamp
+            sorted_list = sorted(self.consumed_logs, key=lambda s: s.split(')')[0])
+            for entry in sorted_list:
+                print(entry)
+        if not resp_results and not be_results:
+            print(f"ID {client_id} not found in logs!")
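
A minimal standalone sketch of the ID-extraction step above, using a
fabricated log line (the exact message format is an assumption for
illustration; the regex is the same cr_id_regex used by track_request):

import re

# fabricated responder log line; real SSSD output may differ
line = ("(2021-06-11 11:04:59): [cache_req_send] (0x0400): "
        "CR #25: New request 'User by name'")

cr_re = re.compile('CR #[0-9]+')
match = cr_re.search(line)
if match:
    print(match.group(0))  # prints: CR #25
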
diff --git a/src/tools/analyzer/source_files.py b/src/tools/analyzer/source_files.py
new file mode 100644
index 0000000000..12be10f895
--- /dev/null
+++ b/src/tools/analyzer/source_files.py
@@ -0,0 +1,58 @@
+from enum import Enum
+import glob
+
+
+class Reader:
+    class Component(Enum):
+        NSS = 1   # NSS Responder
+        PAM = 2   # PAM Responder
+        BE = 3    # Backend
+
+    def __init__(self, path):
+        self.log_files = []
+        self.path = self.resolve_path(path)
+        self.domains = self.get_domain_logfiles()
+
+    def __iter__(self):
+        for log_file in self.log_files:
+            try:
+                with open(log_file) as file:
+                    for line in file:
+                        yield line
+            except FileNotFoundError as err:
+                print("Could not find log file, skipping")
+                print(err)
+                continue
+
+    def resolve_path(self, path):
+        if path.endswith("/"):
+            return path
+        else:
+            return path + "/"
+
+    def get_domain_logfiles(self):
+        domain_files = []
+        exclude_list = ["ifp", "nss", "pam", "sudo", "autofs",
+                        "ssh", "pac", "kcm", ".gz"]
+        file_list = glob.glob(self.path + "sssd_*")
+        for file in file_list:
+            if not any(s in file for s in exclude_list):
+                domain_files.append(file)
+
+        return domain_files
+
+    def set_component(self, component):
+        self.log_files = []
+        if component == self.Component.NSS:
+            self.log_files.append(self.path + "sssd_nss.log")
+        elif component == self.Component.PAM:
+            self.log_files.append(self.path + "sssd_pam.log")
+        elif component == self.Component.BE:
+            if not self.domains:
+                raise IOError("No domain log files found")
+            for dom in self.domains:
+                self.log_files.append(dom)
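
A short usage sketch for the files Reader above, assuming the analyzer
modules are on the Python path (as they are once installed into
%{python3_sitelib}/sssd) and logs exist under /var/log/sssd/:

import source_files

reader = source_files.Reader("/var/log/sssd")
reader.set_component(reader.Component.NSS)
# the files source yields each line with its trailing newline intact
for line in reader:
    print(line, end='')
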
diff --git a/src/tools/analyzer/source_journald.py b/src/tools/analyzer/source_journald.py
new file mode 100644
index 0000000000..8011209d4a
--- /dev/null
+++ b/src/tools/analyzer/source_journald.py
@@ -0,0 +1,31 @@
+from systemd import journal
+
+from enum import Enum
+
+_EXE_PREFIX = "/usr/libexec/sssd/"
+_NSS_MATCH = _EXE_PREFIX + "sssd_nss"
+_PAM_MATCH = _EXE_PREFIX + "sssd_pam"
+_BE_MATCH = _EXE_PREFIX + "sssd_be"
+
+
+class Reader:
+    class Component(Enum):
+        NSS = 1   # NSS Responder
+        PAM = 2   # PAM Responder
+        BE = 3    # Backend
+
+    def __init__(self):
+        self.reader = journal.Reader()
+        self.reader.this_boot()
+
+    def __iter__(self):
+        for entry in self.reader:
+            yield entry['MESSAGE']
+
+    def set_component(self, component):
+        if component == self.Component.NSS:
+            self.reader.add_match(_EXE=_NSS_MATCH)
+        elif component == self.Component.PAM:
+            self.reader.add_match(_EXE=_PAM_MATCH)
+        elif component == self.Component.BE:
+            self.reader.add_match(_EXE=_BE_MATCH)
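
The journald Reader can be exercised the same way; a sketch, assuming
python3-systemd is installed and SSSD is logging to the journal:

import source_journald

reader = source_journald.Reader()
reader.set_component(reader.Component.BE)
# journal entries are yielded as plain MESSAGE strings, with no newline
for message in reader:
    print(message)
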
diff --git a/src/tools/analyzer/sss_analyze.py b/src/tools/analyzer/sss_analyze.py
new file mode 100755
index 0000000000..fc2fb661fa
--- /dev/null
+++ b/src/tools/analyzer/sss_analyze.py
@@ -0,0 +1,51 @@
+#!/usr/bin/python3
+
+import argparse
+
+import source_files
+import module_request
+
+
+def add_options():
+    parser = argparse.ArgumentParser('sss_analyze')
+    parser.add_argument(
+            '--source', action='store', type=str, default='files',
+            choices=['files', 'journald']
+    )
+    parser.add_argument(
+            '--logdir', action='store', type=str, default='/var/log/sssd/'
+    )
+    return parser
+
+
+def load_module(module, parser):
+    if module == 'request':
+        analyzer = module_request.Analyzer()
+    else:
+        return
+    analyzer.add_options(parser)
+    return analyzer
+
+
+def main():
+    parser = add_options()
+
+    module = load_module('request', parser)
+
+    ns = parser.parse_args()
+    args = vars(ns)
+    path = args['logdir']
+
+    if args['source'] == "journald":
+        import source_journald
+        reader = source_journald.Reader()
+    else:
+        reader = source_files.Reader(path)
+
+    module.execute(reader, args)
+
+
+if __name__ == '__main__':
+    main()
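
Example invocations of the script above (--source defaults to files and
--logdir to /var/log/sssd/; global options must precede the subcommand):

./sss_analyze.py request --list
./sss_analyze.py request --cid 1 --merge
./sss_analyze.py --source journald request --cid 1 --pam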

From beb5b95c6c30ab25e1e6a28e293297924007dc2e Mon Sep 17 00:00:00 2001
From: Justin Stephenson <jstep...@redhat.com>
Date: Fri, 21 May 2021 14:42:41 -0400
Subject: [PATCH 2/2] SSSCTL: Add analyze command

Wrapper for sss_analyze

Print a message about limited functionality when tevent chain ID
support is not built.
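
For example, with the wrapper in place (arguments are forwarded verbatim
to sss_analyze.py, as implemented below):

sssctl analyze request --list
sssctl analyze request --cid 1 --cachereq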
---
 Makefile.am                    |  1 +
 src/tools/sssctl/sssctl.c      |  1 +
 src/tools/sssctl/sssctl.h      |  4 +++
 src/tools/sssctl/sssctl_logs.c | 49 ++++++++++++++++++++++++++++++++++
 4 files changed, 55 insertions(+)

diff --git a/Makefile.am b/Makefile.am
index 37b3a28a8e..6e8595d5a7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2053,6 +2053,7 @@ sssctl_LDADD = \
     $(NULL)
 sssctl_CFLAGS = \
     $(AM_CFLAGS) \
+    -DPYTHONDIR_PATH=\"$(python3dir)/sssd\" \
     $(NULL)
 
 if BUILD_SUDO
diff --git a/src/tools/sssctl/sssctl.c b/src/tools/sssctl/sssctl.c
index 2997dbf968..3bc16cdcd0 100644
--- a/src/tools/sssctl/sssctl.c
+++ b/src/tools/sssctl/sssctl.c
@@ -279,6 +279,7 @@ int main(int argc, const char **argv)
         SSS_TOOL_COMMAND("logs-remove", "Remove existing SSSD log files", 0, sssctl_logs_remove),
         SSS_TOOL_COMMAND("logs-fetch", "Archive SSSD log files in tarball", 0, sssctl_logs_fetch),
         SSS_TOOL_COMMAND("debug-level", "Change SSSD debug level", 0, sssctl_debug_level),
+        SSS_TOOL_COMMAND("analyze", "Analyze logged data", 0, sssctl_analyze),
 #ifdef HAVE_LIBINI_CONFIG_V1_3
         SSS_TOOL_DELIMITER("Configuration files tools:"),
         SSS_TOOL_COMMAND_FLAGS("config-check", "Perform static analysis of SSSD configuration", 0, sssctl_config_check, SSS_TOOL_FLAG_SKIP_CMD_INIT),
diff --git a/src/tools/sssctl/sssctl.h b/src/tools/sssctl/sssctl.h
index 0115b2457c..29fdb7f480 100644
--- a/src/tools/sssctl/sssctl.h
+++ b/src/tools/sssctl/sssctl.h
@@ -96,6 +96,10 @@ errno_t sssctl_debug_level(struct sss_cmdline *cmdline,
                            struct sss_tool_ctx *tool_ctx,
                            void *pvt);
 
+errno_t sssctl_analyze(struct sss_cmdline *cmdline,
+                       struct sss_tool_ctx *tool_ctx,
+                       void *pvt);
+
 errno_t sssctl_user_show(struct sss_cmdline *cmdline,
                          struct sss_tool_ctx *tool_ctx,
                          void *pvt);
diff --git a/src/tools/sssctl/sssctl_logs.c b/src/tools/sssctl/sssctl_logs.c
index 9ff2be05b6..1ac17173f3 100644
--- a/src/tools/sssctl/sssctl_logs.c
+++ b/src/tools/sssctl/sssctl_logs.c
@@ -40,6 +40,7 @@
 
 #define LOG_FILE(file) " " LOG_PATH "/" file
 #define LOG_FILES LOG_FILE("*.log")
+#define SSS_ANALYZE PYTHONDIR_PATH "/sss_analyze.py"
 
 #define CHECK(expr, done, msg) do { \
     if (expr) { \
@@ -368,3 +369,51 @@ errno_t sssctl_debug_level(struct sss_cmdline *cmdline,
     talloc_free(ctx);
     return ret;
 }
+
+errno_t sssctl_analyze(struct sss_cmdline *cmdline,
+                       struct sss_tool_ctx *tool_ctx,
+                       void *pvt)
+{
+    errno_t ret;
+    char *cmd_args = NULL;
+    const char *analyzecmd = SSS_ANALYZE;
+    char *cmd = NULL;
+    int i;
+
+#ifndef BUILD_CHAIN_ID
+    PRINT("NOTE: Tevent chain ID support missing, request analysis will be limited.\n");
+    PRINT("It is recommended to use the --logdir option against SSSD logs "
+          "generated with tevent chain ID support.\n");
+#endif
+    if (cmdline->argc == 0) {
+        ret = sssctl_run_command(analyzecmd);
+        goto done;
+    }
+
+    cmd_args = talloc_strdup(tool_ctx, "");
+    if (cmd_args == NULL) {
+        ret = ENOMEM;
+        goto done;
+    }
+
+    for (i = 0; i < cmdline->argc; i++) {
+        cmd_args = talloc_strdup_append(cmd_args, cmdline->argv[i]);
+        if (i != cmdline->argc - 1) {
+            cmd_args = talloc_strdup_append(cmd_args, " ");
+        }
+    }
+
+    cmd = talloc_asprintf(tool_ctx, "%s %s", analyzecmd, cmd_args);
+    if (cmd == NULL) {
+        ret = ENOMEM;
+        goto done;
+    }
+
+    ret = sssctl_run_command(cmd);
+
+done:
+    talloc_free(cmd_args);
+    talloc_free(cmd);
+
+    return ret;
+}
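
The wrapper simply reassembles its arguments into a single command line,
so for example "sssctl analyze request --cid 1" ends up running
$(python3dir)/sssd/sss_analyze.py request --cid 1 via sssctl_run_command().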