URL: https://github.com/SSSD/sssd/pull/5712
Author: justin-stephenson
 Title: #5712: Health and Support Analyzer - Add request log parsing utility
Action: synchronized

To pull the PR as Git branch:
git remote add ghsssd https://github.com/SSSD/sssd
git fetch ghsssd pull/5712/head:pr5712
git checkout pr5712
From 50f85c6a3d72769d3c02cbe06499d9b2c9162086 Mon Sep 17 00:00:00 2001
From: Justin Stephenson <jstep...@redhat.com>
Date: Wed, 4 Aug 2021 14:57:55 +0000
Subject: [PATCH 1/3] DP: Log offline warning for REQ_TRACE tracking

This allows the sssctl analyze parsing tool to report if the
backend was offline when the request came in to the data
provider.
---
 src/providers/data_provider/dp_request.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/providers/data_provider/dp_request.c b/src/providers/data_provider/dp_request.c
index 077b361f3c..3cbd55c1ec 100644
--- a/src/providers/data_provider/dp_request.c
+++ b/src/providers/data_provider/dp_request.c
@@ -133,6 +133,10 @@ static errno_t dp_attach_req(struct dp_req *dp_req,
         SSS_REQ_TRACE_CID_DP_REQ(SSSDBG_TRACE_FUNC, dp_req->name,
                                  "New request. [%s CID #%u] Flags [%#.4x].",
                                  sender_name, cli_id, dp_flags);
+        if (be_is_offline(provider->be_ctx)) {
+            DEBUG(SSSDBG_TRACE_FUNC, "[CID #%u] Backend is offline! " \
+                                     "Using cached data if available\n", cli_id);
+        }
     } else {
         SSS_REQ_TRACE_CID_DP_REQ(SSSDBG_TRACE_FUNC, dp_req->name,
                                  "New request. Flags [%#.4x].",

From a22c80cab5e52999b9b0f35980ba56a714bfffc9 Mon Sep 17 00:00:00 2001
From: Justin Stephenson <jstep...@redhat.com>
Date: Fri, 11 Jun 2021 11:04:59 -0400
Subject: [PATCH 2/3] TOOLS: Add sss_analyze utility

Add log parsing tool which can be used to track requests across
responder and backend logs.
---
 Makefile.am                            |   2 +-
 configure.ac                           |   3 +
 contrib/sssd.spec.in                   |   5 +
 src/conf_macros.m4                     |  14 ++
 src/tools/analyzer/Makefile.am         |  15 ++
 src/tools/analyzer/modules/__init__.py |   0
 src/tools/analyzer/modules/request.py  | 196 +++++++++++++++++++++++++
 src/tools/analyzer/source_files.py     |  58 ++++++++
 src/tools/analyzer/source_journald.py  |  39 +++++
 src/tools/analyzer/sss_analyze.py.in   |  24 +++
 10 files changed, 355 insertions(+), 1 deletion(-)
 create mode 100644 src/tools/analyzer/Makefile.am
 create mode 100644 src/tools/analyzer/modules/__init__.py
 create mode 100644 src/tools/analyzer/modules/request.py
 create mode 100644 src/tools/analyzer/source_files.py
 create mode 100644 src/tools/analyzer/source_journald.py
 create mode 100755 src/tools/analyzer/sss_analyze.py.in

diff --git a/Makefile.am b/Makefile.am
index 6d84b082ec..c7c429965d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,7 @@ SUBDIRS += src/man
 endif
 
 SUBDIRS += . src/tests/cwrap src/tests/intg src/tests/test_CA \
-             src/tests/test_ECC_CA
+             src/tests/test_ECC_CA src/tools/analyzer
 
 # Some old versions of automake don't define builddir
 builddir ?= .
diff --git a/configure.ac b/configure.ac
index c5c3a903b0..2b1f5fc828 100644
--- a/configure.ac
+++ b/configure.ac
@@ -152,6 +152,7 @@ WITH_WINBIND_PLUGIN_PATH
 WITH_SELINUX
 WITH_NSCD
 WITH_IPA_GETKEYTAB
+WITH_PYTHON_BIN
 WITH_SEMANAGE
 WITH_AD_GPO_DEFAULT
 WITH_GPO_CACHE_PATH
@@ -531,6 +532,8 @@ AC_CONFIG_FILES([Makefile contrib/sssd.spec src/examples/rwtab src/doxy.config
                  src/lib/sifp/sss_simpleifp.doxy
                  src/config/setup.py
                  src/systemtap/sssd.stp
+                 src/tools/analyzer/Makefile
+                 src/tools/analyzer/sss_analyze.py
                  src/config/SSSDConfig/__init__.py])
 AC_CONFIG_FILES([sbus_generate.sh], [chmod +x sbus_generate.sh])
 AC_OUTPUT
diff --git a/contrib/sssd.spec.in b/contrib/sssd.spec.in
index c8a4f62c7e..17fb7440ad 100644
--- a/contrib/sssd.spec.in
+++ b/contrib/sssd.spec.in
@@ -213,6 +213,9 @@ Requires: sssd-common = %{version}-%{release}
 Requires: python3-sss = %{version}-%{release}
 Requires: python3-sssdconfig = %{version}-%{release}
 Requires: libsss_certmap = %{version}-%{release}
+# required by sss_analyze
+Requires: python3-systemd
+Requires: python3-click
 Recommends: sssd-dbus
 
 %description tools
@@ -517,6 +520,7 @@ autoreconf -ivf
     --with-sssd-user=%{sssd_user} \
     --with-syslog=journald \
     --with-test-dir=/dev/shm \
+    --with-python-bin="%{__python3}" \
 %if 0%{?fedora}
     --enable-files-domain \
     --disable-polkit-rules-path \
@@ -854,6 +858,7 @@ done
 %{_sbindir}/sss_debuglevel
 %{_sbindir}/sss_seed
 %{_sbindir}/sssctl
+%{python3_sitelib}/sssd/
 %{_mandir}/man8/sss_obfuscate.8*
 %{_mandir}/man8/sss_override.8*
 %{_mandir}/man8/sss_debuglevel.8*
diff --git a/src/conf_macros.m4 b/src/conf_macros.m4
index f050df0eb5..a2abb2979a 100644
--- a/src/conf_macros.m4
+++ b/src/conf_macros.m4
@@ -408,6 +408,20 @@ AC_DEFUN([WITH_PYTHON3_BINDINGS],
                    [test x"$with_python3_bindings" = xyes])
   ])
 
+AC_DEFUN([WITH_PYTHON_BIN],
+  [ AC_ARG_WITH([python_bin],
+                [AC_HELP_STRING([--with-python-bin=PATH], [Path to the python interpreter [/usr/bin/python3]])
+                ]
+               )
+
+    PYTHON_BIN_PATH="/usr/bin/python3"
+    if test x"$with_python_bin" != x; then
+        PYTHON_BIN_PATH=$with_python_bin
+    fi
+    AC_SUBST(PYTHON_BIN_PATH)
+    AC_DEFINE_UNQUOTED([PYTHON_BIN_PATH], ["$PYTHON_BIN_PATH"], [Path to the python interpreter])
+  ])
+
 AC_DEFUN([WITH_SELINUX],
   [ AC_ARG_WITH([selinux],
                 [AC_HELP_STRING([--with-selinux],
diff --git a/src/tools/analyzer/Makefile.am b/src/tools/analyzer/Makefile.am
new file mode 100644
index 0000000000..26b9a1c763
--- /dev/null
+++ b/src/tools/analyzer/Makefile.am
@@ -0,0 +1,15 @@
+pkgpythondir = $(python3dir)/sssd
+
+dist_pkgpython_SCRIPTS = \
+    sss_analyze.py \
+    $(NULL)
+
+dist_pkgpython_DATA = \
+    source_files.py \
+    source_journald.py \
+    $(NULL)
+
+modulesdir = $(pkgpythondir)/modules
+dist_modules_DATA = \
+    modules/request.py \
+    $(NULL)
diff --git a/src/tools/analyzer/modules/__init__.py b/src/tools/analyzer/modules/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/tools/analyzer/modules/request.py b/src/tools/analyzer/modules/request.py
new file mode 100644
index 0000000000..98d68a333e
--- /dev/null
+++ b/src/tools/analyzer/modules/request.py
@@ -0,0 +1,196 @@
+import re
+import copy
+import click
+
+from enum import Enum
+
+import source_files
+import source_journald
+
+
+@click.group(help="Request module")
+def request():
+    pass
+
+
+@request.command()
+@click.option("-v", "--verbose", is_flag=True, help="Enables verbose output")
+@click.option("--pam", is_flag=True, help="Filter only PAM requests")
+@click.pass_obj
+def list(ctx, verbose, pam):
+    analyzer = RequestAnalyzer()
+    source = analyzer.load(ctx)
+    analyzer.list_requests(source, verbose, pam)
+
+
+@request.command()
+@click.argument("cid", nargs=1, type=int, required=True)
+@click.option("--merge", is_flag=True, help="Merge logs together sorted"
+              " by timestamp (requires debug_microseconds = True)")
+@click.option("--cachereq", is_flag=True, help="Include cache request "
+              "related logs")
+@click.option("--pam", is_flag=True, help="Track only PAM requests")
+@click.pass_obj
+def show(ctx, cid, merge, cachereq, pam):
+    analyzer = RequestAnalyzer()
+    source = analyzer.load(ctx)
+    analyzer.track_request(source, cid, merge, cachereq, pam)
+
+
+class RequestAnalyzer:
+    consumed_logs = []
+    done = ""
+
+    def load(self, ctx):
+        if ctx.source == "journald":
+            import source_journald
+            source = source_journald.Reader()
+        else:
+            source = source_files.Reader(ctx.logdir)
+        return source
+
+    # iterate through source, returning lines which match
+    # any number of patterns
+    def matched_line(self, source, patterns):
+        for line in source:
+            for pattern in patterns:
+                re_obj = re.compile(pattern)
+                if re_obj.search(line):
+                    if line.startswith('   *  '):
+                        continue
+                    yield line
+
+    # retrieve list of associated REQ_TRACE ids
+    def get_linked_ids(self, source, pattern, regex):
+        linked_ids = []
+        for match in self.matched_line(source, pattern):
+            id_re = re.compile(regex)
+            match = id_re.search(match)
+            if match:
+                found = match.group(0)
+                linked_ids.append(found)
+        return linked_ids
+
+    def consume_line(self, line, source, consume):
+        found_results = True
+        if consume:
+            self.consumed_logs.append(line.rstrip(line[-1]))
+        else:
+            # files source includes newline
+            if type(source) == source_files.Reader:
+                print(line, end='')
+            else:
+                print(line)
+        return found_results
+
+    def print_formatted(self, line, verbose):
+        plugin = ""
+        name = ""
+        id = ""
+
+        # exclude backtrace logs
+        if line.startswith('   *  '):
+            return
+        fields = line.split("[")
+        cr_field = fields[2].split(":")[1]
+        cr = cr_field[5:]
+        if "refreshed" in line:
+            return
+        # CR Plugin name
+        if re.search("cache_req_send", line):
+            plugin = line.split('\'')[1]
+        # CR Input name
+        elif re.search("cache_req_process_input", line):
+            name = line.rsplit('[')[-1]
+        # CR Input id
+        elif re.search("cache_req_search_send", line):
+            id = line.rsplit()[-1]
+        # CID and client process name
+        else:
+            ts = line.split(")")[0]
+            ts = ts[1:]
+            fields = line.split("[")
+            cid = fields[3][5:-1]
+            cmd = fields[4][4:-1]
+            print(ts + ": " + "CID #" + cid + ": " + cmd)
+
+        if verbose:
+            if plugin:
+                print("   - " + plugin)
+            if name:
+                if cr not in self.done:
+                    print("       - " + name[:-2])
+                    self.done = cr
+            if id:
+                if cr not in self.done:
+                    print("       - " + id)
+                    self.done = cr
+
+    def list_requests(self, source, verbose, pam):
+        component = source.Component.NSS
+        resp = "nss"
+        patterns = ['\[cmd']
+        patterns.append("(cache_req_send|cache_req_process_input|"
+                        "cache_req_search_send)")
+        consume = True
+        if pam:
+            component = source.Component.PAM
+            resp = "pam"
+
+        print(f"******** Listing {resp} client requests ********")
+        source.set_component(component)
+        self.done = ""
+        # For each CID
+        for line in self.matched_line(source, patterns):
+            if type(source) == source_journald.Reader:
+                print(line)
+            else:
+                self.print_formatted(line, verbose)
+
+    def track_request(self, source, cid, merge, cachereq, pam):
+        resp_results = False
+        be_results = False
+        component = source.Component.NSS
+        resp = "nss"
+        pattern = [f'REQ_TRACE.*\[CID #{cid}\\]']
+        pattern.append(f"\[CID #{cid}\\].*connected")
+
+        if pam:
+            component = source.Component.PAM
+            resp = "pam"
+            pam_data_regex = f'pam_print_data.*\[CID #{cid}\]'
+
+        print(f"******** Checking {resp} responder for Client ID"
+              f" {cid} *******")
+        source.set_component(component)
+        if cachereq:
+            cr_id_regex = 'CR #[0-9]+'
+            cr_ids = self.get_linked_ids(source, pattern, cr_id_regex)
+            [pattern.append(f'{id}\:') for id in cr_ids]
+
+        for match in self.matched_line(source, pattern):
+            resp_results = self.consume_line(match, source, merge)
+
+        print(f"********* Checking Backend for Client ID {cid} ********")
+        pattern = [f'REQ_TRACE.*\[sssd.{resp} CID #{cid}\]']
+        source.set_component(source.Component.BE)
+
+        be_id_regex = '\[RID#[0-9]+\]'
+        be_ids = self.get_linked_ids(source, pattern, be_id_regex)
+
+        pattern.clear()
+        [pattern.append(f'\\{id}') for id in be_ids]
+
+        if pam:
+            pattern.append(pam_data_regex)
+        for match in self.matched_line(source, pattern):
+            be_results = self.consume_line(match, source, merge)
+
+        if merge:
+            # sort by date/timestamp
+            sorted_list = sorted(self.consumed_logs,
+                                 key=lambda s: s.split(')')[0])
+            for entry in sorted_list:
+                print(entry)
+        if not resp_results and not be_results:
+            print(f"ID {cid} not found in logs!")
diff --git a/src/tools/analyzer/source_files.py b/src/tools/analyzer/source_files.py
new file mode 100644
index 0000000000..12be10f895
--- /dev/null
+++ b/src/tools/analyzer/source_files.py
@@ -0,0 +1,58 @@
+from enum import Enum
+import configparser
+from os import listdir
+from os.path import isfile, join
+import glob
+
+
+class Reader:
+    class Component(Enum):
+        NSS = 1   # NSS Responder
+        PAM = 2   # PAM Responder
+        BE = 3    # Backend
+
+    def __init__(self, path):
+        self.log_files = []
+        self.path = self.resolve_path(path)
+        self.domains = self.get_domain_logfiles()
+
+    def __iter__(self):
+        for files in self.log_files:
+            try:
+                with open(files) as file:
+                    for line in file:
+                        yield line
+            except FileNotFoundError as err:
+                print("Could not find domain log file, skipping")
+                print(err)
+                continue
+
+    def resolve_path(self, path):
+        if path.endswith("/"):
+            return path
+        else:
+            return path + "/"
+
+    def get_domain_logfiles(self):
+        domain_files = []
+        exclude_list = ["ifp", "nss", "pam", "sudo", "autofs",
+                        "ssh", "pac", "kcm", ".gz"]
+        file_list = glob.glob(self.path + "sssd_*")
+        for file in file_list:
+            if not any(s in file for s in exclude_list):
+                domain_files.append(file)
+
+        return domain_files
+
+    def set_component(self, component):
+        self.log_files = []
+        if component == self.Component.NSS:
+            self.log_files.append(self.path + "sssd_nss.log")
+        elif component == self.Component.PAM:
+            self.log_files.append(self.path + "sssd_pam.log")
+        elif component == self.Component.BE:
+            if not self.domains:
+                raise IOError
+            # error: No domains found?
+            for dom in self.domains:
+                self.log_files.append(dom)
diff --git a/src/tools/analyzer/source_journald.py b/src/tools/analyzer/source_journald.py
new file mode 100644
index 0000000000..4f30f9e607
--- /dev/null
+++ b/src/tools/analyzer/source_journald.py
@@ -0,0 +1,39 @@
+from systemd import journal
+
+from enum import Enum
+
+_EXE_PREFIX = "/usr/libexec/sssd/"
+_NSS_MATCH = _EXE_PREFIX + "sssd_nss"
+_PAM_MATCH = _EXE_PREFIX + "sssd_pam"
+_BE_MATCH = _EXE_PREFIX + "sssd_be"
+
+
+class Reader:
+    class Component(Enum):
+        NSS = 1   # NSS Responder
+        PAM = 2   # PAM Responder
+        BE = 3    # Backend
+
+    def __init__(self):
+        self.reader = journal.Reader()
+        self.reader.this_boot()
+        self.reader.seek_head()
+
+    def __iter__(self):
+        self.reader.seek_head()
+        for entry in self.reader:
+            ts = entry['__REALTIME_TIMESTAMP']
+            msg = entry['MESSAGE']
+            if ts:
+                yield f'{ts}: {msg}'
+            else:
+                yield msg
+
+    def set_component(self, component):
+        self.reader.flush_matches()
+        if component == self.Component.NSS:
+            self.reader.add_match(_EXE=_NSS_MATCH)
+        elif component == self.Component.PAM:
+            self.reader.add_match(_EXE=_PAM_MATCH)
+        elif component == self.Component.BE:
+            self.reader.add_match(_EXE=_BE_MATCH)
diff --git a/src/tools/analyzer/sss_analyze.py.in b/src/tools/analyzer/sss_analyze.py.in
new file mode 100755
index 0000000000..1b6b684fca
--- /dev/null
+++ b/src/tools/analyzer/sss_analyze.py.in
@@ -0,0 +1,24 @@
+#!@PYTHON_BIN_PATH@
+
+import click
+
+import source_files
+
+from modules import request
+
+class Analyzer(object):
+    def __init__(self, source="files", logdir="/var/log/sssd/"):
+        self.source = source
+        self.logdir = logdir
+
+@click.group(help="Analyzer tool to assist with SSSD Log parsing")
+@click.option('--source', default='files')
+@click.option('--logdir', default='/var/log/sssd/', help="SSSD Log directory to parse "
+              "log files from")
+@click.pass_context
+def cli(ctx, source, logdir):
+    ctx.obj = Analyzer(source, logdir)
+
+if __name__ == '__main__':
+    cli.add_command(request.request)
+    cli()

From 64827ce7833c9fb74989b2017aca34ff64e6f619 Mon Sep 17 00:00:00 2001
From: Justin Stephenson <jstep...@redhat.com>
Date: Fri, 21 May 2021 14:42:41 -0400
Subject: [PATCH 3/3] SSSCTL: Add analyze command

Wrapper for sss_analyze

Print a message about limited functionality when tevent chain ID
support is not built.
---
 Makefile.am                    |  1 +
 src/tools/sssctl/sssctl.c      |  1 +
 src/tools/sssctl/sssctl.h      |  4 ++++
 src/tools/sssctl/sssctl_logs.c | 29 +++++++++++++++++++++++++++++
 4 files changed, 35 insertions(+)

diff --git a/Makefile.am b/Makefile.am
index c7c429965d..9b287f5f4f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1983,6 +1983,7 @@ sssctl_LDADD = \
     $(NULL)
 sssctl_CFLAGS = \
     $(AM_CFLAGS) \
+    -DPYTHONDIR_PATH=\"$(python3dir)/sssd\" \
     $(NULL)
 
 if BUILD_SUDO
diff --git a/src/tools/sssctl/sssctl.c b/src/tools/sssctl/sssctl.c
index 8adaf30910..3816125ad3 100644
--- a/src/tools/sssctl/sssctl.c
+++ b/src/tools/sssctl/sssctl.c
@@ -296,6 +296,7 @@ int main(int argc, const char **argv)
         SSS_TOOL_COMMAND("logs-remove", "Remove existing SSSD log files", 0, sssctl_logs_remove),
         SSS_TOOL_COMMAND("logs-fetch", "Archive SSSD log files in tarball", 0, sssctl_logs_fetch),
         SSS_TOOL_COMMAND("debug-level", "Change SSSD debug level", 0, sssctl_debug_level),
+        SSS_TOOL_COMMAND("analyze", "Analyze logged data", 0, sssctl_analyze),
 #ifdef HAVE_LIBINI_CONFIG_V1_3
         SSS_TOOL_DELIMITER("Configuration files tools:"),
         SSS_TOOL_COMMAND_FLAGS("config-check", "Perform static analysis of SSSD configuration", 0, sssctl_config_check, SSS_TOOL_FLAG_SKIP_CMD_INIT),
diff --git a/src/tools/sssctl/sssctl.h b/src/tools/sssctl/sssctl.h
index 599ef65196..c827bb8789 100644
--- a/src/tools/sssctl/sssctl.h
+++ b/src/tools/sssctl/sssctl.h
@@ -96,6 +96,10 @@ errno_t sssctl_debug_level(struct sss_cmdline *cmdline,
                            struct sss_tool_ctx *tool_ctx,
                            void *pvt);
 
+errno_t sssctl_analyze(struct sss_cmdline *cmdline,
+                       struct sss_tool_ctx *tool_ctx,
+                       void *pvt);
+
 errno_t sssctl_user_show(struct sss_cmdline *cmdline,
                          struct sss_tool_ctx *tool_ctx,
                          void *pvt);
diff --git a/src/tools/sssctl/sssctl_logs.c b/src/tools/sssctl/sssctl_logs.c
index ebb2c4571c..b98cd68fec 100644
--- a/src/tools/sssctl/sssctl_logs.c
+++ b/src/tools/sssctl/sssctl_logs.c
@@ -41,6 +41,7 @@
 
 #define LOG_FILE(file) " " LOG_PATH "/" file
 #define LOG_FILES LOG_FILE("*.log")
+#define SSS_ANALYZE PYTHONDIR_PATH"/sss_analyze.py"
 
 #define CHECK(expr, done, msg) do { \
     if (expr) { \
@@ -388,3 +389,31 @@ errno_t sssctl_debug_level(struct sss_cmdline *cmdline,
     talloc_free(ctx);
     return ret;
 }
+
+errno_t sssctl_analyze(struct sss_cmdline *cmdline,
+                       struct sss_tool_ctx *tool_ctx,
+                       void *pvt)
+{
+    errno_t ret;
+
+#ifndef BUILD_CHAIN_ID
+    PRINT("NOTE: Tevent chain ID support missing, request analysis will be limited.\n");
+    PRINT("It is recommended to use the --logdir option against tevent chain ID "
+          "supported SSSD logs.\n");
+#endif
+    const char **args = talloc_array_size(tool_ctx,
+                                          sizeof(char *),
+                                          cmdline->argc + 2);
+    if (!args) {
+        return ENOMEM;
+    }
+    memcpy(&args[1], cmdline->argv, sizeof(char *) * cmdline->argc);
+    args[0] = SSS_ANALYZE;
+    args[cmdline->argc + 1] = NULL;
+
+    ret = sssctl_run_command(args);
+
+    talloc_free(args);
+
+    return ret;
+}
_______________________________________________
sssd-devel mailing list -- sssd-devel@lists.fedorahosted.org
To unsubscribe send an email to sssd-devel-le...@lists.fedorahosted.org
Fedora Code of Conduct: 
https://docs.fedoraproject.org/en-US/project/code-of-conduct/
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: 
https://lists.fedorahosted.org/archives/list/sssd-devel@lists.fedorahosted.org
Do not reply to spam on the list, report it: 
https://pagure.io/fedora-infrastructure

Reply via email to