commit:     758c24a6578bad541a188f0fe513906515dd1bda
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Fri Dec 29 00:22:14 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Fri Dec 29 00:22:14 2023 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=758c24a6

app-admin/rasdaemon: backport crash for online vs. configured CPUs

Closes: https://bugs.gentoo.org/890286
Signed-off-by: Sam James <sam <AT> gentoo.org>

 ...on-0.8.0-check-online-cpus-not-configured.patch |  40 +++++
 ...rasdaemon-0.8.0-table-create-offline-cpus.patch | 179 +++++++++++++++++++++
 app-admin/rasdaemon/rasdaemon-0.8.0-r2.ebuild      |  87 ++++++++++
 3 files changed, 306 insertions(+)

diff --git a/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch b/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch
new file mode 100644
index 000000000000..0d3e60976659
--- /dev/null
+++ b/app-admin/rasdaemon/files/rasdaemon-0.8.0-check-online-cpus-not-configured.patch
@@ -0,0 +1,40 @@
+https://bugs.gentoo.org/890286
+https://github.com/mchehab/rasdaemon/issues/77
+https://github.com/mchehab/rasdaemon/commit/f1ea76375281001cdf4a048c1a4a24d86c6fbe48
+
+From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001
+From: Zeph / Liz Loss-Cutler-Hull <warp-spam_...@aehallh.com>
+Date: Sun, 9 Jul 2023 04:57:19 -0700
+Subject: [PATCH] Check CPUs online, not configured.
+
+When the number of CPUs detected is greater than the number of CPUs in
+the system, rasdaemon will crash when it receives some events.
+
+Looking deeper, we also fail to use the poll method for similar reasons
+in this case.
+
+All of this can be prevented by checking to see how many CPUs are
+currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many
+CPUs the current kernel was configured to support
+(sysconf(_SC_NPROCESSORS_CONF)).
+
+For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rqh...@mail.gmail.com/T/
+Signed-off-by: Mauro Carvalho Chehab <mche...@kernel.org>
+---
+ ras-events.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/ras-events.c b/ras-events.c
+index a82dab2..5935163 100644
+--- a/ras-events.c
++++ b/ras-events.c
+@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf,
+ 
+ static int get_num_cpus(struct ras_events *ras)
+ {
+-      return sysconf(_SC_NPROCESSORS_CONF);
++      return sysconf(_SC_NPROCESSORS_ONLN);
+ #if 0
+       char fname[MAX_PATH + 1];
+       int num_cpus = 0;
+

diff --git a/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch b/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch
new file mode 100644
index 000000000000..4eb3977930c6
--- /dev/null
+++ b/app-admin/rasdaemon/files/rasdaemon-0.8.0-table-create-offline-cpus.patch
@@ -0,0 +1,179 @@
+https://bugs.gentoo.org/890286
+https://github.com/mchehab/rasdaemon/issues/77
+https://github.com/mchehab/rasdaemon/commit/6f7851f72d8464c7a20a248d4abf4362de8f0ba9
+
+From 6f7851f72d8464c7a20a248d4abf4362de8f0ba9 Mon Sep 17 00:00:00 2001
+From: Shiju Jose <shiju.j...@huawei.com>
+Date: Sun, 5 Mar 2023 23:14:42 +0000
+Subject: [PATCH] rasdaemon: fix table create if some cpus are offline
+
+Fix for regression in ras_mc_create_table() if some cpus are offline
+at the system start
+
+Issue:
+
+Regression in the ras_mc_create_table() if some of the cpus are offline
+at the system start when run the rasdaemon.
+
+This issue is reproducible in ras_mc_create_table() with decode and
+record non-standard events and reproducible sometimes with
+ras_mc_create_table() for the standard events.
+
+Also in the multi thread way, there is memory leak in ras_mc_event_opendb()
+as struct sqlite3_priv *priv and sqlite3 *db allocated/initialized per
+thread, but stored in the common struct ras_events ras in pthread data,
+which is shared across the threads.
+
+Reason:
+
+when the system starts with some of the cpus offline and then run
+the rasdaemon, read_ras_event_all_cpus() exit with error and switch to
+the multi thread way. However read() in read_ras_event() return error in
+threads for each of the offline CPUs and does clean up including calling
+ras_mc_event_closedb().
+
+Since the 'struct ras_events ras' passed in the pthread_data to each of the
+threads is common, struct sqlite3_priv *priv and sqlite3 *db allocated/
+initialized per thread and stored in the common 'struct ras_events ras',
+are getting overwritten in each ras_mc_event_opendb()(which called from
+pthread per cpu), result memory leak.
+
+Also when ras_mc_event_closedb() is called in the above error case from
+the threads corresponding to the offline cpus, close the sqlite3 *db and
+free sqlite3_priv *priv stored in the common 'struct ras_events ras',
+result regression when accessing priv->db in the ras_mc_create_table()
+from another context later.
+
+Solution:
+
+In ras_mc_event_opendb(), allocate struct sqlite3_priv *priv,
+init sqlite3 *db and create tables common for the threads with shared
+'struct ras_events ras' based on a reference count and free them in the
+same way.
+
+Also protect critical code ras_mc_event_opendb() and ras_mc_event_closedb()
+using mutex in the multi thread case from any regression caused by the
+thread pre-emption.
+
+Reported-by: Lei Feng <fengle...@h-partners.com>
+Signed-off-by: Shiju Jose <shiju.j...@huawei.com>
+Signed-off-by: Mauro Carvalho Chehab <mche...@kernel.org>
+---
+ ras-events.c | 16 +++++++++++++++-
+ ras-events.h |  4 +++-
+ ras-record.c | 12 ++++++++++++
+ 3 files changed, 30 insertions(+), 2 deletions(-)
+
+diff --git a/ras-events.c b/ras-events.c
+index 49e4f9a..5fe8e19 100644
+--- a/ras-events.c
++++ b/ras-events.c
+@@ -625,19 +625,25 @@ static void *handle_ras_events_cpu(void *priv)
+ 
+       log(TERM, LOG_INFO, "Listening to events on cpu %d\n", pdata->cpu);
+       if (pdata->ras->record_events) {
++              pthread_mutex_lock(&pdata->ras->db_lock);
+               if (ras_mc_event_opendb(pdata->cpu, pdata->ras)) {
++                      pthread_mutex_unlock(&pdata->ras->db_lock);
+                       log(TERM, LOG_ERR, "Can't open database\n");
+                       close(fd);
+                       kbuffer_free(kbuf);
+                       free(page);
+                       return 0;
+               }
++              pthread_mutex_unlock(&pdata->ras->db_lock);
+       }
+ 
+       read_ras_event(fd, pdata, kbuf, page);
+ 
+-      if (pdata->ras->record_events)
++      if (pdata->ras->record_events) {
++              pthread_mutex_lock(&pdata->ras->db_lock);
+               ras_mc_event_closedb(pdata->cpu, pdata->ras);
++              pthread_mutex_unlock(&pdata->ras->db_lock);
++      }
+ 
+       close(fd);
+       kbuffer_free(kbuf);
+@@ -993,6 +999,11 @@ int handle_ras_events(int record_events)
+ 
+       /* Poll doesn't work on this kernel. Fallback to pthread way */
+       if (rc == -255) {
++              if (pthread_mutex_init(&ras->db_lock, NULL) != 0) {
++                      log(SYSLOG, LOG_INFO, "sqlite db lock init has failed\n");
++                      goto err;
++              }
++
+               log(SYSLOG, LOG_INFO,
+               "Opening one thread per cpu (%d threads)\n", cpus);
+               for (i = 0; i < cpus; i++) {
+@@ -1005,6 +1016,8 @@ int handle_ras_events(int record_events)
+                               i);
+                               while (--i)
+                                       pthread_cancel(data[i].thread);
++
++                              pthread_mutex_destroy(&ras->db_lock);
+                               goto err;
+                       }
+               }
+@@ -1012,6 +1025,7 @@ int handle_ras_events(int record_events)
+               /* Wait for all threads to complete */
+               for (i = 0; i < cpus; i++)
+                       pthread_join(data[i].thread, NULL);
++              pthread_mutex_destroy(&ras->db_lock);
+       }
+ 
+       log(SYSLOG, LOG_INFO, "Huh! something got wrong. Aborting.\n");
+diff --git a/ras-events.h b/ras-events.h
+index 6c9f507..649b0c0 100644
+--- a/ras-events.h
++++ b/ras-events.h
+@@ -56,7 +56,9 @@ struct ras_events {
+       time_t          uptime_diff;
+ 
+       /* For ras-record */
+-      void            *db_priv;
++      void    *db_priv;
++      int     db_ref_count;
++      pthread_mutex_t db_lock;
+ 
+       /* For the mce handler */
+       struct mce_priv *mce_priv;
+diff --git a/ras-record.c b/ras-record.c
+index a367939..adc97a4 100644
+--- a/ras-record.c
++++ b/ras-record.c
+@@ -763,6 +763,10 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
+ 
+       printf("Calling %s()\n", __FUNCTION__);
+ 
++      ras->db_ref_count++;
++      if (ras->db_ref_count > 1)
++              return 0;
++
+       ras->db_priv = NULL;
+ 
+       priv = calloc(1, sizeof(*priv));
+@@ -912,6 +916,13 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
+ 
+       printf("Calling %s()\n", __func__);
+ 
++      if (ras->db_ref_count > 0)
++              ras->db_ref_count--;
++      else
++              return -1;
++      if (ras->db_ref_count > 0)
++              return 0;
++
+       if (!priv)
+               return -1;
+ 
+@@ -1018,6 +1029,7 @@ int ras_mc_event_closedb(unsigned int cpu, struct ras_events *ras)
+               log(TERM, LOG_ERR,
+                   "cpu %u: Failed to shutdown sqlite: error = %d\n", cpu, rc);
+       free(priv);
++      ras->db_priv = NULL;
+ 
+       return 0;
+ }
+

diff --git a/app-admin/rasdaemon/rasdaemon-0.8.0-r2.ebuild b/app-admin/rasdaemon/rasdaemon-0.8.0-r2.ebuild
new file mode 100644
index 000000000000..790f5a194f26
--- /dev/null
+++ b/app-admin/rasdaemon/rasdaemon-0.8.0-r2.ebuild
@@ -0,0 +1,87 @@
+# Copyright 1999-2023 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+inherit autotools flag-o-matic linux-info systemd
+
+DESCRIPTION="Reliability, Availability and Serviceability logging tool"
+HOMEPAGE="https://github.com/mchehab/rasdaemon"
+# This if can be dropped > 0.8.0, see https://github.com/mchehab/rasdaemon/issues/88
+if [[ ${PV} == 0.8.0 ]] ; then
+       SRC_URI="https://github.com/mchehab/rasdaemon/releases/download/refs%2Fheads%2Fmaster/${P}.tar.bz2"
+else
+       SRC_URI="https://github.com/mchehab/rasdaemon/releases/download/v${PV}/${P}.tar.bz2"
+fi
+
+LICENSE="GPL-2"
+SLOT="0"
+KEYWORDS="~amd64 ~arm ~arm64 ~ppc ~ppc64 ~x86"
+IUSE="selinux"
+
+DEPEND="
+       dev-db/sqlite
+       dev-libs/libtraceevent
+       elibc_musl? ( sys-libs/argp-standalone )
+"
+RDEPEND="
+       ${DEPEND}
+       dev-perl/DBI
+       dev-perl/DBD-SQLite
+       sys-apps/dmidecode
+       selinux? ( sec-policy/selinux-rasdaemon )
+"
+BDEPEND="sys-devel/gettext"
+
+PATCHES=(
+       "${FILESDIR}"/${PN}-0.8.0-bashisms-configure.patch
+       "${FILESDIR}"/${PN}-0.8.0-table-create-offline-cpus.patch
+       "${FILESDIR}"/${PN}-0.8.0-check-online-cpus-not-configured.patch
+)
+
+pkg_setup() {
+       linux-info_pkg_setup
+       local CONFIG_CHECK="~ACPI_EXTLOG ~DEBUG_FS"
+       check_extra_config
+}
+
+src_prepare() {
+       default
+
+       # Only here for 0.8.0's bashism patch
+       eautoreconf
+}
+
+src_configure() {
+       local myconfargs=(
+               --enable-sqlite3
+               --enable-abrt-report
+               --enable-aer
+               --enable-arm
+               --enable-extlog
+               --enable-hisi-ns-decode
+               --enable-mce
+               --enable-non-standard
+               --enable-devlink
+               --enable-diskerror
+               --enable-memory-ce-pfa
+               --includedir="/usr/include/${PN}"
+               --localstatedir=/var
+       )
+
+       use elibc_musl && append-libs -largp
+
+       econf "${myconfargs[@]}"
+}
+
+src_install() {
+       default
+
+       keepdir "/var/lib/${PN}"
+
+       systemd_dounit misc/*.service
+
+       newinitd "${FILESDIR}/rasdaemon.openrc-r2" rasdaemon
+       newinitd "${FILESDIR}/ras-mc-ctl.openrc-r1" ras-mc-ctl
+       newconfd "${FILESDIR}"/rasdaemon.confd rasdaemon
+}

Reply via email to