diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index 123c4c7c2ab5..75c77cec0b21 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -183,4 +183,14 @@ config HOTPLUG_PCI_S390
When in doubt, say Y.
+config HOTPLUG_PCI_PCIE_CRAY_E1000
+ bool "PCIe Hotplug extensions for Cray ClusterStor E1000"
+ depends on HOTPLUG_PCI_PCIE && IPMI_HANDLER=y
+ help
+ Say Y here if you have a Cray ClusterStor E1000 and want to control
+ your NVMe slot LEDs. Without this driver is it not possible
typo is it -> it is
+ to control the fault and locate LEDs on the E1000's 24 NVMe slots.
+
+ When in doubt, say N.
+
endif # HOTPLUG_PCI
diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
index 40aaf31fe338..82a1f0592d0a 100644
--- a/drivers/pci/hotplug/Makefile
+++ b/drivers/pci/hotplug/Makefile
@@ -66,6 +66,9 @@ pciehp-objs := pciehp_core.o \
pciehp_ctrl.o \
pciehp_pci.o \
pciehp_hpc.o
+ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
+pciehp-objs += pciehp_craye1k.o
+endif
shpchp-objs := shpchp_core.o \
shpchp_ctrl.o \
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 273dd8c66f4e..ea68ae041547 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -198,6 +198,13 @@ int pciehp_get_raw_indicator_status(struct hotplug_slot
*h_slot, u8 *status);
int pciehp_slot_reset(struct pcie_device *dev);
+#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
+int craye1k_get_attention_status(struct hotplug_slot *hotplug_slot, u8
*status);
+int craye1k_set_attention_status(struct hotplug_slot *hotplug_slot, u8 status);
+bool is_craye1k_slot(struct controller *ctrl);
+int craye1k_init(void);
+#endif
+
static inline const char *slot_name(struct controller *ctrl)
{
return hotplug_slot_name(&ctrl->hotplug_slot);
diff --git a/drivers/pci/hotplug/pciehp_core.c
b/drivers/pci/hotplug/pciehp_core.c
index ff458e692fed..9a7a7b320810 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -73,6 +73,13 @@ static int init_slot(struct controller *ctrl)
ops->get_attention_status = pciehp_get_raw_indicator_status;
ops->set_attention_status = pciehp_set_raw_indicator_status;
}
+#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
+ if (is_craye1k_slot(ctrl)) {
+ /* slots 1-24 on Cray E1000s are controlled differently */
+ ops->get_attention_status = craye1k_get_attention_status;
+ ops->set_attention_status = craye1k_set_attention_status;
+ }
+#endif
/* register this slot with the hotplug pci core */
ctrl->hotplug_slot.ops = ops;
@@ -404,6 +411,11 @@ int __init pcie_hp_init(void)
pr_debug("pcie_port_service_register = %d\n", retval);
if (retval)
pr_debug("Failure to register service\n");
+#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
+ retval = craye1k_init();
+ if (retval)
+ pr_debug("Failure to register Cray E1000 extensions");
+#endif
return retval;
}
diff --git a/drivers/pci/hotplug/pciehp_craye1k.c
b/drivers/pci/hotplug/pciehp_craye1k.c
new file mode 100644
index 000000000000..844b36882316
--- /dev/null
+++ b/drivers/pci/hotplug/pciehp_craye1k.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022-2024 Lawrence Livermore National Security, LLC
+ */
+/*
+ * Cray ClusterStor E1000 hotplug slot LED driver extensions
+ *
+ * This driver controls the NVMe slot LEDs on the Cray ClusterStore E1000.
+ * It provides hotplug attention status callbacks for the 24 NVMe slots on
+ * the E1000. This allows users to access the E1000's locate and fault
+ * LEDs via the normal /sys/bus/pci/slots/<slot>/attention sysfs entries.
+ * This driver uses IPMI to communicate with the E1000 controller to toggle
+ * the LEDs.
+ *
+ * This driver is based off of ibmpex.c
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/dmi.h>
+#include <linux/ipmi.h>
+#include <linux/ipmi_smi.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/random.h>
+#include "pciehp.h"
+
+/* Cray E1000 commands */
+#define CRAYE1K_CMD_NETFN 0x3c
+#define CRAYE1K_CMD_PRIMARY 0x33
+#define CRAYE1K_CMD_FAULT_LED 0x39
+#define CRAYE1K_CMD_LOCATE_LED 0x22
+
+/* Subcommands */
+#define CRAYE1K_GET_LED 0x0
+#define CRAYE1K_SET_LED 0x1
+#define CRAYE1K_SET_PRIMARY 0x1
both defined as 0x1
This is likely intentional because they belong to different context, it
is ok.
what about prefix CRAYE1K_SUBCMD_ for subcommands?
+
+/*
+ * Milliseconds to wait after get/set LED command. 200ms value found though
+ * experimentation
+ */
+#define CRAYE1K_POST_CMD_WAIT_MS 200
+
+struct craye1k {
+ struct device *dev; /* BMC device */
+ struct mutex lock;
+ struct completion read_complete;
+ struct ipmi_addr address;
+ struct ipmi_user *user;
+ int iface;
+
+ long tx_msg_id;
+ struct kernel_ipmi_msg tx_msg;
+ unsigned char tx_msg_data[IPMI_MAX_MSG_LENGTH];
+ unsigned char rx_msg_data[IPMI_MAX_MSG_LENGTH];
+ unsigned long rx_msg_len;
+ unsigned char rx_result; /* IPMI completion code */
+
+ /* Parent dir for all our debugfs entries */
+ struct dentry *parent;
+
+ /* debugfs stats */
+ u64 check_primary;
+ u64 check_primary_failed;
+ u64 was_already_primary;
+ u64 was_not_already_primary;
+ u64 set_primary;
+ u64 set_initial_primary_failed;
+ u64 set_primary_failed;
+ u64 set_led_locate_failed;
+ u64 set_led_fault_failed;
+ u64 set_led_readback_failed;
+ u64 set_led_failed;
+ u64 get_led_failed;
+ u64 completion_timeout;
+ u64 wrong_msgid;
+ u64 request_failed;
+
+ /* debugfs configuration options */
+
+ /* Print info on spurious IPMI messages */
+ bool print_errors;
+
+ /* Retries for kernel IPMI layer */
+ u32 ipmi_retries;
+
+ /* Timeout in ms for IPMI (0 = use IPMI default_retry_ms) */
+ u32 ipmi_timeout_ms;
+
+ /* Timeout in ms to wait for E1000 message completion */
+ u32 completion_timeout_ms;
+};
+
+/*
+ * Make our craye1k a global so get/set_attention_status() can access it.
+ * This is safe since there's only one node controller on the board, and so
it's
+ * impossible to instantiate more than one craye1k.
+ */
+static struct craye1k *craye1k_global;
+
+/* Return parent dir dentry */
+static struct dentry *
+craye1k_debugfs_init(struct craye1k *craye1k)
+{
+ umode_t mode = 0644;
+ struct dentry *parent = debugfs_create_dir("pciehp_craye1k", NULL);
+
if (!parent) is correct! but debugfs_create_dir() can return ERR_PTR
what about check for IS_ERR(parent) to make fully robust?
+ if (!parent)
+ return NULL;
+
+ debugfs_create_x64("check_primary", mode, parent,
+ &craye1k->check_primary);
+ debugfs_create_x64("check_primary_failed", mode, parent,
+ &craye1k->check_primary_failed);
+ debugfs_create_x64("was_already_primary", mode, parent,
+ &craye1k->was_already_primary);
+ debugfs_create_x64("was_not_already_primary", mode, parent,
+ &craye1k->was_not_already_primary);
+ debugfs_create_x64("set_primary", mode, parent,
+ &craye1k->set_primary);
+ debugfs_create_x64("set_initial_primary_failed", mode, parent,
+ &craye1k->set_initial_primary_failed);
+ debugfs_create_x64("set_primary_failed", mode, parent,
+ &craye1k->set_primary_failed);
+ debugfs_create_x64("set_led_locate_failed", mode, parent,
+ &craye1k->set_led_locate_failed);
+ debugfs_create_x64("set_led_fault_failed", mode, parent,
+ &craye1k->set_led_fault_failed);
+ debugfs_create_x64("set_led_readback_failed", mode, parent,
+ &craye1k->set_led_readback_failed);
+ debugfs_create_x64("set_led_failed", mode, parent,
+ &craye1k->set_led_failed);
+ debugfs_create_x64("get_led_failed", mode, parent,
+ &craye1k->get_led_failed);
+ debugfs_create_x64("completion_timeout", mode, parent,
+ &craye1k->completion_timeout);
+ debugfs_create_x64("wrong_msgid", mode, parent,
+ &craye1k->wrong_msgid);
+ debugfs_create_x64("request_failed", mode, parent,
+ &craye1k->request_failed);
+
+ debugfs_create_x32("ipmi_retries", mode, parent,
+ &craye1k->ipmi_retries);
+ debugfs_create_x32("ipmi_timeout_ms", mode, parent,
+ &craye1k->ipmi_timeout_ms);
+ debugfs_create_x32("completion_timeout_ms", mode, parent,
+ &craye1k->completion_timeout_ms);
+ debugfs_create_bool("print_errors", mode, parent,
+ &craye1k->print_errors);
+
+ return parent;
+}
[clip]
+static int __craye1k_get_attention_status(struct hotplug_slot *hotplug_slot,
+ u8 *status, bool set_primary)
+{
+ unsigned char slot;
+ int locate, fault;
+ struct craye1k *craye1k;
+
+ craye1k = craye1k_global;
+ slot = PSN(to_ctrl(hotplug_slot));
+
+ if (set_primary) {
+ if (craye1k_set_primary(craye1k) != 0) {
+ craye1k->get_led_failed++;
+ return -EIO;
+ }
+ }
+
-EIO when craye1k_set_primary() fails,but -EINVAL when LED reads fail
is it not both hardware I/O failures case ?
+ locate = craye1k_get_slot_led(craye1k, slot, true);
+ if (locate == -1) {
+ craye1k->get_led_failed++;
+ return -EINVAL;
+ }
+ msleep(CRAYE1K_POST_CMD_WAIT_MS);
+
+ fault = craye1k_get_slot_led(craye1k, slot, false);
+ if (fault == -1) {
+ craye1k->get_led_failed++;
+ return -EINVAL;
+ }
+ msleep(CRAYE1K_POST_CMD_WAIT_MS);
+
+ *status = locate << 1 | fault;
+
+ return 0;
+}
Thanks,
Alok
_______________________________________________
Openipmi-developer mailing list
Openipmi-developer@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/openipmi-developer