[PATCH] drm/kms/mode: added a new helper for calculating videomode from crtc's display mode
1. -Added a new helper drm_display_mode_crtc_to_videomode -This helper calculates mode parameters like horizontal front_porch, back_porch, sync length vertical front_porch, back_porch, sync length using crtc_* fields of struct drm_display_mode -It uses following fields of crtc mode horizontal sync start/end, active and total length vertical sync start/end, active and total length 2. -Most of the driver use user-supplied mode for calculating videomode -However, few drivers use HW (crtc) mode for calculating videomode -This helper will be useful for such drivers 3. -Currently following drivers will be using this new helper -arm hdlcd -atmel hlcdc -exynos 5433 decon -exynos7 decon -exynos fimd 4. -This patch removes related duplicate code from above mentioned drivers Signed-off-by: Satendra Singh ThakurCc: Madhur Verma Cc: Hemanshu Srivastava --- drivers/gpu/drm/arm/hdlcd_crtc.c | 8 +--- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 7 +-- drivers/gpu/drm/drm_modes.c| 20 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 22 ++ drivers/gpu/drm/exynos/exynos7_drm_decon.c | 23 ++- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 22 +- include/drm/drm_modes.h| 2 ++ 7 files changed, 53 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index cf5cbd6..d20e471 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -130,13 +130,7 @@ static void hdlcd_crtc_mode_set_nofb(struct drm_crtc *crtc) struct videomode vm; unsigned int polarities, err; - vm.vfront_porch = m->crtc_vsync_start - m->crtc_vdisplay; - vm.vback_porch = m->crtc_vtotal - m->crtc_vsync_end; - vm.vsync_len = m->crtc_vsync_end - m->crtc_vsync_start; - vm.hfront_porch = m->crtc_hsync_start - m->crtc_hdisplay; - vm.hback_porch = m->crtc_htotal - m->crtc_hsync_end; - vm.hsync_len = m->crtc_hsync_end - m->crtc_hsync_start; - + drm_display_mode_crtc_to_videomode(m, ); polarities = HDLCD_POLARITY_DATAEN | HDLCD_POLARITY_DATA; if 
(m->flags & DRM_MODE_FLAG_PHSYNC) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index d732810..bafcef6 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -81,12 +81,7 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) unsigned int cfg; int div; - vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; - vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; - vm.vsync_len = adj->crtc_vsync_end - adj->crtc_vsync_start; - vm.hfront_porch = adj->crtc_hsync_start - adj->crtc_hdisplay; - vm.hback_porch = adj->crtc_htotal - adj->crtc_hsync_end; - vm.hsync_len = adj->crtc_hsync_end - adj->crtc_hsync_start; + drm_display_mode_crtc_to_videomode(adj, ); regmap_write(regmap, ATMEL_HLCDC_CFG(1), (vm.hsync_len - 1) | ((vm.vsync_len - 1) << 16)); diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index e82b61e..a406749 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -654,6 +654,26 @@ void drm_display_mode_to_videomode(const struct drm_display_mode *dmode, vm->flags |= DISPLAY_FLAGS_DOUBLECLK; } EXPORT_SYMBOL_GPL(drm_display_mode_to_videomode); +/** + * drm_display_mode_crtc_to_videomode - fill in @vm using crtc fields of@dmode, + * @dmode: drm_display_mode structure to use as source + * @vm: videomode structure to use as destination + * + * Fills out @vm using the crtc display mode specified in @dmode. 
+ */ +void drm_display_mode_crtc_to_videomode(const struct drm_display_mode *dmode, + struct videomode *vm) +{ + vm->hfront_porch = dmode->crtc_hsync_start - dmode->crtc_hdisplay; + vm->hsync_len = dmode->crtc_hsync_end - dmode->crtc_hsync_start; + vm->hback_porch = dmode->crtc_htotal - dmode->crtc_hsync_end; + + vm->vfront_porch = dmode->crtc_vsync_start - dmode->crtc_vdisplay; + vm->vsync_len = dmode->crtc_vsync_end - dmode->crtc_vsync_start; + vm->vback_porch = dmode->crtc_vtotal - dmode->crtc_vsync_end; + +} +EXPORT_SYMBOL_GPL(drm_display_mode_crtc_to_videomode); /** * drm_bus_flags_from_videomode - extract information about pixelclk and diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 1c330f2..1ba73a8 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -20,6 +20,7 @@ #include #include #include +#include #include
[PATCH] drm/kms/mode: added a new helper for calculating videomode from crtc's display mode
1. -Added a new helper drm_display_mode_crtc_to_videomode -This helper calculates mode parameters like horizontal front_porch, back_porch, sync length vertical front_porch, back_porch, sync length using crtc_* fields of struct drm_display_mode -It uses following fields of crtc mode horizontal sync start/end, active and total length vertical sync start/end, active and total length 2. -Most of the driver use user-supplied mode for calculating videomode -However, few drivers use HW (crtc) mode for calculating videomode -This helper will be useful for such drivers 3. -Currently following drivers will be using this new helper -arm hdlcd -atmel hlcdc -exynos 5433 decon -exynos7 decon -exynos fimd 4. -This patch removes related duplicate code from above mentioned drivers Signed-off-by: Satendra Singh Thakur Cc: Madhur Verma Cc: Hemanshu Srivastava --- drivers/gpu/drm/arm/hdlcd_crtc.c | 8 +--- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 7 +-- drivers/gpu/drm/drm_modes.c| 20 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 22 ++ drivers/gpu/drm/exynos/exynos7_drm_decon.c | 23 ++- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 22 +- include/drm/drm_modes.h| 2 ++ 7 files changed, 53 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c index cf5cbd6..d20e471 100644 --- a/drivers/gpu/drm/arm/hdlcd_crtc.c +++ b/drivers/gpu/drm/arm/hdlcd_crtc.c @@ -130,13 +130,7 @@ static void hdlcd_crtc_mode_set_nofb(struct drm_crtc *crtc) struct videomode vm; unsigned int polarities, err; - vm.vfront_porch = m->crtc_vsync_start - m->crtc_vdisplay; - vm.vback_porch = m->crtc_vtotal - m->crtc_vsync_end; - vm.vsync_len = m->crtc_vsync_end - m->crtc_vsync_start; - vm.hfront_porch = m->crtc_hsync_start - m->crtc_hdisplay; - vm.hback_porch = m->crtc_htotal - m->crtc_hsync_end; - vm.hsync_len = m->crtc_hsync_end - m->crtc_hsync_start; - + drm_display_mode_crtc_to_videomode(m, ); polarities = HDLCD_POLARITY_DATAEN | HDLCD_POLARITY_DATA; if 
(m->flags & DRM_MODE_FLAG_PHSYNC) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index d732810..bafcef6 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -81,12 +81,7 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) unsigned int cfg; int div; - vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; - vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; - vm.vsync_len = adj->crtc_vsync_end - adj->crtc_vsync_start; - vm.hfront_porch = adj->crtc_hsync_start - adj->crtc_hdisplay; - vm.hback_porch = adj->crtc_htotal - adj->crtc_hsync_end; - vm.hsync_len = adj->crtc_hsync_end - adj->crtc_hsync_start; + drm_display_mode_crtc_to_videomode(adj, ); regmap_write(regmap, ATMEL_HLCDC_CFG(1), (vm.hsync_len - 1) | ((vm.vsync_len - 1) << 16)); diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index e82b61e..a406749 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -654,6 +654,26 @@ void drm_display_mode_to_videomode(const struct drm_display_mode *dmode, vm->flags |= DISPLAY_FLAGS_DOUBLECLK; } EXPORT_SYMBOL_GPL(drm_display_mode_to_videomode); +/** + * drm_display_mode_crtc_to_videomode - fill in @vm using crtc fields of@dmode, + * @dmode: drm_display_mode structure to use as source + * @vm: videomode structure to use as destination + * + * Fills out @vm using the crtc display mode specified in @dmode. 
+ */ +void drm_display_mode_crtc_to_videomode(const struct drm_display_mode *dmode, + struct videomode *vm) +{ + vm->hfront_porch = dmode->crtc_hsync_start - dmode->crtc_hdisplay; + vm->hsync_len = dmode->crtc_hsync_end - dmode->crtc_hsync_start; + vm->hback_porch = dmode->crtc_htotal - dmode->crtc_hsync_end; + + vm->vfront_porch = dmode->crtc_vsync_start - dmode->crtc_vdisplay; + vm->vsync_len = dmode->crtc_vsync_end - dmode->crtc_vsync_start; + vm->vback_porch = dmode->crtc_vtotal - dmode->crtc_vsync_end; + +} +EXPORT_SYMBOL_GPL(drm_display_mode_crtc_to_videomode); /** * drm_bus_flags_from_videomode - extract information about pixelclk and diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 1c330f2..1ba73a8 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" @@ -225,26 +226,23 @@ static
Re: [PATCH v2 4/9] x86, memcpy_mcsafe: add write-protection-fault handling
On 05/03/2018 07:59 AM, Dan Williams wrote: > In preparation for using memcpy_mcsafe() to handle user copies it needs > to be to handle write-protection faults while writing user pages. Add > MMU-fault handlers alongside the machine-check exception handlers. > > Note that the machine check fault exception handling makes assumptions > about source buffer alignment and poison alignment. In the write fault > case, given the destination buffer is arbitrarily aligned, it needs a > separate / additional fault handling approach. The mcsafe_handle_tail() > helper is reused. The @limit argument is set to @len since there is no > safety concern about retriggering an MMU fault, and this simplifies the > assembly. > > diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c > index 75d3776123cc..9787f5ee0cf9 100644 > --- a/arch/x86/lib/usercopy_64.c > +++ b/arch/x86/lib/usercopy_64.c > @@ -75,6 +75,23 @@ copy_user_handle_tail(char *to, char *from, unsigned len) > return len; > } > > +/* > + * Similar to copy_user_handle_tail, probe for the write fault point, > + * but reuse __memcpy_mcsafe in case a new read error is encountered. > + * clac() is handled in _copy_to_iter_mcsafe(). > + */ > +__visible unsigned long > +mcsafe_handle_tail(char *to, char *from, unsigned len) > +{ > + for (; len; --len, to++) { > + unsigned long rem = memcpy_mcsafe(to, from, 1); > + Hmm why not for (; len; --len, from++, to++) > + if (rem) > + break; > + } > + return len; > +} --Mika
Re: [PATCH v2 4/9] x86, memcpy_mcsafe: add write-protection-fault handling
On 05/03/2018 07:59 AM, Dan Williams wrote: > In preparation for using memcpy_mcsafe() to handle user copies it needs > to be to handle write-protection faults while writing user pages. Add > MMU-fault handlers alongside the machine-check exception handlers. > > Note that the machine check fault exception handling makes assumptions > about source buffer alignment and poison alignment. In the write fault > case, given the destination buffer is arbitrarily aligned, it needs a > separate / additional fault handling approach. The mcsafe_handle_tail() > helper is reused. The @limit argument is set to @len since there is no > safety concern about retriggering an MMU fault, and this simplifies the > assembly. > > diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c > index 75d3776123cc..9787f5ee0cf9 100644 > --- a/arch/x86/lib/usercopy_64.c > +++ b/arch/x86/lib/usercopy_64.c > @@ -75,6 +75,23 @@ copy_user_handle_tail(char *to, char *from, unsigned len) > return len; > } > > +/* > + * Similar to copy_user_handle_tail, probe for the write fault point, > + * but reuse __memcpy_mcsafe in case a new read error is encountered. > + * clac() is handled in _copy_to_iter_mcsafe(). > + */ > +__visible unsigned long > +mcsafe_handle_tail(char *to, char *from, unsigned len) > +{ > + for (; len; --len, to++) { > + unsigned long rem = memcpy_mcsafe(to, from, 1); > + Hmm why not for (; len; --len, from++, to++) > + if (rem) > + break; > + } > + return len; > +} --Mika
[PATCH v2] NFC: fdp: Remove __func__ from dev_dbg()
Remove redundant __func__ parameter from dev_dgb() calls. v2: Deleted empty dev_dbg() trace calls, which are redundant if function tracer is enabled. Signed-off-by: Amit Pundir--- drivers/nfc/fdp/fdp.c | 18 +++--- drivers/nfc/fdp/i2c.c | 17 - 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index d5784a4..f64a6fd 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -249,8 +249,6 @@ static int fdp_nci_open(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - r = info->phy_ops->enable(info->phy); return r; @@ -261,7 +259,6 @@ static int fdp_nci_close(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); return 0; } @@ -270,8 +267,6 @@ static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - if (atomic_dec_and_test(>data_pkt_counter)) info->data_pkt_counter_cb(ndev); @@ -283,7 +278,6 @@ int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); return nci_recv_frame(ndev, skb); } EXPORT_SYMBOL(fdp_nci_recv_frame); @@ -498,8 +492,6 @@ static int fdp_nci_setup(struct nci_dev *ndev) int r; u8 patched = 0; - dev_dbg(dev, "%s\n", __func__); - r = nci_core_init(ndev); if (r) goto error; @@ -609,7 +601,6 @@ static int fdp_nci_core_reset_ntf_packet(struct nci_dev *ndev, struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_reset_ntf = 1; wake_up(>setup_wq); @@ -622,7 +613,6 @@ static int fdp_nci_prop_patch_ntf_packet(struct nci_dev *ndev, struct fdp_nci_info *info = 
nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_patch_ntf = 1; info->setup_patch_status = skb->data[0]; wake_up(>setup_wq); @@ -637,7 +627,7 @@ static int fdp_nci_prop_patch_rsp_packet(struct nci_dev *ndev, struct device *dev = >phy->i2c_dev->dev; u8 status = skb->data[0]; - dev_dbg(dev, "%s: status 0x%x\n", __func__, status); + dev_dbg(dev, "status 0x%x\n", status); nci_req_complete(ndev, status); return 0; @@ -650,7 +640,7 @@ static int fdp_nci_prop_set_production_data_rsp_packet(struct nci_dev *ndev, struct device *dev = >phy->i2c_dev->dev; u8 status = skb->data[0]; - dev_dbg(dev, "%s: status 0x%x\n", __func__, status); + dev_dbg(dev, "status 0x%x\n", status); nci_req_complete(ndev, status); return 0; @@ -695,7 +685,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, dev_dbg(dev, "OTP version %d\n", info->otp_version); dev_dbg(dev, "RAM version %d\n", info->ram_version); dev_dbg(dev, "key index %d\n", info->key_index); - dev_dbg(dev, "%s: status 0x%x\n", __func__, rsp->status); + dev_dbg(dev, "status 0x%x\n", rsp->status); nci_req_complete(ndev, rsp->status); @@ -798,8 +788,6 @@ void fdp_nci_remove(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - nci_unregister_device(ndev); nci_free_device(ndev); } diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c4da50e..f355ab2 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -57,7 +57,6 @@ static int fdp_nci_i2c_enable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(>i2c_dev->dev, "%s\n", __func__); fdp_nci_i2c_reset(phy); return 0; @@ -67,7 +66,6 @@ static void fdp_nci_i2c_disable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(>i2c_dev->dev, "%s\n", __func__); fdp_nci_i2c_reset(phy); } @@ -113,8 +111,8 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb) } if (r < 0 || r 
!= skb->len) - dev_dbg(>dev, "%s: error err=%d len=%d\n", - __func__, r, skb->len); + dev_dbg(>dev, "error err=%d len=%d\n", + r, skb->len); if (r >= 0) { if (r != skb->len) { @@ -152,8 +150,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff
[PATCH v2] NFC: fdp: Remove __func__ from dev_dbg()
Remove redundant __func__ parameter from dev_dgb() calls. v2: Deleted empty dev_dbg() trace calls, which are redundant if function tracer is enabled. Signed-off-by: Amit Pundir --- drivers/nfc/fdp/fdp.c | 18 +++--- drivers/nfc/fdp/i2c.c | 17 - 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index d5784a4..f64a6fd 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -249,8 +249,6 @@ static int fdp_nci_open(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - r = info->phy_ops->enable(info->phy); return r; @@ -261,7 +259,6 @@ static int fdp_nci_close(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); return 0; } @@ -270,8 +267,6 @@ static int fdp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - if (atomic_dec_and_test(>data_pkt_counter)) info->data_pkt_counter_cb(ndev); @@ -283,7 +278,6 @@ int fdp_nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); return nci_recv_frame(ndev, skb); } EXPORT_SYMBOL(fdp_nci_recv_frame); @@ -498,8 +492,6 @@ static int fdp_nci_setup(struct nci_dev *ndev) int r; u8 patched = 0; - dev_dbg(dev, "%s\n", __func__); - r = nci_core_init(ndev); if (r) goto error; @@ -609,7 +601,6 @@ static int fdp_nci_core_reset_ntf_packet(struct nci_dev *ndev, struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_reset_ntf = 1; wake_up(>setup_wq); @@ -622,7 +613,6 @@ static int fdp_nci_prop_patch_ntf_packet(struct nci_dev *ndev, struct fdp_nci_info *info = 
nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); info->setup_patch_ntf = 1; info->setup_patch_status = skb->data[0]; wake_up(>setup_wq); @@ -637,7 +627,7 @@ static int fdp_nci_prop_patch_rsp_packet(struct nci_dev *ndev, struct device *dev = >phy->i2c_dev->dev; u8 status = skb->data[0]; - dev_dbg(dev, "%s: status 0x%x\n", __func__, status); + dev_dbg(dev, "status 0x%x\n", status); nci_req_complete(ndev, status); return 0; @@ -650,7 +640,7 @@ static int fdp_nci_prop_set_production_data_rsp_packet(struct nci_dev *ndev, struct device *dev = >phy->i2c_dev->dev; u8 status = skb->data[0]; - dev_dbg(dev, "%s: status 0x%x\n", __func__, status); + dev_dbg(dev, "status 0x%x\n", status); nci_req_complete(ndev, status); return 0; @@ -695,7 +685,7 @@ static int fdp_nci_core_get_config_rsp_packet(struct nci_dev *ndev, dev_dbg(dev, "OTP version %d\n", info->otp_version); dev_dbg(dev, "RAM version %d\n", info->ram_version); dev_dbg(dev, "key index %d\n", info->key_index); - dev_dbg(dev, "%s: status 0x%x\n", __func__, rsp->status); + dev_dbg(dev, "status 0x%x\n", rsp->status); nci_req_complete(ndev, rsp->status); @@ -798,8 +788,6 @@ void fdp_nci_remove(struct nci_dev *ndev) struct fdp_nci_info *info = nci_get_drvdata(ndev); struct device *dev = >phy->i2c_dev->dev; - dev_dbg(dev, "%s\n", __func__); - nci_unregister_device(ndev); nci_free_device(ndev); } diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index c4da50e..f355ab2 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -57,7 +57,6 @@ static int fdp_nci_i2c_enable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(>i2c_dev->dev, "%s\n", __func__); fdp_nci_i2c_reset(phy); return 0; @@ -67,7 +66,6 @@ static void fdp_nci_i2c_disable(void *phy_id) { struct fdp_i2c_phy *phy = phy_id; - dev_dbg(>i2c_dev->dev, "%s\n", __func__); fdp_nci_i2c_reset(phy); } @@ -113,8 +111,8 @@ static int fdp_nci_i2c_write(void *phy_id, struct sk_buff *skb) } if (r < 0 || r 
!= skb->len) - dev_dbg(>dev, "%s: error err=%d len=%d\n", - __func__, r, skb->len); + dev_dbg(>dev, "error err=%d len=%d\n", + r, skb->len); if (r >= 0) { if (r != skb->len) { @@ -152,8 +150,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb)
Re: [PATCH 2/2] drivers core: multi-threading device shutdown
This code was a pleasure to read, super clean. On Wed, May 02, 2018 at 11:59:31PM -0400, Pavel Tatashin wrote: > When system is rebooted, halted or kexeced device_shutdown() is > called. > > This function shuts down every single device by calling either: > dev->bus->shutdown(dev) > dev->driver->shutdown(dev) > > Even on a machine just with a moderate amount of devices, device_shutdown() > may take multiple seconds to complete. Because many devices require a > specific delays to perform this operation. > > Here is sample analysis of time it takes to call device_shutdown() on > two socket Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz machine. > > device_shutdown 2.95s > mlx4_shutdown1.14s > megasas_shutdown 0.24s > ixgbe_shutdown 0.37s x 4 (four ixgbe devices on my machine). > the rest 0.09s > > In mlx4 we spent the most time, but that is because there is a 1 second > sleep: > mlx4_shutdown > mlx4_unload_one > mlx4_free_ownership >msleep(1000) > > With megasas we spend quoter of second, but sometimes longer (up-to 0.5s) > in this path: > > megasas_shutdown > megasas_flush_cache > megasas_issue_blocked_cmd > wait_event_timeout > > Finally, with ixgbe_shutdown() it takes 0.37 for each device, but that time > is spread all over the place, with bigger offenders: > > ixgbe_shutdown > __ixgbe_shutdown > ixgbe_close_suspend > ixgbe_down > ixgbe_init_hw_generic > ixgbe_reset_hw_X540 > msleep(100);0.104483472 > ixgbe_get_san_mac_addr_generic 0.048414851 > ixgbe_get_wwn_prefix_generic0.048409893 > ixgbe_start_hw_X540 > ixgbe_start_hw_generic > ixgbe_clear_hw_cntrs_generic 0.048581502 > ixgbe_setup_fc_generic0.024225800 > > All the ixgbe_*generic functions end-up calling: > ixgbe_read_eerd_X540() > ixgbe_acquire_swfw_sync_X540 > usleep_range(5000, 6000); > ixgbe_release_swfw_sync_X540 > usleep_range(5000, 6000); > > While these are short sleeps, they end-up calling them over 24 times! > 24 * 0.0055s = 0.132s. Adding-up to 0.528s for four devices. 
> > While we should keep optimizing the individual device drivers, in some > cases this is simply a hardware property that forces a specific delay, and > we must wait. > > So, the solution for this problem is to shutdown devices in parallel. > However, we must shutdown children before shutting down parents, so parent > device must wait for its children to finish. > > With this patch, on the same machine devices_shutdown() takes 1.142s, and > without mlx4 one second delay only 0.38s > > Signed-off-by: Pavel Tatashin> --- > drivers/base/core.c | 238 +++- > 1 file changed, 189 insertions(+), 49 deletions(-) > > diff --git a/drivers/base/core.c b/drivers/base/core.c > index b610816eb887..f370369a303b 100644 > --- a/drivers/base/core.c > +++ b/drivers/base/core.c > @@ -25,6 +25,7 @@ > #include > #include > #include > +#include > > #include "base.h" > #include "power/power.h" > @@ -2102,6 +2103,59 @@ const char *device_get_devnode(struct device *dev, > return *tmp = s; > } > > +/** > + * device_children_count - device children count > + * @parent: parent struct device. > + * > + * Returns number of children for this device or 0 if nonde. > + */ > +static int device_children_count(struct device *parent) > +{ > + struct klist_iter i; > + int children = 0; > + > + if (!parent->p) > + return 0; > + > + klist_iter_init(>p->klist_children, ); > + while (next_device()) > + children++; > + klist_iter_exit(); > + > + return children; > +} > + > +/** > + * device_get_child_by_index - Return child using the provide index. > + * @parent: parent struct device. > + * @index: Index of the child, where 0 is the first child in the children > list, > + * and so on. > + * > + * Returns child or NULL if child with this index is not present. 
> + */ > +static struct device * > +device_get_child_by_index(struct device *parent, int index) > +{ > + struct klist_iter i; > + struct device *dev = NULL, *d; > + int child_index = 0; > + > + if (!parent->p || index < 0) > + return NULL; > + > + klist_iter_init(>p->klist_children, ); > + while ((d = next_device()) != NULL) { perhaps: while ((d = next_device())) { > + if (child_index == index) { > + dev = d; > + break; > + } > + child_index++; > + } > + klist_iter_exit(); > + > + return dev; > +} > + > /** > * device_for_each_child - device child iterator. > * @parent: parent struct device. > @@ -2765,71
Re: [PATCH 2/2] drivers core: multi-threading device shutdown
This code was a pleasure to read, super clean. On Wed, May 02, 2018 at 11:59:31PM -0400, Pavel Tatashin wrote: > When system is rebooted, halted or kexeced device_shutdown() is > called. > > This function shuts down every single device by calling either: > dev->bus->shutdown(dev) > dev->driver->shutdown(dev) > > Even on a machine just with a moderate amount of devices, device_shutdown() > may take multiple seconds to complete. Because many devices require a > specific delays to perform this operation. > > Here is sample analysis of time it takes to call device_shutdown() on > two socket Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz machine. > > device_shutdown 2.95s > mlx4_shutdown1.14s > megasas_shutdown 0.24s > ixgbe_shutdown 0.37s x 4 (four ixgbe devices on my machine). > the rest 0.09s > > In mlx4 we spent the most time, but that is because there is a 1 second > sleep: > mlx4_shutdown > mlx4_unload_one > mlx4_free_ownership >msleep(1000) > > With megasas we spend quoter of second, but sometimes longer (up-to 0.5s) > in this path: > > megasas_shutdown > megasas_flush_cache > megasas_issue_blocked_cmd > wait_event_timeout > > Finally, with ixgbe_shutdown() it takes 0.37 for each device, but that time > is spread all over the place, with bigger offenders: > > ixgbe_shutdown > __ixgbe_shutdown > ixgbe_close_suspend > ixgbe_down > ixgbe_init_hw_generic > ixgbe_reset_hw_X540 > msleep(100);0.104483472 > ixgbe_get_san_mac_addr_generic 0.048414851 > ixgbe_get_wwn_prefix_generic0.048409893 > ixgbe_start_hw_X540 > ixgbe_start_hw_generic > ixgbe_clear_hw_cntrs_generic 0.048581502 > ixgbe_setup_fc_generic0.024225800 > > All the ixgbe_*generic functions end-up calling: > ixgbe_read_eerd_X540() > ixgbe_acquire_swfw_sync_X540 > usleep_range(5000, 6000); > ixgbe_release_swfw_sync_X540 > usleep_range(5000, 6000); > > While these are short sleeps, they end-up calling them over 24 times! > 24 * 0.0055s = 0.132s. Adding-up to 0.528s for four devices. 
> > While we should keep optimizing the individual device drivers, in some > cases this is simply a hardware property that forces a specific delay, and > we must wait. > > So, the solution for this problem is to shutdown devices in parallel. > However, we must shutdown children before shutting down parents, so parent > device must wait for its children to finish. > > With this patch, on the same machine devices_shutdown() takes 1.142s, and > without mlx4 one second delay only 0.38s > > Signed-off-by: Pavel Tatashin > --- > drivers/base/core.c | 238 +++- > 1 file changed, 189 insertions(+), 49 deletions(-) > > diff --git a/drivers/base/core.c b/drivers/base/core.c > index b610816eb887..f370369a303b 100644 > --- a/drivers/base/core.c > +++ b/drivers/base/core.c > @@ -25,6 +25,7 @@ > #include > #include > #include > +#include > > #include "base.h" > #include "power/power.h" > @@ -2102,6 +2103,59 @@ const char *device_get_devnode(struct device *dev, > return *tmp = s; > } > > +/** > + * device_children_count - device children count > + * @parent: parent struct device. > + * > + * Returns number of children for this device or 0 if nonde. > + */ > +static int device_children_count(struct device *parent) > +{ > + struct klist_iter i; > + int children = 0; > + > + if (!parent->p) > + return 0; > + > + klist_iter_init(>p->klist_children, ); > + while (next_device()) > + children++; > + klist_iter_exit(); > + > + return children; > +} > + > +/** > + * device_get_child_by_index - Return child using the provide index. > + * @parent: parent struct device. > + * @index: Index of the child, where 0 is the first child in the children > list, > + * and so on. > + * > + * Returns child or NULL if child with this index is not present. 
> + */ > +static struct device * > +device_get_child_by_index(struct device *parent, int index) > +{ > + struct klist_iter i; > + struct device *dev = NULL, *d; > + int child_index = 0; > + > + if (!parent->p || index < 0) > + return NULL; > + > + klist_iter_init(>p->klist_children, ); > + while ((d = next_device()) != NULL) { perhaps: while ((d = next_device())) { > + if (child_index == index) { > + dev = d; > + break; > + } > + child_index++; > + } > + klist_iter_exit(); > + > + return dev; > +} > + > /** > * device_for_each_child - device child iterator. > * @parent: parent struct device. > @@ -2765,71 +2819,157 @@ int
[PATCH] drm/atomic: Handling the case when setting old crtc for plane
In the func drm_atomic_set_crtc_for_plane, with the current code, if the crtc of the plane_state and the crtc passed as argument to the func are the same, the entire func will be executed in vain. It will get state of crtc and clear and set the bits in plane_mask. All these steps are not required for same old crtc. Ideally, we should do nothing in this case, this patch handles the same, and causes the program to return without doing anything in such a scenario. Signed-off-by: Satendra Singh Thakur Cc: Madhur Verma Cc: Hemanshu Srivastava --- drivers/gpu/drm/drm_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 7d25c42..5bd3365 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1421,7 +1421,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); -- 2.7.4
[PATCH] drm/atomic: Handling the case when setting old crtc for plane
In the func drm_atomic_set_crtc_for_plane, with the current code, if the crtc of the plane_state and the crtc passed as argument to the func are the same, the entire func will be executed in vain. It will get state of crtc and clear and set the bits in plane_mask. All these steps are not required for same old crtc. Ideally, we should do nothing in this case, this patch handles the same, and causes the program to return without doing anything in such a scenario. Signed-off-by: Satendra Singh Thakur Cc: Madhur Verma Cc: Hemanshu Srivastava --- drivers/gpu/drm/drm_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 7d25c42..5bd3365 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1421,7 +1421,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); -- 2.7.4
[PATCH v2] efi/capsule-loader: Don't output reset log when reset flags are not set
It means firmware attempts to immediately process or launch the capsule when reset flags in capsule header are not set. Moreover, reset is not needed in this case. The current code will output log to indicate reset. This patch adds a branch to avoid reset log output when the flags are not set. Cc: Joey Zheng Signed-off-by: Shunyong Yang --- Changes in v2: *Add EFI_CAPSULE_PERSIST_ACROSS_RESET check according to Ard's suggestion. --- drivers/firmware/efi/capsule-loader.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index e456f4602df1..344785ef8539 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -134,10 +134,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) /* Indicate capsule binary uploading is done */ cap_info->index = NO_FURTHER_WRITE_ACTION; - pr_info("Successfully upload capsule file with reboot type '%s'\n", - !cap_info->reset_type ? "RESET_COLD" : - cap_info->reset_type == 1 ? "RESET_WARM" : - "RESET_SHUTDOWN"); + + if (cap_info->header.flags & EFI_CAPSULE_PERSIST_ACROSS_RESET) + pr_info("Successfully upload capsule file with reboot type '%s'\n", + !cap_info->reset_type ? "RESET_COLD" : + cap_info->reset_type == 1 ? "RESET_WARM" : + "RESET_SHUTDOWN"); + else + pr_info("Successfully upload, process and launch capsule file\n"); + return 0; } -- 1.8.3.1
[PATCH v2] efi/capsule-loader: Don't output reset log when reset flags are not set
It means firmware attempts to immediately process or launch the capsule when reset flags in capsule header are not set. Moreover, reset is not needed in this case. The current code will output log to indicate reset. This patch adds a branch to avoid reset log output when the flags are not set. Cc: Joey Zheng Signed-off-by: Shunyong Yang --- Changes in v2: *Add EFI_CAPSULE_PERSIST_ACROSS_RESET check according to Ard's suggestion. --- drivers/firmware/efi/capsule-loader.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index e456f4602df1..344785ef8539 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -134,10 +134,15 @@ static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info) /* Indicate capsule binary uploading is done */ cap_info->index = NO_FURTHER_WRITE_ACTION; - pr_info("Successfully upload capsule file with reboot type '%s'\n", - !cap_info->reset_type ? "RESET_COLD" : - cap_info->reset_type == 1 ? "RESET_WARM" : - "RESET_SHUTDOWN"); + + if (cap_info->header.flags & EFI_CAPSULE_PERSIST_ACROSS_RESET) + pr_info("Successfully upload capsule file with reboot type '%s'\n", + !cap_info->reset_type ? "RESET_COLD" : + cap_info->reset_type == 1 ? "RESET_WARM" : + "RESET_SHUTDOWN"); + else + pr_info("Successfully upload, process and launch capsule file\n"); + return 0; } -- 1.8.3.1
Re: INFO: rcu detected stall in __schedule
I'm not sure whether this is a PPP bug. As of uptime = 484, RCU says that it stalled for 125 seconds. -- [ 484.407032] INFO: rcu_sched self-detected stall on CPU [ 484.412488] 0-...!: (125000 ticks this GP) idle=f3e/1/4611686018427387906 softirq=112858/112858 fqs=0 [ 484.422300] (t=125000 jiffies g=61626 c=61625 q=1534) [ 484.427663] rcu_sched kthread starved for 125000 jiffies! g61626 c61625 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x402 ->cpu=0 -- 484 - 125 = 359, which was about to start SND related fuzzing in that log. -- 2033/05/18 03:36:31 executing program 1: r0 = socket(0x4a, 0x5, 0x7) setsockopt$inet_int(r0, 0x0, 0x18, &(0x7f00)=0x200, 0x4) bind$inet6(r0, &(0x7fc0)={0xa, 0x0, 0x0, @loopback={0x0, 0x1}}, 0x1c) perf_event_open(&(0x7f40)={0x2, 0x70, 0x3e5}, 0x0, 0x, 0x, 0x0) timer_create(0x0, &(0x7f0001c0)={0x0, 0x15, 0x0, @thr={&(0x7f000440), &(0x7f000540)}}, &(0x7f000200)) timer_getoverrun(0x0) perf_event_open(&(0x7f25c000)={0x2, 0x78, 0x3e3}, 0x0, 0x0, 0x, 0x0) r1 = syz_open_dev$sndctrl(&(0x7f000200)='/dev/snd/controlC#\x00', 0x2, 0x0) perf_event_open(&(0x7f001000)={0x0, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x8ce, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfff8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_bp={&(0x7f005000), 0x2}, 0x10c}, 0x0, 0x0, 0x, 0x0) ioctl$SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS(r1, 0xc0045516, &(0x7fc0)=0x1) r2 = syz_open_dev$sndpcmp(&(0x7f000100)='/dev/snd/pcmC#D#p\x00', 0x1, 0x4000) ioctl$SNDRV_SEQ_IOCTL_GET_QUEUE_CLIENT(r2, 0xc04c5349, &(0x7f000240)={0x200, 0xfcdc, 0x1}) syz_open_dev$tun(&(0x7f0003c0)='/dev/net/tun\x00', 0x0, 0x20402) ioctl$SNDRV_CTL_IOCTL_PVERSION(r1, 0xc1105517, &(0x7f001000)=""/250) ioctl$SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS(r1, 0xc0045516, &(0x7f00)) 2033/05/18 03:36:31 executing program 4: syz_emit_ethernet(0x3e, &(0x7fc0)={@broadcast=[0xff, 0xff, 0xff, 0xff, 0xff, 0xff], @empty=[0x0, 0x0, 0xb00], [], {@ipv4={0x800, {{0x5, 0x4, 0x0, 0x0, 0x30, 0x0, 0x0, 
0x0, 0x1, 0x0, @remote={0xac, 0x14, 0x14, 0xbb}, @dev={0xac, 0x14, 0x14}}, @icmp=@parameter_prob={0x5, 0x4, 0x0, 0x0, 0x0, 0x0, {0x5, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @local={0xac, 0x223, 0x14, 0xaa}, @dev={0xac, 0x14, 0x14}}}, &(0x7f00)={0x0, 0x2, [0x0, 0x2e6]}) 2033/05/18 03:36:31 executing program 1: r0 = socket$pppoe(0x18, 0x1, 0x0) connect$pppoe(r0, &(0x7fc0)={0x18, 0x0, {0x1, @broadcast=[0xff, 0xff, 0xff, 0xff, 0xff, 0xff], 'ip6_vti0\x00'}}, 0x1e) r1 = socket(0x3, 0xb, 0x8001) setsockopt$inet_sctp6_SCTP_ADAPTATION_LAYER(r1, 0x84, 0x7, &(0x7f000100)={0x2}, 0x4) ioctl$sock_inet_SIOCGIFADDR(r0, 0x8915, &(0x7f40)={'veth1_to_bridge\x00', {0x2, 0x4e21}}) r2 = syz_open_dev$admmidi(&(0x7f00)='/dev/admmidi#\x00', 0x6, 0x8000) setsockopt$SO_VM_SOCKETS_BUFFER_MAX_SIZE(r2, 0x28, 0x2, &(0x7f80)=0xff00, 0x8) [ 359.306427] snd_virmidi snd_virmidi.0: control 112:0:0:�:0 is already present --
Re: INFO: rcu detected stall in __schedule
I'm not sure whether this is a PPP bug. As of uptime = 484, RCU says that it stalled for 125 seconds. -- [ 484.407032] INFO: rcu_sched self-detected stall on CPU [ 484.412488] 0-...!: (125000 ticks this GP) idle=f3e/1/4611686018427387906 softirq=112858/112858 fqs=0 [ 484.422300] (t=125000 jiffies g=61626 c=61625 q=1534) [ 484.427663] rcu_sched kthread starved for 125000 jiffies! g61626 c61625 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x402 ->cpu=0 -- 484 - 125 = 359, which was about to start SND related fuzzing in that log. -- 2033/05/18 03:36:31 executing program 1: r0 = socket(0x4a, 0x5, 0x7) setsockopt$inet_int(r0, 0x0, 0x18, &(0x7f00)=0x200, 0x4) bind$inet6(r0, &(0x7fc0)={0xa, 0x0, 0x0, @loopback={0x0, 0x1}}, 0x1c) perf_event_open(&(0x7f40)={0x2, 0x70, 0x3e5}, 0x0, 0x, 0x, 0x0) timer_create(0x0, &(0x7f0001c0)={0x0, 0x15, 0x0, @thr={&(0x7f000440), &(0x7f000540)}}, &(0x7f000200)) timer_getoverrun(0x0) perf_event_open(&(0x7f25c000)={0x2, 0x78, 0x3e3}, 0x0, 0x0, 0x, 0x0) r1 = syz_open_dev$sndctrl(&(0x7f000200)='/dev/snd/controlC#\x00', 0x2, 0x0) perf_event_open(&(0x7f001000)={0x0, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x8ce, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xfff8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @perf_bp={&(0x7f005000), 0x2}, 0x10c}, 0x0, 0x0, 0x, 0x0) ioctl$SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS(r1, 0xc0045516, &(0x7fc0)=0x1) r2 = syz_open_dev$sndpcmp(&(0x7f000100)='/dev/snd/pcmC#D#p\x00', 0x1, 0x4000) ioctl$SNDRV_SEQ_IOCTL_GET_QUEUE_CLIENT(r2, 0xc04c5349, &(0x7f000240)={0x200, 0xfcdc, 0x1}) syz_open_dev$tun(&(0x7f0003c0)='/dev/net/tun\x00', 0x0, 0x20402) ioctl$SNDRV_CTL_IOCTL_PVERSION(r1, 0xc1105517, &(0x7f001000)=""/250) ioctl$SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS(r1, 0xc0045516, &(0x7f00)) 2033/05/18 03:36:31 executing program 4: syz_emit_ethernet(0x3e, &(0x7fc0)={@broadcast=[0xff, 0xff, 0xff, 0xff, 0xff, 0xff], @empty=[0x0, 0x0, 0xb00], [], {@ipv4={0x800, {{0x5, 0x4, 0x0, 0x0, 0x30, 0x0, 0x0, 
0x0, 0x1, 0x0, @remote={0xac, 0x14, 0x14, 0xbb}, @dev={0xac, 0x14, 0x14}}, @icmp=@parameter_prob={0x5, 0x4, 0x0, 0x0, 0x0, 0x0, {0x5, 0x4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, @local={0xac, 0x223, 0x14, 0xaa}, @dev={0xac, 0x14, 0x14}}}, &(0x7f00)={0x0, 0x2, [0x0, 0x2e6]}) 2033/05/18 03:36:31 executing program 1: r0 = socket$pppoe(0x18, 0x1, 0x0) connect$pppoe(r0, &(0x7fc0)={0x18, 0x0, {0x1, @broadcast=[0xff, 0xff, 0xff, 0xff, 0xff, 0xff], 'ip6_vti0\x00'}}, 0x1e) r1 = socket(0x3, 0xb, 0x8001) setsockopt$inet_sctp6_SCTP_ADAPTATION_LAYER(r1, 0x84, 0x7, &(0x7f000100)={0x2}, 0x4) ioctl$sock_inet_SIOCGIFADDR(r0, 0x8915, &(0x7f40)={'veth1_to_bridge\x00', {0x2, 0x4e21}}) r2 = syz_open_dev$admmidi(&(0x7f00)='/dev/admmidi#\x00', 0x6, 0x8000) setsockopt$SO_VM_SOCKETS_BUFFER_MAX_SIZE(r2, 0x28, 0x2, &(0x7f80)=0xff00, 0x8) [ 359.306427] snd_virmidi snd_virmidi.0: control 112:0:0:�:0 is already present --
Re: [PATCH V3 10/10] ASoC: amd: dma driver changes for bt i2s instance
Some checkpatch nits below... On Tue, May 1, 2018 at 2:53 PM Vijendar Mukundawrote: > With in ACP, There are three I2S controllers can be > configured/enabled ( I2S SP, I2S MICSP, I2S BT). > Default enabled I2S controller instance is I2S SP. > This patch provides required changes to support I2S BT > controller Instance. > Signed-off-by: Vijendar Mukunda > --- > v1->v2: defined i2s instance macros in acp header file > v2->v3: sqaushed previous patch series and spilt changes > into multiple patches (acp dma driver code cleanup > patches and bt i2s instance specific changes) >sound/soc/amd/acp-da7219-max98357a.c | 23 >sound/soc/amd/acp-pcm-dma.c | 256 +++ >sound/soc/amd/acp.h | 40 ++ >3 files changed, 262 insertions(+), 57 deletions(-) > diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c > index 133139d..b3184ab 100644 > --- a/sound/soc/amd/acp-da7219-max98357a.c > +++ b/sound/soc/amd/acp-da7219-max98357a.c > @@ -36,6 +36,7 @@ >#include >#include > +#include "acp.h" >#include "../codecs/da7219.h" >#include "../codecs/da7219-aad.h" > @@ -44,6 +45,7 @@ >static struct snd_soc_jack cz_jack; >static struct clk *da7219_dai_clk; > +extern int bt_pad_enable; WARNING: externs should be avoided in .c files >static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd) >{ > @@ -132,6 +134,9 @@ static const struct snd_pcm_hw_constraint_list constraints_channels = { >static int cz_da7219_startup(struct snd_pcm_substream *substream) >{ > struct snd_pcm_runtime *runtime = substream->runtime; > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > /* >* On this platform for PCM device we support stereo > @@ -143,6 +148,7 @@ static int cz_da7219_startup(struct snd_pcm_substream *substream) > snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, > _rates); > + machine->i2s_instance = I2S_BT_INSTANCE; I'm not a big fan 
of this approach, but I don't know any other way to tell a single "platform" driver (acp-pcm-dma) which of two channels (ST/BT) to use via the pcm_open() callback. Mark, can you recommend any other way of doing this? > return da7219_clk_enable(substream); >} > @@ -153,6 +159,11 @@ static void cz_da7219_shutdown(struct snd_pcm_substream *substream) >static int cz_max_startup(struct snd_pcm_substream *substream) >{ > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > + > + machine->i2s_instance = I2S_SP_INSTANCE; > return da7219_clk_enable(substream); >} > @@ -163,6 +174,11 @@ static void cz_max_shutdown(struct snd_pcm_substream *substream) >static int cz_dmic_startup(struct snd_pcm_substream *substream) >{ > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > + > + machine->i2s_instance = I2S_SP_INSTANCE; > return da7219_clk_enable(substream); >} > @@ -266,10 +282,16 @@ static int cz_probe(struct platform_device *pdev) >{ > int ret; > struct snd_soc_card *card; > + struct acp_platform_info *machine; > + machine = devm_kzalloc(>dev, sizeof(struct acp_platform_info), > + GFP_KERNEL); > + if (!machine) > + return -ENOMEM; > card = _card; > cz_card.dev = >dev; > platform_set_drvdata(pdev, card); > + snd_soc_card_set_drvdata(card, machine); > ret = devm_snd_soc_register_card(>dev, _card); > if (ret) { > dev_err(>dev, > @@ -277,6 +299,7 @@ static int cz_probe(struct platform_device *pdev) > cz_card.name, ret); > return ret; > } > + bt_pad_enable = device_property_read_bool(>dev, "bt-pad-enable"); > return 0; >} > diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c > index ec9cab3..2ea103a 100644 > --- a/sound/soc/amd/acp-pcm-dma.c > +++ b/sound/soc/amd/acp-pcm-dma.c > @@ -37,12 +37,14 @@ >#define 
MAX_BUFFER (PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS) >#define MIN_BUFFER MAX_BUFFER > -#define ST_PLAYBACK_MAX_PERIOD_SIZE 8192 > +#define ST_PLAYBACK_MAX_PERIOD_SIZE 4096 >#define ST_CAPTURE_MAX_PERIOD_SIZE ST_PLAYBACK_MAX_PERIOD_SIZE >#define ST_MAX_BUFFER (ST_PLAYBACK_MAX_PERIOD_SIZE *
Re: [PATCH V3 10/10] ASoC: amd: dma driver changes for bt i2s instance
Some checkpatch nits below... On Tue, May 1, 2018 at 2:53 PM Vijendar Mukunda wrote: > With in ACP, There are three I2S controllers can be > configured/enabled ( I2S SP, I2S MICSP, I2S BT). > Default enabled I2S controller instance is I2S SP. > This patch provides required changes to support I2S BT > controller Instance. > Signed-off-by: Vijendar Mukunda > --- > v1->v2: defined i2s instance macros in acp header file > v2->v3: sqaushed previous patch series and spilt changes > into multiple patches (acp dma driver code cleanup > patches and bt i2s instance specific changes) >sound/soc/amd/acp-da7219-max98357a.c | 23 >sound/soc/amd/acp-pcm-dma.c | 256 +++ >sound/soc/amd/acp.h | 40 ++ >3 files changed, 262 insertions(+), 57 deletions(-) > diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c > index 133139d..b3184ab 100644 > --- a/sound/soc/amd/acp-da7219-max98357a.c > +++ b/sound/soc/amd/acp-da7219-max98357a.c > @@ -36,6 +36,7 @@ >#include >#include > +#include "acp.h" >#include "../codecs/da7219.h" >#include "../codecs/da7219-aad.h" > @@ -44,6 +45,7 @@ >static struct snd_soc_jack cz_jack; >static struct clk *da7219_dai_clk; > +extern int bt_pad_enable; WARNING: externs should be avoided in .c files >static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd) >{ > @@ -132,6 +134,9 @@ static const struct snd_pcm_hw_constraint_list constraints_channels = { >static int cz_da7219_startup(struct snd_pcm_substream *substream) >{ > struct snd_pcm_runtime *runtime = substream->runtime; > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > /* >* On this platform for PCM device we support stereo > @@ -143,6 +148,7 @@ static int cz_da7219_startup(struct snd_pcm_substream *substream) > snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, > _rates); > + machine->i2s_instance = I2S_BT_INSTANCE; I'm not a big fan 
of this approach, but I don't know any other way to tell a single "platform" driver (acp-pcm-dma) which of two channels (ST/BT) to use via the pcm_open() callback. Mark, can you recommend any other way of doing this? > return da7219_clk_enable(substream); >} > @@ -153,6 +159,11 @@ static void cz_da7219_shutdown(struct snd_pcm_substream *substream) >static int cz_max_startup(struct snd_pcm_substream *substream) >{ > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > + > + machine->i2s_instance = I2S_SP_INSTANCE; > return da7219_clk_enable(substream); >} > @@ -163,6 +174,11 @@ static void cz_max_shutdown(struct snd_pcm_substream *substream) >static int cz_dmic_startup(struct snd_pcm_substream *substream) >{ > + struct snd_soc_pcm_runtime *rtd = substream->private_data; > + struct snd_soc_card *card = rtd->card; > + struct acp_platform_info *machine = snd_soc_card_get_drvdata(card); > + > + machine->i2s_instance = I2S_SP_INSTANCE; > return da7219_clk_enable(substream); >} > @@ -266,10 +282,16 @@ static int cz_probe(struct platform_device *pdev) >{ > int ret; > struct snd_soc_card *card; > + struct acp_platform_info *machine; > + machine = devm_kzalloc(>dev, sizeof(struct acp_platform_info), > + GFP_KERNEL); > + if (!machine) > + return -ENOMEM; > card = _card; > cz_card.dev = >dev; > platform_set_drvdata(pdev, card); > + snd_soc_card_set_drvdata(card, machine); > ret = devm_snd_soc_register_card(>dev, _card); > if (ret) { > dev_err(>dev, > @@ -277,6 +299,7 @@ static int cz_probe(struct platform_device *pdev) > cz_card.name, ret); > return ret; > } > + bt_pad_enable = device_property_read_bool(>dev, "bt-pad-enable"); > return 0; >} > diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c > index ec9cab3..2ea103a 100644 > --- a/sound/soc/amd/acp-pcm-dma.c > +++ b/sound/soc/amd/acp-pcm-dma.c > @@ -37,12 +37,14 @@ >#define 
MAX_BUFFER (PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS) >#define MIN_BUFFER MAX_BUFFER > -#define ST_PLAYBACK_MAX_PERIOD_SIZE 8192 > +#define ST_PLAYBACK_MAX_PERIOD_SIZE 4096 >#define ST_CAPTURE_MAX_PERIOD_SIZE ST_PLAYBACK_MAX_PERIOD_SIZE >#define ST_MAX_BUFFER (ST_PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS) >#define ST_MIN_BUFFER ST_MAX_BUFFER
Re: [PATCH] net/xfrm: Fix lookups for states with spi == 0
On Wed, May 02, 2018 at 01:41:36PM +0100, Dmitry Safonov wrote: > > But still it's possible to create ipsec with zero SPI. > And it seems not making sense to search for a state with SPI hash if > request has zero SPI. Fair enough. In fact a zero SPI is legal and defined for IPcomp. The bug arose from this patch: commit 7b4dc3600e4877178ba94c7fbf7e520421378aa6 Author: Masahide NAKAMURA Date: Wed Sep 27 22:21:52 2006 -0700 [XFRM]: Do not add a state whose SPI is zero to the SPI hash. SPI=0 is used for acquired IPsec SA and MIPv6 RO state. Such state should not be added to the SPI hash because we do not care about it on deleting path. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki I think it would be better to revert this. Cheers, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] net/xfrm: Fix lookups for states with spi == 0
On Wed, May 02, 2018 at 01:41:36PM +0100, Dmitry Safonov wrote: > > But still it's possible to create ipsec with zero SPI. > And it seems not making sense to search for a state with SPI hash if > request has zero SPI. Fair enough. In fact a zero SPI is legal and defined for IPcomp. The bug arose from this patch: commit 7b4dc3600e4877178ba94c7fbf7e520421378aa6 Author: Masahide NAKAMURA Date: Wed Sep 27 22:21:52 2006 -0700 [XFRM]: Do not add a state whose SPI is zero to the SPI hash. SPI=0 is used for acquired IPsec SA and MIPv6 RO state. Such state should not be added to the SPI hash because we do not care about it on deleting path. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki I think it would be better to revert this. Cheers, -- Email: Herbert Xu Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] mtd: rawnand: marvell: pass ms delay to wait_op
On 03/05/18 14:21, Chris Packham wrote: > marvell_nfc_wait_op() expects the delay to be expressed in milliseconds > but nand_sdr_timings uses picoseconds. Use PSEC_TO_MSEC when passing > tPROG_max to marvell_nfc_wait_op(). > > Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller > driver") > Cc: sta...@vger.kernel.org > Signed-off-by: Chris Packham > --- > drivers/mtd/nand/raw/marvell_nand.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/mtd/nand/raw/marvell_nand.c > b/drivers/mtd/nand/raw/marvell_nand.c > index 1d779a35ac8e..e4b964fd40d8 100644 > --- a/drivers/mtd/nand/raw/marvell_nand.c > +++ b/drivers/mtd/nand/raw/marvell_nand.c > @@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct > nand_chip *chip, > return ret; > > ret = marvell_nfc_wait_op(chip, > - chip->data_interface.timings.sdr.tPROG_max); > + > PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); > return ret; > } > > @@ -1494,7 +1494,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct > mtd_info *mtd, > } > > ret = marvell_nfc_wait_op(chip, > - chip->data_interface.timings.sdr.tPROG_max); > + > PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); > > marvell_nfc_disable_hw_ecc(chip); > Actually I'm not so sure about this patch. While passing the pico-second value for tPROG_max is clearly wrong and leads to seemingly indefinite hangs on some systems, converting the times to milliseconds leaves us with delays that are far too short. The old pxa3xx driver had hard coded 200ms delays. These delays now work out to 1ms which seems every bit as wrong as 6ms.
Re: [PATCH] mtd: rawnand: marvell: pass ms delay to wait_op
On 03/05/18 14:21, Chris Packham wrote: > marvell_nfc_wait_op() expects the delay to be expressed in milliseconds > but nand_sdr_timings uses picoseconds. Use PSEC_TO_MSEC when passing > tPROG_max to marvell_nfc_wait_op(). > > Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller > driver") > Cc: sta...@vger.kernel.org > Signed-off-by: Chris Packham > --- > drivers/mtd/nand/raw/marvell_nand.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/mtd/nand/raw/marvell_nand.c > b/drivers/mtd/nand/raw/marvell_nand.c > index 1d779a35ac8e..e4b964fd40d8 100644 > --- a/drivers/mtd/nand/raw/marvell_nand.c > +++ b/drivers/mtd/nand/raw/marvell_nand.c > @@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct > nand_chip *chip, > return ret; > > ret = marvell_nfc_wait_op(chip, > - chip->data_interface.timings.sdr.tPROG_max); > + > PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); > return ret; > } > > @@ -1494,7 +1494,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct > mtd_info *mtd, > } > > ret = marvell_nfc_wait_op(chip, > - chip->data_interface.timings.sdr.tPROG_max); > + > PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); > > marvell_nfc_disable_hw_ecc(chip); > Actually I'm not so sure about this patch. While passing the pico-second value for tPROG_max is clearly wrong and leads to seemingly indefinite hangs on some systems, converting the times to milliseconds leaves us with delays that are far too short. The old pxa3xx driver had hard coded 200ms delays. These delays now work out to 1ms which seems every bit as wrong as 6ms.
linux-next: Tree for May 3
Hi all, Changes since 20180502: Removed tree: idr (finished with) The rockchip tree gained a conflict against the renesas tree. Non-merge commits (relative to Linus' tree): 3788 3632 files changed, 146396 insertions(+), 66409 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), I do an x86_64 modules_install followed by builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc and sparc64 defconfig. And finally, a simple boot test of the powerpc pseries_le_defconfig kernel in qemu (with and without kvm enabled). Below is a summary of the state of the merge. I am currently merging 257 trees (counting Linus' and 44 trees of bug fix patches pending for the current merge release). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. And to Paul Gortmaker for triage and bug fixes. 
-- Cheers, Stephen Rothwell $ git checkout master $ git reset --hard stable Merging origin/master (2d618bdf7163 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rkuo/linux-hexagon-kernel) Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild) Merging kbuild-current/fixes (6d08b06e67cd Linux 4.17-rc2) Merging arc-current/for-curr (661e50bc8532 Linux 4.16-rc4) Merging arm-current/fixes (30cfae461581 ARM: replace unnecessary perl with sed and the shell $(( )) operator) Merging arm64-fixes/for-next/fixes (3789c122d0a0 arm64: avoid instrumenting atomic_ll_sc.o) Merging m68k-current/for-linus (ecd685580c8f m68k/mac: Remove bogus "FIXME" comment) Merging powerpc-fixes/fixes (b2d7ecbe3556 powerpc/kvm/booke: Fix altivec related build break) Merging sparc/master (fff75eb2a08c Merge tag 'errseq-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux) Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2) Merging net/master (7df40c2673a1 net_sched: fq: take care of throttled flows before reuse) Merging bpf/master (0f58e58e2803 Merge branch 'x86-bpf-jit-fixes') Merging ipsec/master (b4331a681822 vti6: Change minimum MTU to IPV4_MIN_MTU, vti6 can carry IPv4 too) Merging netfilter/master (2f99aa31cd7a netfilter: nf_tables: skip synchronize_rcu if transaction log is empty) Merging ipvs/master (765cca91b895 netfilter: conntrack: include kmemleak.h for kmemleak_not_leak()) Merging wireless-drivers/master (af8a41cccf8f rtlwifi: cleanup 8723be ant_sel definition) Merging mac80211/master (2f0605a697f4 nl80211: Free connkeys on external authentication failure) Merging rdma-fixes/for-rc (db82476f3741 IB/core: Make ib_mad_client_id atomic) Merging sound-current/for-linus (f13876e2c33a ALSA: pcm: Check PCM state at xfern compat ioctl) Merging pci-current/for-linus (0cf22d6b317c PCI: Add "PCIe" to pcie_print_link_status() messages) Merging 
driver-core.current/driver-core-linus (6da6c0db5316 Linux v4.17-rc3) Merging tty.current/tty-linus (6da6c0db5316 Linux v4.17-rc3) Merging usb.current/usb-linus (9aea9b6cc78d usb: musb: trace: fix NULL pointer dereference in musb_g_tx()) Merging usb-gadget-fixes/fixes (ed769520727e usb: gadget: composite Allow for larger configuration descriptors) Merging usb-serial-fixes/usb-linus (4842ed5bfcb9 USB: serial: visor: handle potential invalid device configuration) Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea: msm: fix ulpi-node lookup) Merging phy/fixes (60cc43fc8884 Linux 4.17-rc1) Merging staging.current/staging-linus (6da6c0db5316 Linux v4.17-rc3) Merging char-misc.current/char-misc-linus (6da6c0db5316 Linux v4.17-rc3) Merging input-current/for-linus (f6eeb9e54857 Input: atmel_mxt_ts - add missing compatible strings to OF device table) Merging crypto-current/master (eea0d3ea7546 crypto: drbg - set freed buffers to NULL) Mergi
linux-next: Tree for May 3
Hi all, Changes since 20180502: Removed tree: idr (finished with) The rockchip tree gained a conflict against the renesas tree. Non-merge commits (relative to Linus' tree): 3788 3632 files changed, 146396 insertions(+), 66409 deletions(-) I have created today's linux-next tree at git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git (patches at http://www.kernel.org/pub/linux/kernel/next/ ). If you are tracking the linux-next tree using git, you should not use "git pull" to do so as that will try to merge the new linux-next release with the old one. You should use "git fetch" and checkout or reset to the new master. You can see which trees have been included by looking in the Next/Trees file in the source. There are also quilt-import.log and merge.log files in the Next directory. Between each merge, the tree was built with a ppc64_defconfig for powerpc, an allmodconfig for x86_64, a multi_v7_defconfig for arm and a native build of tools/perf. After the final fixups (if any), I do an x86_64 modules_install followed by builds for x86_64 allnoconfig, powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig, allyesconfig and pseries_le_defconfig and i386, sparc and sparc64 defconfig. And finally, a simple boot test of the powerpc pseries_le_defconfig kernel in qemu (with and without kvm enabled). Below is a summary of the state of the merge. I am currently merging 257 trees (counting Linus' and 44 trees of bug fix patches pending for the current merge release). Stats about the size of the tree over time can be seen at http://neuling.org/linux-next-size.html . Status of my local build tests will be at http://kisskb.ellerman.id.au/linux-next . If maintainers want to give advice about cross compilers/configs that work, we are always open to add more builds. Thanks to Randy Dunlap for doing many randconfig builds. And to Paul Gortmaker for triage and bug fixes. 
-- Cheers, Stephen Rothwell $ git checkout master $ git reset --hard stable Merging origin/master (2d618bdf7163 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rkuo/linux-hexagon-kernel) Merging fixes/master (147a89bc71e7 Merge tag 'kconfig-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild) Merging kbuild-current/fixes (6d08b06e67cd Linux 4.17-rc2) Merging arc-current/for-curr (661e50bc8532 Linux 4.16-rc4) Merging arm-current/fixes (30cfae461581 ARM: replace unnecessary perl with sed and the shell $(( )) operator) Merging arm64-fixes/for-next/fixes (3789c122d0a0 arm64: avoid instrumenting atomic_ll_sc.o) Merging m68k-current/for-linus (ecd685580c8f m68k/mac: Remove bogus "FIXME" comment) Merging powerpc-fixes/fixes (b2d7ecbe3556 powerpc/kvm/booke: Fix altivec related build break) Merging sparc/master (fff75eb2a08c Merge tag 'errseq-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux) Merging fscrypt-current/for-stable (ae64f9bd1d36 Linux 4.15-rc2) Merging net/master (7df40c2673a1 net_sched: fq: take care of throttled flows before reuse) Merging bpf/master (0f58e58e2803 Merge branch 'x86-bpf-jit-fixes') Merging ipsec/master (b4331a681822 vti6: Change minimum MTU to IPV4_MIN_MTU, vti6 can carry IPv4 too) Merging netfilter/master (2f99aa31cd7a netfilter: nf_tables: skip synchronize_rcu if transaction log is empty) Merging ipvs/master (765cca91b895 netfilter: conntrack: include kmemleak.h for kmemleak_not_leak()) Merging wireless-drivers/master (af8a41cccf8f rtlwifi: cleanup 8723be ant_sel definition) Merging mac80211/master (2f0605a697f4 nl80211: Free connkeys on external authentication failure) Merging rdma-fixes/for-rc (db82476f3741 IB/core: Make ib_mad_client_id atomic) Merging sound-current/for-linus (f13876e2c33a ALSA: pcm: Check PCM state at xfern compat ioctl) Merging pci-current/for-linus (0cf22d6b317c PCI: Add "PCIe" to pcie_print_link_status() messages) Merging 
driver-core.current/driver-core-linus (6da6c0db5316 Linux v4.17-rc3) Merging tty.current/tty-linus (6da6c0db5316 Linux v4.17-rc3) Merging usb.current/usb-linus (9aea9b6cc78d usb: musb: trace: fix NULL pointer dereference in musb_g_tx()) Merging usb-gadget-fixes/fixes (ed769520727e usb: gadget: composite Allow for larger configuration descriptors) Merging usb-serial-fixes/usb-linus (4842ed5bfcb9 USB: serial: visor: handle potential invalid device configuration) Merging usb-chipidea-fixes/ci-for-usb-stable (964728f9f407 USB: chipidea: msm: fix ulpi-node lookup) Merging phy/fixes (60cc43fc8884 Linux 4.17-rc1) Merging staging.current/staging-linus (6da6c0db5316 Linux v4.17-rc3) Merging char-misc.current/char-misc-linus (6da6c0db5316 Linux v4.17-rc3) Merging input-current/for-linus (f6eeb9e54857 Input: atmel_mxt_ts - add missing compatible strings to OF device table) Merging crypto-current/master (eea0d3ea7546 crypto: drbg - set freed buffers to NULL) Mergi
Re: [kernel-team] [PATCH 1/3] staging: Android: vsoc: Create wc kernel mapping for region shm.
On Wed, May 2, 2018 at 9:45 PM 'Alistair Strachan' via kernel-team < kernel-t...@android.com> wrote: > Map the region shm as write-combining instead of uncachable. I think more commit message is needed. Why is this done, what does it fix, etc. It's hard to know what the improvement is without a cover-letter either. I am assuming the improvement is in performance by using the write-combining mode. thanks, - Joel
Re: [kernel-team] [PATCH 1/3] staging: Android: vsoc: Create wc kernel mapping for region shm.
On Wed, May 2, 2018 at 9:45 PM 'Alistair Strachan' via kernel-team < kernel-t...@android.com> wrote: > Map the region shm as write-combining instead of uncachable. I think more commit message is needed. Why is this done, what does it fix, etc. It's hard to know what the improvement is without a cover-letter either. I am assuming the improvement is in performance by using the write-combining mode. thanks, - Joel
[PATCH v2 3/9] x86, memcpy_mcsafe: return bytes remaining
Machine check safe memory copies are currently deployed in the pmem driver whenever reading from persistent memory media, so that -EIO is returned rather than triggering a kernel panic. While this protects most pmem accesses, it is not complete in the filesystem-dax case. When filesystem-dax is enabled reads may bypass the block layer and the driver via dax_iomap_actor() and its usage of copy_to_iter(). In preparation for creating a copy_to_iter() variant that can handle machine checks, teach memcpy_mcsafe() to return the number of bytes remaining rather than -EFAULT when an exception occurs. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Co-developed-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/include/asm/string_64.h |8 +--- arch/x86/lib/memcpy_64.S | 20 ++-- drivers/nvdimm/claim.c |3 ++- drivers/nvdimm/pmem.c|6 +++--- include/linux/string.h |4 ++-- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4752f8984923..d33f92b9fa22 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,9 +132,10 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. 
*/ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 5709f3ec22a4..f01a88391c98 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -252,14 +252,22 @@ ENDPROC(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* +* Return number of bytes not copied for any failure. Note that +* there is no "tail" handling since the source buffer is 8-byte +* aligned and poison is cacheline aligned. +*/ +.E_read_words: + shll$3, %ecx +.E_leading_bytes: + addl%edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret .previous - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) #endif diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 30852270484f..2e96b34bc936 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -276,7 +276,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, if (rw == READ) { if (unlikely(is_bad_pmem(>bb, sector, sz_align))) return -EIO; - return memcpy_mcsafe(buf, nsio->addr + offset, size); + if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) + return -EIO; } if (unlikely(is_bad_pmem(>bb, sector, sz_align))) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 9d714926ecf5..e023d6aa22b5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { unsigned 
int chunk; - int rc; + unsigned long rem; void *mem; while (len) { mem = kmap_atomic(page); chunk = min_t(unsigned int, len, PAGE_SIZE); - rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); + rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); - if (rc) + if (rem) return BLK_STS_IOERR; len -= chunk;
你可以回复我吗/////// ,
[PATCH v2 3/9] x86, memcpy_mcsafe: return bytes remaining
Machine check safe memory copies are currently deployed in the pmem driver whenever reading from persistent memory media, so that -EIO is returned rather than triggering a kernel panic. While this protects most pmem accesses, it is not complete in the filesystem-dax case. When filesystem-dax is enabled reads may bypass the block layer and the driver via dax_iomap_actor() and its usage of copy_to_iter(). In preparation for creating a copy_to_iter() variant that can handle machine checks, teach memcpy_mcsafe() to return the number of bytes remaining rather than -EFAULT when an exception occurs. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Co-developed-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/include/asm/string_64.h |8 +--- arch/x86/lib/memcpy_64.S | 20 ++-- drivers/nvdimm/claim.c |3 ++- drivers/nvdimm/pmem.c|6 +++--- include/linux/string.h |4 ++-- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4752f8984923..d33f92b9fa22 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,9 +132,10 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. 
*/ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 5709f3ec22a4..f01a88391c98 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -252,14 +252,22 @@ ENDPROC(__memcpy_mcsafe) EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* +* Return number of bytes not copied for any failure. Note that +* there is no "tail" handling since the source buffer is 8-byte +* aligned and poison is cacheline aligned. +*/ +.E_read_words: + shll$3, %ecx +.E_leading_bytes: + addl%edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret .previous - _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) #endif diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 30852270484f..2e96b34bc936 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -276,7 +276,8 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, if (rw == READ) { if (unlikely(is_bad_pmem(>bb, sector, sz_align))) return -EIO; - return memcpy_mcsafe(buf, nsio->addr + offset, size); + if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) + return -EIO; } if (unlikely(is_bad_pmem(>bb, sector, sz_align))) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 9d714926ecf5..e023d6aa22b5 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -101,15 +101,15 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { unsigned 
int chunk; - int rc; + unsigned long rem; void *mem; while (len) { mem = kmap_atomic(page); chunk = min_t(unsigned int, len, PAGE_SIZE); - rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); + rem = memcpy_mcsafe(mem + off, pmem_addr, chunk); kunmap_atomic(mem); - if (rc) + if (rem) return BLK_STS_IOERR; len -= chunk; off = 0; diff --git a/include/linux/string.h b/include/linux/string.h index dd39a690c841..4a5a0eb7df51 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -147,8 +147,8 @@ extern int memcmp(const void *,const void
你可以回复我吗/////// ,
[PATCH v2 6/9] dax: introduce a ->copy_to_iter dax operation
Similar to the ->copy_from_iter() operation, a platform may want to deploy an architecture or device specific routine for handling reads from a dax_device like /dev/pmemX. On x86 this routine will point to a machine check safe version of copy_to_iter(). For now, add the plumbing to device-mapper and the dax core. Cc: Ross ZwislerCc: Mike Snitzer Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 10 ++ drivers/md/dm-linear.c| 16 drivers/md/dm-log-writes.c| 15 +++ drivers/md/dm-stripe.c| 21 + drivers/md/dm.c | 25 + drivers/nvdimm/pmem.c |7 +++ drivers/s390/block/dcssblk.c |7 +++ fs/dax.c |3 ++- include/linux/dax.h |5 + include/linux/device-mapper.h |5 +++-- 10 files changed, 111 insertions(+), 3 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..31b839113399 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -282,6 +282,16 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_to_iter); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 775c06d953b7..d10964d41fd7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device 
*dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL +#define linear_dax_copy_to_iter NULL #endif static struct target_type linear_target = { @@ -204,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_copy_to_iter = linear_dax_copy_to_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c90c7c08a77f..9ea2b0291f20 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } + +static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), )) + return 0; + return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL +#define log_writes_dax_copy_to_iter NULL #endif static struct target_type log_writes_target = { @@ -982,6 +996,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, + .dax_copy_to_iter = log_writes_dax_copy_to_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 
fe7fb9b1aec3..8547d7594338 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, +
[PATCH v2 6/9] dax: introduce a ->copy_to_iter dax operation
Similar to the ->copy_from_iter() operation, a platform may want to deploy an architecture or device specific routine for handling reads from a dax_device like /dev/pmemX. On x86 this routine will point to a machine check safe version of copy_to_iter(). For now, add the plumbing to device-mapper and the dax core. Cc: Ross Zwisler Cc: Mike Snitzer Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 10 ++ drivers/md/dm-linear.c| 16 drivers/md/dm-log-writes.c| 15 +++ drivers/md/dm-stripe.c| 21 + drivers/md/dm.c | 25 + drivers/nvdimm/pmem.c |7 +++ drivers/s390/block/dcssblk.c |7 +++ fs/dax.c |3 ++- include/linux/dax.h |5 + include/linux/device-mapper.h |5 +++-- 10 files changed, 111 insertions(+), 3 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..31b839113399 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -282,6 +282,16 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_to_iter); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 775c06d953b7..d10964d41fd7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct 
dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), )) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL +#define linear_dax_copy_to_iter NULL #endif static struct target_type linear_target = { @@ -204,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_copy_to_iter = linear_dax_copy_to_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c90c7c08a77f..9ea2b0291f20 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } + +static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), )) + return 0; + return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL +#define log_writes_dax_copy_to_iter NULL #endif static struct target_type log_writes_target = { @@ -982,6 +996,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, + .dax_copy_to_iter = log_writes_dax_copy_to_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c 
b/drivers/md/dm-stripe.c index fe7fb9b1aec3..8547d7594338 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + sector_t
[PATCH v2 8/9] pmem: switch to copy_to_iter_mcsafe()
Use the machine check safe version of copy_to_iter() for the ->copy_to_iter() operation published by the pmem driver. Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1b8ab48365de..6d3da8c92868 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -267,7 +267,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return copy_to_iter(addr, bytes, i); + return copy_to_iter_mcsafe(addr, bytes, i); } static const struct dax_operations pmem_dax_ops = {
[PATCH v2 8/9] pmem: switch to copy_to_iter_mcsafe()
Use the machine check safe version of copy_to_iter() for the ->copy_to_iter() operation published by the pmem driver. Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1b8ab48365de..6d3da8c92868 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -267,7 +267,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) { - return copy_to_iter(addr, bytes, i); + return copy_to_iter_mcsafe(addr, bytes, i); } static const struct dax_operations pmem_dax_ops = {
[PATCH v2 9/9] x86, nfit_test: unit test for memcpy_mcsafe()
Given the fact that the ACPI "EINJ" (error injection) facility is not universally available, implement software infrastructure to validate the memcpy_mcsafe() exception handling implementation. For each potential read exception point in memcpy_mcsafe(), inject a emulated exception point at the address identified by 'mcsafe_inject' variable. With this infrastructure implement a test to validate that the 'bytes remaining' calculation is correct for a range of various source buffer alignments. This code is compiled out by default. The CONFIG_MCSAFE_DEBUG configuration symbol needs to be manually enabled by editing Kconfig.debug. I.e. this functionality can not be accidentally enabled by a user / distro, it's only for development. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Reported-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/Kconfig.debug |3 ++ arch/x86/include/asm/mcsafe_debug.h | 50 +++ arch/x86/lib/memcpy_64.S|7 + tools/testing/nvdimm/test/nfit.c| 48 ++ 4 files changed, 108 insertions(+) create mode 100644 arch/x86/include/asm/mcsafe_debug.h diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efc..8bdec78a405f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. 
+config MCSAFE_DEBUG + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/include/asm/mcsafe_debug.h b/arch/x86/include/asm/mcsafe_debug.h new file mode 100644 index ..0f85d24b46c5 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_debug.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_DEBUG_H_ +#define _MCSAFE_DEBUG_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_DEBUG +extern unsigned long mcsafe_inject; + +static inline void set_mcsafe_inject(void *addr) +{ + if (addr) + mcsafe_inject = (unsigned long) addr; + else + mcsafe_inject = ~0UL; +} +#else /* CONFIG_MCSAFE_DEBUG */ +static inline void set_mcsafe_inject(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_DEBUG */ + +#else /* __ASSEMBLY__ */ +#include + +#ifdef CONFIG_MCSAFE_DEBUG +.macro MCSAFE_DEBUG_CTL + .pushsection .data + .align 8 + .globl mcsafe_inject + mcsafe_inject: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_inject) + .popsection +.endm + +.macro MCSAFE_DEBUG offset reg count target + leaq \offset(\reg), %r9 + addq \count, %r9 + cmp mcsafe_inject, %r9 + jg \target +.endm +#else +.macro MCSAFE_DEBUG_CTL +.endm + +.macro MCSAFE_DEBUG offset reg count target +.endm +#endif /* CONFIG_MCSAFE_DEBUG */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_DEBUG_H_ */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c3b527a9f95d..e5f489b2c6ea 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,6 +184,9 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_DEBUG_CTL + /* * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. 
@@ -205,6 +209,7 @@ ENTRY(__memcpy_mcsafe) negl %ecx subl %ecx, %edx .L_read_leading_bytes: + MCSAFE_DEBUG 0 %rsi $1 .E_leading_bytes movb (%rsi), %al .L_write_leading_bytes: movb %al, (%rdi) @@ -220,6 +225,7 @@ ENTRY(__memcpy_mcsafe) jz .L_no_whole_words .L_read_words: + MCSAFE_DEBUG 0 %rsi $8 .E_read_words movq (%rsi), %r8 .L_write_words: movq %r8, (%rdi) @@ -236,6 +242,7 @@ ENTRY(__memcpy_mcsafe) /* Copy trailing bytes */ movl %edx, %ecx .L_read_trailing_bytes: + MCSAFE_DEBUG 0 %rsi $1 .E_trailing_bytes movb (%rsi), %al .L_write_trailing_bytes: movb %al, (%rdi) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4ea385be528f..db04ff658971 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -29,6 +29,8 @@ #include "nfit_test.h" #include "../watermark.h" +#include + /* * Generate an NFIT table to describe the following topology: * @@ -2681,6 +2683,51 @@ static struct platform_driver nfit_test_driver = { .id_table =
[PATCH v2 9/9] x86, nfit_test: unit test for memcpy_mcsafe()
Given the fact that the ACPI "EINJ" (error injection) facility is not universally available, implement software infrastructure to validate the memcpy_mcsafe() exception handling implementation. For each potential read exception point in memcpy_mcsafe(), inject a emulated exception point at the address identified by 'mcsafe_inject' variable. With this infrastructure implement a test to validate that the 'bytes remaining' calculation is correct for a range of various source buffer alignments. This code is compiled out by default. The CONFIG_MCSAFE_DEBUG configuration symbol needs to be manually enabled by editing Kconfig.debug. I.e. this functionality can not be accidentally enabled by a user / distro, it's only for development. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Reported-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/Kconfig.debug |3 ++ arch/x86/include/asm/mcsafe_debug.h | 50 +++ arch/x86/lib/memcpy_64.S|7 + tools/testing/nvdimm/test/nfit.c| 48 ++ 4 files changed, 108 insertions(+) create mode 100644 arch/x86/include/asm/mcsafe_debug.h diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 192e4d2f9efc..8bdec78a405f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -72,6 +72,9 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. 
+config MCSAFE_DEBUG + def_bool n + config X86_PTDUMP_CORE def_bool n diff --git a/arch/x86/include/asm/mcsafe_debug.h b/arch/x86/include/asm/mcsafe_debug.h new file mode 100644 index ..0f85d24b46c5 --- /dev/null +++ b/arch/x86/include/asm/mcsafe_debug.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MCSAFE_DEBUG_H_ +#define _MCSAFE_DEBUG_H_ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_MCSAFE_DEBUG +extern unsigned long mcsafe_inject; + +static inline void set_mcsafe_inject(void *addr) +{ + if (addr) + mcsafe_inject = (unsigned long) addr; + else + mcsafe_inject = ~0UL; +} +#else /* CONFIG_MCSAFE_DEBUG */ +static inline void set_mcsafe_inject(void *addr) +{ +} +#endif /* CONFIG_MCSAFE_DEBUG */ + +#else /* __ASSEMBLY__ */ +#include + +#ifdef CONFIG_MCSAFE_DEBUG +.macro MCSAFE_DEBUG_CTL + .pushsection .data + .align 8 + .globl mcsafe_inject + mcsafe_inject: + .quad 0 + EXPORT_SYMBOL_GPL(mcsafe_inject) + .popsection +.endm + +.macro MCSAFE_DEBUG offset reg count target + leaq \offset(\reg), %r9 + addq \count, %r9 + cmp mcsafe_inject, %r9 + jg \target +.endm +#else +.macro MCSAFE_DEBUG_CTL +.endm + +.macro MCSAFE_DEBUG offset reg count target +.endm +#endif /* CONFIG_MCSAFE_DEBUG */ +#endif /* __ASSEMBLY__ */ +#endif /* _MCSAFE_DEBUG_H_ */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index c3b527a9f95d..e5f489b2c6ea 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -183,6 +184,9 @@ ENTRY(memcpy_orig) ENDPROC(memcpy_orig) #ifndef CONFIG_UML + +MCSAFE_DEBUG_CTL + /* * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. 
@@ -205,6 +209,7 @@ ENTRY(__memcpy_mcsafe) negl %ecx subl %ecx, %edx .L_read_leading_bytes: + MCSAFE_DEBUG 0 %rsi $1 .E_leading_bytes movb (%rsi), %al .L_write_leading_bytes: movb %al, (%rdi) @@ -220,6 +225,7 @@ ENTRY(__memcpy_mcsafe) jz .L_no_whole_words .L_read_words: + MCSAFE_DEBUG 0 %rsi $8 .E_read_words movq (%rsi), %r8 .L_write_words: movq %r8, (%rdi) @@ -236,6 +242,7 @@ ENTRY(__memcpy_mcsafe) /* Copy trailing bytes */ movl %edx, %ecx .L_read_trailing_bytes: + MCSAFE_DEBUG 0 %rsi $1 .E_trailing_bytes movb (%rsi), %al .L_write_trailing_bytes: movb %al, (%rdi) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4ea385be528f..db04ff658971 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -29,6 +29,8 @@ #include "nfit_test.h" #include "../watermark.h" +#include + /* * Generate an NFIT table to describe the following topology: * @@ -2681,6 +2683,51 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; +static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +void mcsafe_test(void) +{ + bool do_inject = false; + int i; + + if (IS_ENABLED(CONFIG_MCSAFE_DEBUG)) { + pr_info("%s: run...\n", __func__); +
Re: [PATCH v2 4/4] vsprintf: Add command line option debug_early_boot
On Wed, May 02, 2018 at 09:57:57PM -0700, Kees Cook wrote: > On Wed, May 2, 2018 at 3:50 PM, Tobin C. Harding wrote: > > Currently printing [hashed] pointers requires either a hw RNG or enough > > entropy to be available. Early in the boot sequence these conditions > > may not be met resulting in a dummy string '(ptrval)' being > > printed. This makes debugging the early boot sequence difficult. We > > can relax the requirement to use cryptographically secure hashing during > > debugging. This enables debugging while keeping development/production > > kernel behaviour the same. > > > > If new command line option debug_early_boot is enabled use > > cryptographically insecure hashing and hash pointer value immediately. > > > > Signed-off-by: Tobin C. Harding > > --- > > Documentation/admin-guide/kernel-parameters.txt | 8 > > lib/vsprintf.c | 18 ++ > > 2 files changed, 26 insertions(+) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > > b/Documentation/admin-guide/kernel-parameters.txt > > index b8d1379aa039..ab619c4ccbf2 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -748,6 +748,14 @@ > > > > debug [KNL] Enable kernel debugging (events log level). > > > > + debug_early_boot > > + [KNL] Enable debugging early in the boot sequence. > > If > > + enabled, we use a weak hash instead of siphash to > > hash > > + pointers. Use this option if you need to see > > pointer > > + values during early boot (i.e you are seeing > > instances > > + of '(___ptrval___)') - cryptographically insecure, > > + please do not use on production kernels. 
> > + > > debug_locks_verbose= > > [KNL] verbose self-tests > > Format=<0|1> > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c > > index 3697a19c2b25..6c139b442267 100644 > > --- a/lib/vsprintf.c > > +++ b/lib/vsprintf.c > > @@ -1654,6 +1654,18 @@ char *device_node_string(char *buf, char *end, > > struct device_node *dn, > > return widen_string(buf, buf - buf_start, end, spec); > > } > > > > +/* Make pointers available for printing early in the boot sequence. */ > > +static int debug_early_boot; > > Please make this __ro_after_init too. Good suggestion. I forgot, we are supposed to be closing security holes, not opening them :) thanks, Tobin.
[PATCH v2 7/9] dax: report bytes remaining in dax_iomap_actor()
In preparation for protecting the dax read(2) path from media errors with copy_to_iter_mcsafe() (via dax_copy_to_iter()), convert the implementation to report the bytes successfully transferred. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Dan Williams --- fs/dax.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a64afdf7ec0d..34a2d435ae4b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -991,6 +991,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; + size_t xfer; int id; if (iov_iter_rw(iter) == READ) { @@ -1054,19 +1055,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * vfs_write(), depending on which operation we are doing. */ if (iov_iter_rw(iter) == WRITE) - map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = dax_copy_to_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, map_len, iter); - if (map_len <= 0) { - ret = map_len ? map_len : -EFAULT; - break; - } - pos += map_len; - length -= map_len; - done += map_len; + pos += xfer; + length -= xfer; + done += xfer; + + if (xfer == 0) + ret = -EFAULT; + if (xfer < map_len) + break; } dax_read_unlock(id);
Re: [PATCH v2 4/4] vsprintf: Add command line option debug_early_boot
On Wed, May 02, 2018 at 09:57:57PM -0700, Kees Cook wrote: > On Wed, May 2, 2018 at 3:50 PM, Tobin C. Harding wrote: > > Currently printing [hashed] pointers requires either a hw RNG or enough > > entropy to be available. Early in the boot sequence these conditions > > may not be met resulting in a dummy string '(ptrval)' being > > printed. This makes debugging the early boot sequence difficult. We > > can relax the requirement to use cryptographically secure hashing during > > debugging. This enables debugging while keeping development/production > > kernel behaviour the same. > > > > If new command line option debug_early_boot is enabled use > > cryptographically insecure hashing and hash pointer value immediately. > > > > Signed-off-by: Tobin C. Harding > > --- > > Documentation/admin-guide/kernel-parameters.txt | 8 > > lib/vsprintf.c | 18 ++ > > 2 files changed, 26 insertions(+) > > > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > > b/Documentation/admin-guide/kernel-parameters.txt > > index b8d1379aa039..ab619c4ccbf2 100644 > > --- a/Documentation/admin-guide/kernel-parameters.txt > > +++ b/Documentation/admin-guide/kernel-parameters.txt > > @@ -748,6 +748,14 @@ > > > > debug [KNL] Enable kernel debugging (events log level). > > > > + debug_early_boot > > + [KNL] Enable debugging early in the boot sequence. > > If > > + enabled, we use a weak hash instead of siphash to > > hash > > + pointers. Use this option if you need to see > > pointer > > + values during early boot (i.e you are seeing > > instances > > + of '(___ptrval___)') - cryptographically insecure, > > + please do not use on production kernels. 
> > + > > debug_locks_verbose= > > [KNL] verbose self-tests > > Format=<0|1> > > diff --git a/lib/vsprintf.c b/lib/vsprintf.c > > index 3697a19c2b25..6c139b442267 100644 > > --- a/lib/vsprintf.c > > +++ b/lib/vsprintf.c > > @@ -1654,6 +1654,18 @@ char *device_node_string(char *buf, char *end, > > struct device_node *dn, > > return widen_string(buf, buf - buf_start, end, spec); > > } > > > > +/* Make pointers available for printing early in the boot sequence. */ > > +static int debug_early_boot; > > Please make this __ro_after_init too. Good suggestion. I forgot, we are supposed to be closing security holes, not opening them :) thanks, Tobin.
[PATCH v2 7/9] dax: report bytes remaining in dax_iomap_actor()
In preparation for protecting the dax read(2) path from media errors with copy_to_iter_mcsafe() (via dax_copy_to_iter()), convert the implementation to report the bytes successfully transferred. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Dan Williams --- fs/dax.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index a64afdf7ec0d..34a2d435ae4b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -991,6 +991,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iov_iter *iter = data; loff_t end = pos + length, done = 0; ssize_t ret = 0; + size_t xfer; int id; if (iov_iter_rw(iter) == READ) { @@ -1054,19 +1055,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, * vfs_write(), depending on which operation we are doing. */ if (iov_iter_rw(iter) == WRITE) - map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = dax_copy_to_iter(dax_dev, pgoff, kaddr, + xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr, map_len, iter); - if (map_len <= 0) { - ret = map_len ? map_len : -EFAULT; - break; - } - pos += map_len; - length -= map_len; - done += map_len; + pos += xfer; + length -= xfer; + done += xfer; + + if (xfer == 0) + ret = -EFAULT; + if (xfer < map_len) + break; } dax_read_unlock(id);
[PATCH v2 4/9] x86, memcpy_mcsafe: add write-protection-fault handling
In preparation for using memcpy_mcsafe() to handle user copies it needs to be able to handle write-protection faults while writing user pages. Add MMU-fault handlers alongside the machine-check exception handlers. Note that the machine check fault exception handling makes assumptions about source buffer alignment and poison alignment. In the write fault case, given the destination buffer is arbitrarily aligned, it needs a separate / additional fault handling approach. The mcsafe_handle_tail() helper is reused. The @limit argument is set to @len since there is no safety concern about retriggering an MMU fault, and this simplifies the assembly. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Co-developed-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/include/asm/uaccess_64.h |3 +++ arch/x86/lib/memcpy_64.S | 14 ++ arch/x86/lib/usercopy_64.c| 17 + 3 files changed, 34 insertions(+) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398e..c63efc07891f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -194,4 +194,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index f01a88391c98..c3b527a9f95d 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -265,9 +265,23 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) mov %ecx, %eax ret + /* +* For write fault handling, given the destination is unaligned, +* we handle faults on multi-byte writes with a byte-by-byte +* copy up to the write-protected page. 
+*/ +.E_write_words: + shll$3, %ecx + addl%edx, %ecx + movl%ecx, %edx + jmp mcsafe_handle_tail + .previous _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 75d3776123cc..9787f5ee0cf9 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -75,6 +75,23 @@ copy_user_handle_tail(char *to, char *from, unsigned len) return len; } +/* + * Similar to copy_user_handle_tail, probe for the write fault point, + * but reuse __memcpy_mcsafe in case a new read error is encountered. + * clac() is handled in _copy_to_iter_mcsafe(). + */ +__visible unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++) { + unsigned long rem = memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB
[PATCH v2 5/9] x86, memcpy_mcsafe: define copy_to_iter_mcsafe()
Use the updated memcpy_mcsafe() implementation to define copy_to_user_mcsafe() and copy_to_iter_mcsafe(). The most significant difference from typical copy_to_iter() is that the ITER_KVEC and ITER_BVEC iterator types can fail to complete a full transfer. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Dan Williams --- arch/x86/Kconfig |1 + arch/x86/include/asm/uaccess_64.h | 11 +++ include/linux/uio.h | 15 + lib/iov_iter.c| 61 + 4 files changed, 88 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..6ca22706cd64 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_PMEM_APIif X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c63efc07891f..62acb613114b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -47,6 +47,17 @@ copy_user_generic(void *to, const void *from, unsigned len) } static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + ret = memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + +static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { int ret = 0; diff --git a/include/linux/uio.h b/include/linux/uio.h index e67e12adb136..f5766e853a77 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif +#ifdef 
CONFIG_ARCH_HAS_UACCESS_MCSAFE +size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); +#else +#define _copy_to_iter_mcsafe _copy_to_iter +#endif + static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { @@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) return _copy_from_iter_flushcache(addr, bytes, i); } +static __always_inline __must_check +size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return 0; + else + return _copy_to_iter_mcsafe(addr, bytes, i); +} + size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..70ebc8ede143 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +static int copyout_mcsafe(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = copy_to_user_mcsafe((__force void *) to, from, n); + } + return n; +} + +static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + unsigned long ret; + char *to; + + to = kmap_atomic(page); + ret = memcpy_mcsafe(to + offset, from, len); + kunmap_atomic(to); + + return ret; +} + +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +{ + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, + copyout_mcsafe(v.iov_base, (from += 
v.iov_len) - v.iov_len, v.iov_len), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr =
[PATCH v2 5/9] x86, memcpy_mcsafe: define copy_to_iter_mcsafe()
Use the updated memcpy_mcsafe() implementation to define copy_to_user_mcsafe() and copy_to_iter_mcsafe(). The most significant difference from typical copy_to_iter() is that the ITER_KVEC and ITER_BVEC iterator types can fail to complete a full transfer. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Dan Williams --- arch/x86/Kconfig |1 + arch/x86/include/asm/uaccess_64.h | 11 +++ include/linux/uio.h | 15 + lib/iov_iter.c| 61 + 4 files changed, 88 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..6ca22706cd64 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_PMEM_APIif X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c63efc07891f..62acb613114b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -47,6 +47,17 @@ copy_user_generic(void *to, const void *from, unsigned len) } static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + ret = memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + +static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { int ret = 0; diff --git a/include/linux/uio.h b/include/linux/uio.h index e67e12adb136..f5766e853a77 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -154,6 +154,12 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); #define _copy_from_iter_flushcache _copy_from_iter_nocache #endif +#ifdef 
CONFIG_ARCH_HAS_UACCESS_MCSAFE +size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i); +#else +#define _copy_to_iter_mcsafe _copy_to_iter +#endif + static __always_inline __must_check size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { @@ -163,6 +169,15 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) return _copy_from_iter_flushcache(addr, bytes, i); } +static __always_inline __must_check +size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i) +{ + if (unlikely(!check_copy_size(addr, bytes, false))) + return 0; + else + return _copy_to_iter_mcsafe(addr, bytes, i); +} + size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); unsigned long iov_iter_gap_alignment(const struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 970212670b6a..70ebc8ede143 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -573,6 +573,67 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(_copy_to_iter); +#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE +static int copyout_mcsafe(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = copy_to_user_mcsafe((__force void *) to, from, n); + } + return n; +} + +static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, + const char *from, size_t len) +{ + unsigned long ret; + char *to; + + to = kmap_atomic(page); + ret = memcpy_mcsafe(to + offset, from, len); + kunmap_atomic(to); + + return ret; +} + +size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) +{ + const char *from = addr; + unsigned long rem, curr_addr, s_addr = (unsigned long) addr; + + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + if (iter_is_iovec(i)) + might_fault(); + iterate_and_advance(i, bytes, v, + copyout_mcsafe(v.iov_base, (from += 
v.iov_len) - v.iov_len, v.iov_len), + ({ + rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len); + if (rem) { + curr_addr = (unsigned long) from; + bytes = curr_addr - s_addr - rem; + return bytes; + } + }), + ({ + rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) -
[PATCH v2 4/9] x86, memcpy_mcsafe: add write-protection-fault handling
In preparation for using memcpy_mcsafe() to handle user copies it needs to be able to handle write-protection faults while writing user pages. Add MMU-fault handlers alongside the machine-check exception handlers. Note that the machine check fault exception handling makes assumptions about source buffer alignment and poison alignment. In the write fault case, given the destination buffer is arbitrarily aligned, it needs a separate / additional fault handling approach. The mcsafe_handle_tail() helper is reused. The @limit argument is set to @len since there is no safety concern about retriggering an MMU fault, and this simplifies the assembly. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Co-developed-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/include/asm/uaccess_64.h |3 +++ arch/x86/lib/memcpy_64.S | 14 ++ arch/x86/lib/usercopy_64.c| 17 + 3 files changed, 34 insertions(+) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398e..c63efc07891f 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -194,4 +194,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index f01a88391c98..c3b527a9f95d 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -265,9 +265,23 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) mov %ecx, %eax ret + /* +* For write fault handling, given the destination is unaligned, +* we handle faults on multi-byte writes with a byte-by-byte +* copy up to the write-protected page. 
+*/ +.E_write_words: + shll$3, %ecx + addl%edx, %ecx + movl%ecx, %edx + jmp mcsafe_handle_tail + .previous _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 75d3776123cc..9787f5ee0cf9 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -75,6 +75,23 @@ copy_user_handle_tail(char *to, char *from, unsigned len) return len; } +/* + * Similar to copy_user_handle_tail, probe for the write fault point, + * but reuse __memcpy_mcsafe in case a new read error is encountered. + * clac() is handled in _copy_to_iter_mcsafe(). + */ +__visible unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++) { + unsigned long rem = memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB
[PATCH v2 1/9] x86, memcpy_mcsafe: remove loop unrolling
In preparation for teaching memcpy_mcsafe() to return 'bytes remaining' rather than pass / fail, simplify the implementation to remove loop unrolling. The unrolling complicates the fault handling for negligible benefit given modern CPUs perform loop stream detection. Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Reported-by: Linus Torvalds Signed-off-by: Dan Williams --- arch/x86/include/asm/string_64.h |4 +-- arch/x86/lib/memcpy_64.S | 59 ++ 2 files changed, 12 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..4752f8984923 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,7 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -138,7 +138,7 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..54c971892db5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -184,11 +184,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? 
Go to byte copy loop */ jb .L_no_whole_words @@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled) jnz .L_copy_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_copy_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_copy_words /* Any trailing bytes? 
*/ .L_no_whole_words: @@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" /* Return -EFAULT for any failure */ @@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) .previous _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) +
[PATCH v2 2/9] x86, memcpy_mcsafe: add labels for write fault handling
The memcpy_mcsafe() implementation handles CPU exceptions when reading from the source address. Before it can be used for user copies it needs to grow support for handling write faults. In preparation for adding that exception handling update the labels for the read cache word X case (.L_cache_rX) and write cache word X case (.L_cache_wX). Cc:Cc: Ingo Molnar Cc: Borislav Petkov Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Reported-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/lib/memcpy_64.S | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 54c971892db5..5709f3ec22a4 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -204,13 +204,14 @@ ENTRY(__memcpy_mcsafe) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: movl %edx, %ecx @@ -218,13 +219,14 @@ ENTRY(__memcpy_mcsafe) shrl $3, %ecx jz .L_no_whole_words -.L_copy_words: +.L_read_words: movq (%rsi), %r8 +.L_write_words: movq %r8, (%rdi) addq $8, %rsi addq $8, %rdi decl %ecx - jnz .L_copy_words + jnz .L_read_words /* Any trailing bytes? */ .L_no_whole_words: @@ -233,13 +235,14 @@ ENTRY(__memcpy_mcsafe) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. 
Return zero */ .L_done_memcpy_trap: @@ -256,7 +259,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) #endif
[PATCH v2 1/9] x86, memcpy_mcsafe: remove loop unrolling
In preparation for teaching memcpy_mcsafe() to return 'bytes remaining' rather than pass / fail, simplify the implementation to remove loop unrolling. The unrolling complicates the fault handling for negligible benefit given modern CPUs perform loop stream detection. Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Tony Luck Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Reported-by: Linus Torvalds Signed-off-by: Dan Williams --- arch/x86/include/asm/string_64.h |4 +-- arch/x86/lib/memcpy_64.S | 59 ++ 2 files changed, 12 insertions(+), 51 deletions(-) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..4752f8984923 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,7 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check int __memcpy_mcsafe(void *dst, const void *src, size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -138,7 +138,7 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..54c971892db5 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -184,11 +184,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? 
Go to byte copy loop */ jb .L_no_whole_words @@ -213,49 +213,18 @@ ENTRY(memcpy_mcsafe_unrolled) jnz .L_copy_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_copy_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_copy_words /* Any trailing bytes? 
*/ .L_no_whole_words: @@ -276,8 +245,8 @@ ENTRY(memcpy_mcsafe_unrolled) .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" /* Return -EFAULT for any failure */ @@ -288,14 +257,6 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) .previous _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) #endif
[PATCH v2 2/9] x86, memcpy_mcsafe: add labels for write fault handling
The memcpy_mcsafe() implementation handles CPU exceptions when reading from the source address. Before it can be used for user copies it needs to grow support for handling write faults. In preparation for adding that exception handling update the labels for the read cache word X case (.L_cache_rX) and write cache word X case (.L_cache_wX). Cc: Cc: Ingo Molnar Cc: Borislav Petkov Cc: Al Viro Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Reported-by: Tony Luck Signed-off-by: Dan Williams --- arch/x86/lib/memcpy_64.S | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 54c971892db5..5709f3ec22a4 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -204,13 +204,14 @@ ENTRY(__memcpy_mcsafe) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: movl %edx, %ecx @@ -218,13 +219,14 @@ ENTRY(__memcpy_mcsafe) shrl $3, %ecx jz .L_no_whole_words -.L_copy_words: +.L_read_words: movq (%rsi), %r8 +.L_write_words: movq %r8, (%rdi) addq $8, %rsi addq $8, %rdi decl %ecx - jnz .L_copy_words + jnz .L_read_words /* Any trailing bytes? */ .L_no_whole_words: @@ -233,13 +235,14 @@ ENTRY(__memcpy_mcsafe) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. 
Return zero */ .L_done_memcpy_trap: @@ -256,7 +259,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_words, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .L_memcpy_mcsafe_fail) #endif
[PATCH v2 0/9] use memcpy_mcsafe() for copy_to_iter()
Changes since v1 [1]: * Remove the loop unrolling in the assembly implementation since it significantly complicates the exception handling (Linus) * Introduce a ->copy_to_iter() dax operation for symmetry with the existing ->copy_from_iter() operation to allow platform / device-specific implementations. [1]: https://lists.01.org/pipermail/linux-nvdimm/2018-May/015548.html --- Currently memcpy_mcsafe() is only deployed in the pmem driver when reading through a /dev/pmemX block device. However, a filesystem in dax mode mounted on a /dev/pmemX block device will bypass the block layer and the driver for reads. The filesystem-dax (fsdax) read case uses dax_direct_access() and copy_to_iter() to bypass the block layer. The result of the bypass is that the kernel treats machine checks during read as system fatal (reboot) when they could simply be flagged as an I/O error, similar to performing reads through the pmem driver. Prevent this fatal condition by deploying memcpy_mcsafe() in the fsdax read path. The main differences between this copy_to_user_mcsafe() and copy_user_generic_unrolled() are: * Typical tail/residue handling after a fault retries the copy byte-by-byte until the fault happens again. Re-triggering machine checks is potentially fatal so the implementation uses source alignment and poison alignment assumptions to avoid re-triggering machine checks. * SMAP coordination is handled external to the assembly with __uaccess_begin() and __uaccess_end(). * ITER_KVEC and ITER_BVEC can now end prematurely with an error. The new MCSAFE_DEBUG facility is proposed as a way to unit test the exception handling without requiring an ACPI EINJ capable platform. 
--- Dan Williams (9): x86, memcpy_mcsafe: remove loop unrolling x86, memcpy_mcsafe: add labels for write fault handling x86, memcpy_mcsafe: return bytes remaining x86, memcpy_mcsafe: add write-protection-fault handling x86, memcpy_mcsafe: define copy_to_iter_mcsafe() dax: introduce a ->copy_to_iter dax operation dax: report bytes remaining in dax_iomap_actor() pmem: switch to copy_to_iter_mcsafe() x86, nfit_test: unit test for memcpy_mcsafe() arch/x86/Kconfig|1 arch/x86/Kconfig.debug |3 + arch/x86/include/asm/mcsafe_debug.h | 50 arch/x86/include/asm/string_64.h| 10 ++- arch/x86/include/asm/uaccess_64.h | 14 arch/x86/lib/memcpy_64.S| 109 --- arch/x86/lib/usercopy_64.c | 17 + drivers/dax/super.c | 10 +++ drivers/md/dm-linear.c | 16 + drivers/md/dm-log-writes.c | 15 + drivers/md/dm-stripe.c | 21 +++ drivers/md/dm.c | 25 drivers/nvdimm/claim.c |3 + drivers/nvdimm/pmem.c | 13 +++- drivers/s390/block/dcssblk.c|7 ++ fs/dax.c| 21 --- include/linux/dax.h |5 ++ include/linux/device-mapper.h |5 +- include/linux/string.h |4 + include/linux/uio.h | 15 + lib/iov_iter.c | 61 tools/testing/nvdimm/test/nfit.c| 48 +++ 22 files changed, 394 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/mcsafe_debug.h
[PATCH v2 0/9] use memcpy_mcsafe() for copy_to_iter()
Changes since v1 [1]: * Remove the loop unrolling in the assembly implementation since it significantly complicates the exception handling (Linus) * Introduce a ->copy_to_iter() dax operation for symmetry with the existing ->copy_from_iter() operation to allow platform / device-specific implementations. [1]: https://lists.01.org/pipermail/linux-nvdimm/2018-May/015548.html --- Currently memcpy_mcsafe() is only deployed in the pmem driver when reading through a /dev/pmemX block device. However, a filesystem in dax mode mounted on a /dev/pmemX block device will bypass the block layer and the driver for reads. The filesystem-dax (fsdax) read case uses dax_direct_access() and copy_to_iter() to bypass the block layer. The result of the bypass is that the kernel treats machine checks during read as system fatal (reboot) when they could simply be flagged as an I/O error, similar to performing reads through the pmem driver. Prevent this fatal condition by deploying memcpy_mcsafe() in the fsdax read path. The main differences between this copy_to_user_mcsafe() and copy_user_generic_unrolled() are: * Typical tail/residue handling after a fault retries the copy byte-by-byte until the fault happens again. Re-triggering machine checks is potentially fatal so the implementation uses source alignment and poison alignment assumptions to avoid re-triggering machine checks. * SMAP coordination is handled external to the assembly with __uaccess_begin() and __uaccess_end(). * ITER_KVEC and ITER_BVEC can now end prematurely with an error. The new MCSAFE_DEBUG facility is proposed as a way to unit test the exception handling without requiring an ACPI EINJ capable platform. 
--- Dan Williams (9): x86, memcpy_mcsafe: remove loop unrolling x86, memcpy_mcsafe: add labels for write fault handling x86, memcpy_mcsafe: return bytes remaining x86, memcpy_mcsafe: add write-protection-fault handling x86, memcpy_mcsafe: define copy_to_iter_mcsafe() dax: introduce a ->copy_to_iter dax operation dax: report bytes remaining in dax_iomap_actor() pmem: switch to copy_to_iter_mcsafe() x86, nfit_test: unit test for memcpy_mcsafe() arch/x86/Kconfig|1 arch/x86/Kconfig.debug |3 + arch/x86/include/asm/mcsafe_debug.h | 50 arch/x86/include/asm/string_64.h| 10 ++- arch/x86/include/asm/uaccess_64.h | 14 arch/x86/lib/memcpy_64.S| 109 --- arch/x86/lib/usercopy_64.c | 17 + drivers/dax/super.c | 10 +++ drivers/md/dm-linear.c | 16 + drivers/md/dm-log-writes.c | 15 + drivers/md/dm-stripe.c | 21 +++ drivers/md/dm.c | 25 drivers/nvdimm/claim.c |3 + drivers/nvdimm/pmem.c | 13 +++- drivers/s390/block/dcssblk.c|7 ++ fs/dax.c| 21 --- include/linux/dax.h |5 ++ include/linux/device-mapper.h |5 +- include/linux/string.h |4 + include/linux/uio.h | 15 + lib/iov_iter.c | 61 tools/testing/nvdimm/test/nfit.c| 48 +++ 22 files changed, 394 insertions(+), 79 deletions(-) create mode 100644 arch/x86/include/asm/mcsafe_debug.h
[PATCH v15 5/9] PCI/AER: Factor out error reporting from AER
This patch factors out error reporting callbacks, which are currently tightly coupled with AER. DPC should be able to register callbacks and attempt recovery when DPC trigger event occurs. Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 800e1d4..03f4e0b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 08b4584..b4c9506 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -76,36 +76,6 @@ struct aer_rpc { */ }; -struct aer_broadcast_data { - enum pci_channel_state state; - enum pci_ers_result result; -}; - -static inline pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) -{ - if (new == PCI_ERS_RESULT_NO_AER_DRIVER) - return PCI_ERS_RESULT_NO_AER_DRIVER; - - if (new == PCI_ERS_RESULT_NONE) - return orig; - - switch (orig) { - case PCI_ERS_RESULT_CAN_RECOVER: - case PCI_ERS_RESULT_RECOVERED: - orig = new; - break; - case PCI_ERS_RESULT_DISCONNECT: - if (new == PCI_ERS_RESULT_NEED_RESET) - orig = PCI_ERS_RESULT_NEED_RESET; - break; - default: - break; - } - - return orig; -} - extern struct bus_type pcie_port_bus_type; void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index be4ee3b..51515d1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,191 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int report_error_detected(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers 
*err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - dev->error_state = result_data->state; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->error_detected) { - if (result_data->state == pci_channel_io_frozen && - dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - /* -* In case of fatal recovery, if one of down- -* stream device has no driver. We might be -* unable to recover because a later insmod -* of a driver for this device is unaware of -* its hw state. -*/ - pci_printk(KERN_DEBUG, dev, "device has %s\n", - dev->driver ? - "no AER-aware driver" : "no driver"); - } - - /* -* If there's any device in the subtree that does not -* have an error_detected callback, returning -* PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of -* the subsequent mmio_enabled/slot_reset/resume -* callbacks of "any" device in the subtree. All the -* devices in the subtree are left in the error state -* without recovery. -*/ - - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else - vote = PCI_ERS_RESULT_NONE; - } else { - err_handler = dev->driver->err_handler; - vote = err_handler->error_detected(dev, result_data->state); - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - } - - result_data->result = merge_result(result_data->result, vote); - device_unlock(&dev->dev); - return 0; -} - -static int report_mmio_enabled(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->mmio_enabled) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->mmio_enabled(dev); - result_data->result =
[PATCH v15 5/9] PCI/AER: Factor out error reporting from AER
This patch factors out error reporting callbacks, which are currently tightly coupled with AER. DPC should be able to register callbacks and attempt recovery when DPC trigger event occurs. Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 800e1d4..03f4e0b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o +pcieportdrv-y := portdrv_core.o portdrv_pci.o err.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 08b4584..b4c9506 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -76,36 +76,6 @@ struct aer_rpc { */ }; -struct aer_broadcast_data { - enum pci_channel_state state; - enum pci_ers_result result; -}; - -static inline pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) -{ - if (new == PCI_ERS_RESULT_NO_AER_DRIVER) - return PCI_ERS_RESULT_NO_AER_DRIVER; - - if (new == PCI_ERS_RESULT_NONE) - return orig; - - switch (orig) { - case PCI_ERS_RESULT_CAN_RECOVER: - case PCI_ERS_RESULT_RECOVERED: - orig = new; - break; - case PCI_ERS_RESULT_DISCONNECT: - if (new == PCI_ERS_RESULT_NEED_RESET) - orig = PCI_ERS_RESULT_NEED_RESET; - break; - default: - break; - } - - return orig; -} - extern struct bus_type pcie_port_bus_type; void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index be4ee3b..51515d1 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,191 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int report_error_detected(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers 
*err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - dev->error_state = result_data->state; - - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->error_detected) { - if (result_data->state == pci_channel_io_frozen && - dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { - /* -* In case of fatal recovery, if one of down- -* stream device has no driver. We might be -* unable to recover because a later insmod -* of a driver for this device is unaware of -* its hw state. -*/ - pci_printk(KERN_DEBUG, dev, "device has %s\n", - dev->driver ? - "no AER-aware driver" : "no driver"); - } - - /* -* If there's any device in the subtree that does not -* have an error_detected callback, returning -* PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of -* the subsequent mmio_enabled/slot_reset/resume -* callbacks of "any" device in the subtree. All the -* devices in the subtree are left in the error state -* without recovery. -*/ - - if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) - vote = PCI_ERS_RESULT_NO_AER_DRIVER; - else - vote = PCI_ERS_RESULT_NONE; - } else { - err_handler = dev->driver->err_handler; - vote = err_handler->error_detected(dev, result_data->state); - pci_uevent_ers(dev, PCI_ERS_RESULT_NONE); - } - - result_data->result = merge_result(result_data->result, vote); - device_unlock(&dev->dev); - return 0; -} - -static int report_mmio_enabled(struct pci_dev *dev, void *data) -{ - pci_ers_result_t vote; - const struct pci_error_handlers *err_handler; - struct aer_broadcast_data *result_data; - result_data = (struct aer_broadcast_data *) data; - - device_lock(&dev->dev); - if (!dev->driver || - !dev->driver->err_handler || - !dev->driver->err_handler->mmio_enabled) - goto out; - - err_handler = dev->driver->err_handler; - vote = err_handler->mmio_enabled(dev); - result_data->result = merge_result(result_data->result, vote);
Re: [PATCH] kernel/exit.c: pointer sighand could be uninitialized
On Wed, May 02, 2018 at 06:48:57PM -0700, Yizhuo Zhai wrote: > Variable 'sighand' could be uninitialized if probe_kernel_address fails > (-EFAULT). The later use in the if statement may lead to undefined behavior. Excuse me, but that's nonsense. The value *copied* into it (in case probe_kernel_address() has not failed) may be just as uninitialized. If mere "compare uninitialized pointer value to NULL" can cause nasal demons to fly, * we are screwed anyway * the piece of crap compiler should be printed on sandpaper and used to polish its authors. Read the comments in there, please. Especially the one regarding the second case.
[PATCH v15 6/9] PCI/PORTDRV: Implement generic find service
This patch implements generic pcie_port_find_service() routine. Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 51515d1..a525296 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,32 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int find_aer_service_iter(struct device *device, void *data) -{ - struct pcie_port_service_driver *service_driver, **drv; - - drv = (struct pcie_port_service_driver **) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - service_driver = to_service_driver(device->driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - *drv = service_driver; - return 1; - } - } - - return 0; -} - -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) -{ - struct pcie_port_service_driver *drv = NULL; - - device_for_each_child(&dev->dev, &drv, find_aer_service_iter); - - return drv; -} - /** * handle_error_source - handle logging error into an event log * @aerdev: pointer to pcie_device data structure of the root port diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 55df974..877785d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -195,10 +195,8 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) udev = dev->bus->self; } -#if IS_ENABLED(CONFIG_PCIEAER) /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); -#endif + driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); if (driver && driver->reset_link) { status = driver->reset_link(udev); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 47c9824..ba6c963 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -112,5 +112,6 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* 
!CONFIG_PCIE_PME */ -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev); +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index c9c0663..d843055 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -18,6 +18,10 @@ #include "../pci.h" #include "portdrv.h" +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + u32 service; +}; /** * release_pcie_device - free PCI Express port service device structure @@ -398,6 +402,46 @@ static int remove_iter(struct device *dev, void *data) return 0; } +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + return 1; + } + } + + return 0; +} +/** + * pcie_port_find_service - find the service driver + * @dev: PCI Express port the service devices associated with + * @service: Service to find + * + * Find PCI Express port service driver associated with given service + */ +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service) +{ + struct pcie_port_service_driver *drv; + struct portdrv_service_data pdrvs; + + pdrvs.drv = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + drv = pdrvs.drv; + return drv; +} + /** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with -- 2.7.4
Re: [PATCH] kernel/exit.c: pointer sighand could be uninitialized
On Wed, May 02, 2018 at 06:48:57PM -0700, Yizhuo Zhai wrote: > Variable 'sighand' could be uninitialized if probe_kernel_address fails > (-EFAULT). The later use in the if statement may lead to undefined behavior. Excuse me, but that's nonsense. The value *copied* into it (in case probe_kernel_address() has not failed) may be just as uninitialized. If mere "compare uninitialized pointer value to NULL" can cause nasal demons to fly, * we are screwed anyway * the piece of crap compiler should be printed on sandpaper and used to polish its authors. Read the comments in there, please. Especially the one regarding the second case.
[PATCH v15 6/9] PCI/PORTDRV: Implement generic find service
This patch implements generic pcie_port_find_service() routine. Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 51515d1..a525296 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -228,32 +228,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -static int find_aer_service_iter(struct device *device, void *data) -{ - struct pcie_port_service_driver *service_driver, **drv; - - drv = (struct pcie_port_service_driver **) data; - - if (device->bus == &pcie_port_bus_type && device->driver) { - service_driver = to_service_driver(device->driver); - if (service_driver->service == PCIE_PORT_SERVICE_AER) { - *drv = service_driver; - return 1; - } - } - - return 0; -} - -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev) -{ - struct pcie_port_service_driver *drv = NULL; - - device_for_each_child(&dev->dev, &drv, find_aer_service_iter); - - return drv; -} - /** * handle_error_source - handle logging error into an event log * @aerdev: pointer to pcie_device data structure of the root port diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 55df974..877785d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -195,10 +195,8 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) udev = dev->bus->self; } -#if IS_ENABLED(CONFIG_PCIEAER) /* Use the aer driver of the component firstly */ - driver = find_aer_service(udev); -#endif + driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); if (driver && driver->reset_link) { status = driver->reset_link(udev); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 47c9824..ba6c963 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -112,5 +112,6 @@ static inline bool pcie_pme_no_msi(void) { return false; } static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif 
/* !CONFIG_PCIE_PME */ -struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev); +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index c9c0663..d843055 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -18,6 +18,10 @@ #include "../pci.h" #include "portdrv.h" +struct portdrv_service_data { + struct pcie_port_service_driver *drv; + u32 service; +}; /** * release_pcie_device - free PCI Express port service device structure @@ -398,6 +402,46 @@ static int remove_iter(struct device *dev, void *data) return 0; } +static int find_service_iter(struct device *device, void *data) +{ + struct pcie_port_service_driver *service_driver; + struct portdrv_service_data *pdrvs; + u32 service; + + pdrvs = (struct portdrv_service_data *) data; + service = pdrvs->service; + + if (device->bus == &pcie_port_bus_type && device->driver) { + service_driver = to_service_driver(device->driver); + if (service_driver->service == service) { + pdrvs->drv = service_driver; + return 1; + } + } + + return 0; +} +/** + * pcie_port_find_service - find the service driver + * @dev: PCI Express port the service devices associated with + * @service: Service to find + * + * Find PCI Express port service driver associated with given service + */ +struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, + u32 service) +{ + struct pcie_port_service_driver *drv; + struct portdrv_service_data pdrvs; + + pdrvs.drv = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + drv = pdrvs.drv; + return drv; +} + /** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with -- 2.7.4
[PATCH v15 7/9] PCI/PORTDRV: Implement generic find device
This patch implements generic pcie_port_find_device() routine. Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index ba6c963..896608a 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -114,4 +114,6 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, u32 service); +struct device *pcie_port_find_device(struct pci_dev *dev, +u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index d843055..c6147c4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -20,6 +20,7 @@ #include "portdrv.h" struct portdrv_service_data { struct pcie_port_service_driver *drv; + struct device *dev; u32 service; }; @@ -415,6 +416,7 @@ static int find_service_iter(struct device *device, void *data) service_driver = to_service_driver(device->driver); if (service_driver->service == service) { pdrvs->drv = service_driver; + pdrvs->dev = device; return 1; } } @@ -443,6 +445,27 @@ struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, } /** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service devices associated with + * @service: For the service to find + * + * Find PCI Express port service driver associated with given service + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} + +/** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with * -- 2.7.4
[PATCH v15 7/9] PCI/PORTDRV: Implement generic find device
This patch implements generic pcie_port_find_device() routine. Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index ba6c963..896608a 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -114,4 +114,6 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, u32 service); +struct device *pcie_port_find_device(struct pci_dev *dev, +u32 service); #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index d843055..c6147c4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -20,6 +20,7 @@ #include "portdrv.h" struct portdrv_service_data { struct pcie_port_service_driver *drv; + struct device *dev; u32 service; }; @@ -415,6 +416,7 @@ static int find_service_iter(struct device *device, void *data) service_driver = to_service_driver(device->driver); if (service_driver->service == service) { pdrvs->drv = service_driver; + pdrvs->dev = device; return 1; } } @@ -443,6 +445,27 @@ struct pcie_port_service_driver *pcie_port_find_service(struct pci_dev *dev, } /** + * pcie_port_find_device - find the struct device + * @dev: PCI Express port the service devices associated with + * @service: For the service to find + * + * Find PCI Express port service driver associated with given service + */ +struct device *pcie_port_find_device(struct pci_dev *dev, + u32 service) +{ + struct device *device; + struct portdrv_service_data pdrvs; + + pdrvs.dev = NULL; + pdrvs.service = service; + device_for_each_child(&dev->dev, &pdrvs, find_service_iter); + + device = pdrvs.dev; + return device; +} + +/** * pcie_port_device_remove - unregister PCI Express port service devices * @dev: PCI Express port the service devices to unregister are associated with * -- 2.7.4
[PATCH v15 8/9] PCI/DPC: Unify and plumb error handling into DPC
Current DPC driver does not do recovery, e.g. calling end-point's driver's callbacks, which sanitize the sw. DPC driver implements reset_link callback, and calls pcie_do_recovery(). Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 80ec384..aed7c9f 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -73,29 +73,21 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc) pcie_wait_for_link(pdev, false); } -static void dpc_work(struct work_struct *work) +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { - struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); - struct pci_dev *dev, *temp, *pdev = dpc->dev->port; - struct pci_bus *parent = pdev->subordinate; - u16 cap = dpc->cap_pos, ctl; - - pci_lock_rescan_remove(); - list_for_each_entry_safe_reverse(dev, temp, &parent->devices, -bus_list) { - pci_dev_get(dev); - pci_dev_set_disconnected(dev, NULL); - if (pci_has_subordinate(dev)) - pci_walk_bus(dev->subordinate, -pci_dev_set_disconnected, NULL); - pci_stop_and_remove_bus_device(dev); - pci_dev_put(dev); - } - pci_unlock_rescan_remove(); + struct dpc_dev *dpc; + struct pcie_device *pciedev; + struct device *devdpc; + u16 cap, ctl; + + devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC); + pciedev = to_pcie_device(devdpc); + dpc = get_service_data(pciedev); + cap = dpc->cap_pos; dpc_wait_link_inactive(dpc); if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) - return; + return PCI_ERS_RESULT_DISCONNECT; if (dpc->rp_extensions && dpc->rp_pio_status) { pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, dpc->rp_pio_status); @@ -108,6 +100,17 @@ static void dpc_work(struct work_struct *work) pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl); pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL, ctl | PCI_EXP_DPC_CTL_INT_EN); + + return PCI_ERS_RESULT_RECOVERED; +} + +static void dpc_work(struct work_struct *work) +{ + struct dpc_dev *dpc = container_of(work, struct dpc_dev, 
work); + struct pci_dev *pdev = dpc->dev->port; + + /* From DPC point of view error is always FATAL. */ + pcie_do_recovery(pdev, DPC_FATAL); } static void dpc_process_rp_pio_error(struct dpc_dev *dpc) @@ -288,6 +291,7 @@ static struct pcie_port_service_driver dpcdriver = { .service= PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, + .reset_link = dpc_reset_link, }; static int __init dpc_service_init(void) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 877785d..526aba8 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -181,11 +181,12 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) return PCI_ERS_RESULT_RECOVERED; } -static pci_ers_result_t reset_link(struct pci_dev *dev) +static pci_ers_result_t reset_link(struct pci_dev *dev, int severity) { struct pci_dev *udev; pci_ers_result_t status; struct pcie_port_service_driver *driver; + u32 service; if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { /* Reset this port for all subordinates */ @@ -196,7 +197,12 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) } /* Use the aer driver of the component firstly */ - driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); + if (severity == DPC_FATAL) + service = PCIE_PORT_SERVICE_DPC; + else + service = PCIE_PORT_SERVICE_AER; + + driver = pcie_port_find_service(udev, service); if (driver && driver->reset_link) { status = driver->reset_link(udev); @@ -302,7 +308,7 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) pci_dev_put(pdev); } - result = reset_link(udev); + result = reset_link(udev, severity); if (result == PCI_ERS_RESULT_RECOVERED) if (pcie_wait_for_link(udev, true)) pci_rescan_bus(udev->bus); @@ -326,7 +332,8 @@ void pcie_do_recovery(struct pci_dev *dev, int severity) pci_ers_result_t status; enum pci_channel_state state; - if (severity == AER_FATAL) { + if ((severity == AER_FATAL) || + (severity == DPC_FATAL)) { status = do_fatal_recovery(dev, severity); if 
(status != PCI_ERS_RESULT_RECOVERED) goto failed; diff --git a/include/linux/aer.h
[PATCH v15 8/9] PCI/DPC: Unify and plumb error handling into DPC
Current DPC driver does not do recovery, e.g. calling end-point's driver's callbacks, which sanitize the sw. DPC driver implements link_reset callback, and calls pci_do_recovery(). Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 80ec384..aed7c9f 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -73,29 +73,21 @@ static void dpc_wait_link_inactive(struct dpc_dev *dpc) pcie_wait_for_link(pdev, false); } -static void dpc_work(struct work_struct *work) +static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { - struct dpc_dev *dpc = container_of(work, struct dpc_dev, work); - struct pci_dev *dev, *temp, *pdev = dpc->dev->port; - struct pci_bus *parent = pdev->subordinate; - u16 cap = dpc->cap_pos, ctl; - - pci_lock_rescan_remove(); - list_for_each_entry_safe_reverse(dev, temp, >devices, -bus_list) { - pci_dev_get(dev); - pci_dev_set_disconnected(dev, NULL); - if (pci_has_subordinate(dev)) - pci_walk_bus(dev->subordinate, -pci_dev_set_disconnected, NULL); - pci_stop_and_remove_bus_device(dev); - pci_dev_put(dev); - } - pci_unlock_rescan_remove(); + struct dpc_dev *dpc; + struct pcie_device *pciedev; + struct device *devdpc; + u16 cap, ctl; + + devdpc = pcie_port_find_device(pdev, PCIE_PORT_SERVICE_DPC); + pciedev = to_pcie_device(devdpc); + dpc = get_service_data(pciedev); + cap = dpc->cap_pos; dpc_wait_link_inactive(dpc); if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc)) - return; + return PCI_ERS_RESULT_DISCONNECT; if (dpc->rp_extensions && dpc->rp_pio_status) { pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, dpc->rp_pio_status); @@ -108,6 +100,17 @@ static void dpc_work(struct work_struct *work) pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, ); pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL, ctl | PCI_EXP_DPC_CTL_INT_EN); + + return PCI_ERS_RESULT_RECOVERED; +} + +static void dpc_work(struct work_struct *work) +{ + struct dpc_dev *dpc = container_of(work, struct dpc_dev, 
work); + struct pci_dev *pdev = dpc->dev->port; + + /* From DPC point of view error is always FATAL. */ + pcie_do_recovery(pdev, DPC_FATAL); } static void dpc_process_rp_pio_error(struct dpc_dev *dpc) @@ -288,6 +291,7 @@ static struct pcie_port_service_driver dpcdriver = { .service= PCIE_PORT_SERVICE_DPC, .probe = dpc_probe, .remove = dpc_remove, + .reset_link = dpc_reset_link, }; static int __init dpc_service_init(void) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 877785d..526aba8 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -181,11 +181,12 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev) return PCI_ERS_RESULT_RECOVERED; } -static pci_ers_result_t reset_link(struct pci_dev *dev) +static pci_ers_result_t reset_link(struct pci_dev *dev, int severity) { struct pci_dev *udev; pci_ers_result_t status; struct pcie_port_service_driver *driver; + u32 service; if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { /* Reset this port for all subordinates */ @@ -196,7 +197,12 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) } /* Use the aer driver of the component firstly */ - driver = pcie_port_find_service(udev, PCIE_PORT_SERVICE_AER); + if (severity == DPC_FATAL) + service = PCIE_PORT_SERVICE_DPC; + else + service = PCIE_PORT_SERVICE_AER; + + driver = pcie_port_find_service(udev, service); if (driver && driver->reset_link) { status = driver->reset_link(udev); @@ -302,7 +308,7 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) pci_dev_put(pdev); } - result = reset_link(udev); + result = reset_link(udev, severity); if (result == PCI_ERS_RESULT_RECOVERED) if (pcie_wait_for_link(udev, true)) pci_rescan_bus(udev->bus); @@ -326,7 +332,8 @@ void pcie_do_recovery(struct pci_dev *dev, int severity) pci_ers_result_t status; enum pci_channel_state state; - if (severity == AER_FATAL) { + if ((severity == AER_FATAL) || + (severity == DPC_FATAL)) { status = do_fatal_recovery(dev, severity); if 
(status != PCI_ERS_RESULT_RECOVERED) goto failed; diff --git a/include/linux/aer.h b/include/linux/aer.h index
[PATCH v15 2/9] pci-error-recovery: Add AER_FATAL handling
It adds description on AER_FATAL error handling. Signed-off-by: Oza Pawandeepdiff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt index 0b6bb3e..688b691 100644 --- a/Documentation/PCI/pci-error-recovery.txt +++ b/Documentation/PCI/pci-error-recovery.txt @@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error event will be platform-dependent, but will follow the general sequence described below. -STEP 0: Error Event +STEP 0: Error Event: ERR_NONFATAL --- A PCI bus error is detected by the PCI hardware. On powerpc, the slot is isolated, in that all I/O is blocked: all reads return 0x, @@ -228,13 +228,7 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations). If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform proceeds to STEP 4 (Slot Reset) -STEP 3: Link Reset --- -The platform resets the link. This is a PCI-Express specific step -and is done whenever a fatal error has been detected that can be -"solved" by resetting the link. - -STEP 4: Slot Reset +STEP 3: Slot Reset -- In response to a return value of PCI_ERS_RESULT_NEED_RESET, the @@ -320,7 +314,7 @@ Failure). >>> However, it probably should. -STEP 5: Resume Operations +STEP 4: Resume Operations - The platform will call the resume() callback on all affected device drivers if all drivers on the segment have returned @@ -332,7 +326,7 @@ a result code. At this point, if a new error happens, the platform will restart a new error recovery sequence. -STEP 6: Permanent Failure +STEP 5: Permanent Failure - A "permanent failure" has occurred, and the platform cannot recover the device. The platform will call error_detected() with a @@ -355,6 +349,27 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt for additional detail on real-life experience of the causes of software errors. +STEP 0: Error Event: ERR_FATAL +--- +PCI bus error is detected by the PCI hardware. 
On powerpc, the slot is +isolated, in that all I/O is blocked: all reads return 0x, all +writes are ignored. + +STEP 1: Remove devices + +Platform removes the devices depending on the error agent, it could be +this port for all subordinates or upstream component (likely downstream +port) + +STEP 2: Reset link + +The platform resets the link. This is a PCI-Express specific step and is +done whenever a fatal error has been detected that can be "solved" by +resetting the link. + +STEP 3: Re-enumerate the devices + +Initiates the re-enumeration. Conclusion; General Remarks --- -- 2.7.4
[PATCH v15 9/9] PCI/DPC: Disable ERR_NONFATAL and enable ERR_FATAL for DPC
This patch disables ERR_NONFATAL trigger for DPC, so now DPC handles only ERR_FATAL. Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index aed7c9f..6966e00 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -263,7 +263,7 @@ static int dpc_probe(struct pcie_device *dev) } } - ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN; + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -281,7 +281,7 @@ static void dpc_remove(struct pcie_device *dev) u16 ctl; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ); - ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 103ba79..86f1cc2 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -981,6 +981,7 @@ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL6 /* DPC control */ +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ #define PCI_EXP_DPC_CTL_INT_EN0x0008 /* DPC Interrupt Enable */ -- 2.7.4
[PATCH v15 2/9] pci-error-recovery: Add AER_FATAL handling
It adds description on AER_FATAL error handling. Signed-off-by: Oza Pawandeep diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt index 0b6bb3e..688b691 100644 --- a/Documentation/PCI/pci-error-recovery.txt +++ b/Documentation/PCI/pci-error-recovery.txt @@ -110,7 +110,7 @@ The actual steps taken by a platform to recover from a PCI error event will be platform-dependent, but will follow the general sequence described below. -STEP 0: Error Event +STEP 0: Error Event: ERR_NONFATAL --- A PCI bus error is detected by the PCI hardware. On powerpc, the slot is isolated, in that all I/O is blocked: all reads return 0x, @@ -228,13 +228,7 @@ proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations). If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform proceeds to STEP 4 (Slot Reset) -STEP 3: Link Reset --- -The platform resets the link. This is a PCI-Express specific step -and is done whenever a fatal error has been detected that can be -"solved" by resetting the link. - -STEP 4: Slot Reset +STEP 3: Slot Reset -- In response to a return value of PCI_ERS_RESULT_NEED_RESET, the @@ -320,7 +314,7 @@ Failure). >>> However, it probably should. -STEP 5: Resume Operations +STEP 4: Resume Operations - The platform will call the resume() callback on all affected device drivers if all drivers on the segment have returned @@ -332,7 +326,7 @@ a result code. At this point, if a new error happens, the platform will restart a new error recovery sequence. -STEP 6: Permanent Failure +STEP 5: Permanent Failure - A "permanent failure" has occurred, and the platform cannot recover the device. The platform will call error_detected() with a @@ -355,6 +349,27 @@ errors. See the discussion in powerpc/eeh-pci-error-recovery.txt for additional detail on real-life experience of the causes of software errors. +STEP 0: Error Event: ERR_FATAL +--- +PCI bus error is detected by the PCI hardware. 
On powerpc, the slot is +isolated, in that all I/O is blocked: all reads return 0x, all +writes are ignored. + +STEP 1: Remove devices + +Platform removes the devices depending on the error agent, it could be +this port for all subordinates or upstream component (likely downstream +port) + +STEP 2: Reset link + +The platform resets the link. This is a PCI-Express specific step and is +done whenever a fatal error has been detected that can be "solved" by +resetting the link. + +STEP 3: Re-enumerate the devices + +Initiates the re-enumeration. Conclusion; General Remarks --- -- 2.7.4
[PATCH v15 9/9] PCI/DPC: Disable ERR_NONFATAL and enable ERR_FATAL for DPC
This patch disables ERR_NONFATAL trigger for DPC, so now DPC handles only ERR_FATAL. Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index aed7c9f..6966e00 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -263,7 +263,7 @@ static int dpc_probe(struct pcie_device *dev) } } - ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN; + ctl = (ctl & 0xfff4) | PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN; pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); dev_info(device, "DPC error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n", @@ -281,7 +281,7 @@ static void dpc_remove(struct pcie_device *dev) u16 ctl; pci_read_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ); - ctl &= ~(PCI_EXP_DPC_CTL_EN_NONFATAL | PCI_EXP_DPC_CTL_INT_EN); + ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN); pci_write_config_word(pdev, dpc->cap_pos + PCI_EXP_DPC_CTL, ctl); } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 103ba79..86f1cc2 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -981,6 +981,7 @@ #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ #define PCI_EXP_DPC_CTL6 /* DPC control */ +#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ #define PCI_EXP_DPC_CTL_INT_EN0x0008 /* DPC Interrupt Enable */ -- 2.7.4
[PATCH v15 1/9] PCI: Unify wait for link active into generic PCI
Clients such as HP, DPC are using pcie_wait_link_active(), which waits till the link becomes active or inactive. Made generic function and moved it to drivers/pci/pci.c Signed-off-by: Oza Pawandeepdiff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 18a42f8..e0c2b8e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -231,25 +231,11 @@ bool pciehp_check_link_active(struct controller *ctrl) return ret; } -static void __pcie_wait_link_active(struct controller *ctrl, bool active) -{ - int timeout = 1000; - - if (pciehp_check_link_active(ctrl) == active) - return; - while (timeout > 0) { - msleep(10); - timeout -= 10; - if (pciehp_check_link_active(ctrl) == active) - return; - } - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n", - active ? "set" : "cleared"); -} - static void pcie_wait_link_active(struct controller *ctrl) { - __pcie_wait_link_active(ctrl, true); + struct pci_dev *pdev = ctrl_dev(ctrl); + + pcie_wait_for_link(pdev, true); } static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e597655..2e4d1e4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4138,6 +4138,35 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS); } +/** + * pcie_wait_for_link - Wait for link till it's active/inactive + * @pdev: Bridge device + * @active: waiting for active or inactive ? + * + * Use this to wait till link becomes active or inactive. 
+ */ +bool pcie_wait_for_link(struct pci_dev *pdev, bool active) +{ + int timeout = 1000; + bool ret; + u16 lnk_status; + + for (;;) { + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); + if (ret == active) + return true; + if (timeout <= 0) + break; + msleep(10); + timeout -= 10; + } + + pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", +active ? "set" : "cleared"); + + return false; +} void pci_reset_secondary_bus(struct pci_dev *dev) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 023f7cf..cec9d8c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,7 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 8c57d60..80ec384 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -68,19 +68,9 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) static void dpc_wait_link_inactive(struct dpc_dev *dpc) { - unsigned long timeout = jiffies + HZ; struct pci_dev *pdev = dpc->dev->port; - struct device *dev = >dev->device; - u16 lnk_status; - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); - while (lnk_status & PCI_EXP_LNKSTA_DLLLA && - !time_after(jiffies, timeout)) { - msleep(10); - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); - } - if (lnk_status & PCI_EXP_LNKSTA_DLLLA) - dev_warn(dev, "Link state not disabled for DPC event\n"); + pcie_wait_for_link(pdev, false); } static void dpc_work(struct work_struct *work) -- 2.7.4
[PATCH v15 1/9] PCI: Unify wait for link active into generic PCI
Clients such as HP, DPC are using pcie_wait_link_active(), which waits till the link becomes active or inactive. Made generic function and moved it to drivers/pci/pci.c Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 18a42f8..e0c2b8e 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -231,25 +231,11 @@ bool pciehp_check_link_active(struct controller *ctrl) return ret; } -static void __pcie_wait_link_active(struct controller *ctrl, bool active) -{ - int timeout = 1000; - - if (pciehp_check_link_active(ctrl) == active) - return; - while (timeout > 0) { - msleep(10); - timeout -= 10; - if (pciehp_check_link_active(ctrl) == active) - return; - } - ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n", - active ? "set" : "cleared"); -} - static void pcie_wait_link_active(struct controller *ctrl) { - __pcie_wait_link_active(ctrl, true); + struct pci_dev *pdev = ctrl_dev(ctrl); + + pcie_wait_for_link(pdev, true); } static bool pci_bus_check_dev(struct pci_bus *bus, int devfn) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e597655..2e4d1e4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4138,6 +4138,35 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return pci_dev_wait(dev, "PM D3->D0", PCIE_RESET_READY_POLL_MS); } +/** + * pcie_wait_for_link - Wait for link till it's active/inactive + * @pdev: Bridge device + * @active: waiting for active or inactive ? + * + * Use this to wait till link becomes active or inactive. 
+ */ +bool pcie_wait_for_link(struct pci_dev *pdev, bool active) +{ + int timeout = 1000; + bool ret; + u16 lnk_status; + + for (;;) { + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); + ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); + if (ret == active) + return true; + if (timeout <= 0) + break; + msleep(10); + timeout -= 10; + } + + pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n", +active ? "set" : "cleared"); + + return false; +} void pci_reset_secondary_bus(struct pci_dev *dev) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 023f7cf..cec9d8c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,7 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index 8c57d60..80ec384 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -68,19 +68,9 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc) static void dpc_wait_link_inactive(struct dpc_dev *dpc) { - unsigned long timeout = jiffies + HZ; struct pci_dev *pdev = dpc->dev->port; - struct device *dev = >dev->device; - u16 lnk_status; - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); - while (lnk_status & PCI_EXP_LNKSTA_DLLLA && - !time_after(jiffies, timeout)) { - msleep(10); - pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, _status); - } - if (lnk_status & PCI_EXP_LNKSTA_DLLLA) - dev_warn(dev, "Link state not disabled for DPC event\n"); + pcie_wait_for_link(pdev, false); } static void dpc_work(struct work_struct *work) -- 2.7.4
[PATCH v15 3/9] PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices
This patch alters the behavior of handling of ERR_FATAL, where removal of devices is initiated, followed by reset link, followed by re-enumeration. So the errors are handled in a different way as follows: ERR_NONFATAL => call driver recovery entry points ERR_FATAL=> remove and re-enumerate please refer to Documentation/PCI/pci-error-recovery.txt for more details. Signed-off-by: Oza Pawandeepdiff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 779b387..206f590 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + /* +* This function is called only on ERR_FATAL now, and since +* the pci_report_resume is called only in ERR_NONFATAL case, +* the clearing part has to be taken care here. +*/ + aer_error_resume(dev); + return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 0ea5acc..655d4e8 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -20,6 +20,7 @@ #include #include #include "aerdrv.h" +#include "../../pci.h" #definePCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -474,6 +475,44 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) return status; } +static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + + if (severity == AER_FATAL) + pci_cleanup_aer_uncorrect_error_status(dev); + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + list_for_each_entry_safe_reverse(pdev, temp, >devices, +bus_list) 
{ + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, +pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev); + if (result == PCI_ERS_RESULT_RECOVERED) + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + + pci_unlock_rescan_remove(); + + return result; +} + /** * do_recovery - handle nonfatal/fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error @@ -485,11 +524,15 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) */ static void do_recovery(struct pci_dev *dev, int severity) { - pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t status; enum pci_channel_state state; - if (severity == AER_FATAL) - state = pci_channel_io_frozen; + if (severity == AER_FATAL) { + status = do_fatal_recovery(dev, severity); + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + return; + } else state = pci_channel_io_normal; @@ -498,12 +541,6 @@ static void do_recovery(struct pci_dev *dev, int severity) "error_detected", report_error_detected); - if (severity == AER_FATAL) { - result = reset_link(dev); - if (result != PCI_ERS_RESULT_RECOVERED) - goto failed; - } - if (status == PCI_ERS_RESULT_CAN_RECOVER) status = broadcast_error_message(dev, state, -- 2.7.4
[PATCH v15 0/9] Address error and recovery for AER and DPC
This patch set brings in error handling support for DPC. The current implementation of AER and error message broadcasting to the EP driver is tightly coupled and limited to the AER service driver. It is important to factor out broadcasting and other link handling callbacks, so that callbacks are handled appropriately not only when AER gets triggered, but also when DPC gets triggered (e.g. for ERR_FATAL). The goal of the patch-set is: DPC should handle the error handling and recovery similar to AER, because finally both are attempting recovery in one way or another, and for that the error handling and recovery framework has to be loosely coupled. It achieves uniformity and transparency to the error handling agents such as AER, DPC, with respect to recovery and error handling. So, this patch-set tries to unify a lot of things between error agents and make them behave in a well defined way (be it error (FATAL, NON_FATAL) handling or recovery). The FATAL error handling is handled with remove/reset_link/re-enumerate sequence while the NON_FATAL follows the default path. Documentation/PCI/pci-error-recovery.txt talks more on that. Changes since v14: Bjorn's comments addressed > simplified the patch set, and moved AER_FATAL handling in the beginning. > rebase the code to 4.17-rc1. Changes since v13: Bjorn's comments addressed > handle FATAL errors with remove devices followed by re-enumeration. > changes in AER and DPC along with required Documentation. Changes since v12: Bjorn's and Keith's Comments addressed. > Made DPC and AER error handling identical > handled cases for hotplug enabled system differently. Changes since v11: Bjorn's comments addressed. > rename pcie-err.c to err.c > removed EXPORT_SYMBOL > made generic find_service function in port driver. 
> removed mutex patch as no need to have mutex in pcie_do_recovery > brought in DPC_FATAL in aer.h > so now all the error codes (AER and DPC) are unified in aer.h Changes since v10: Christoph Hellwig's, David Laight's and Randy Dunlap's comments addressed. > renamed pci_do_recovery to pcie_do_recovery > removed inner braces in conditional statements. > restructured the code in pci_wait_for_link > EXPORT_SYMBOL_GPL Changes since v9: Sinan's comments addressed. > bool active = true; unnecessary variable removed. Changes since v8: Fixed Kbuild errors. Changes since v7: Rebased the code on pci master > https://kernel.googlesource.com/pub/scm/linux/kernel/git/helgaas/pci Changes since v6: Sinan's and Stefan's comments implemented. > reordered patch 6 and 7 > cleaned up Changes since v5: Sinan's and Keith's comments incorporated. > made separate patch for mutex > unified error reporting codes into drivers/pci/pci.h > got rid of wait link active/inactive and made generic function in drivers/pci/pci.c Changes since v4: Bjorn's comments incorporated. > Renamed only do_recovery. > moved the things more locally to drivers/pci/pci.h Changes since v3: Bjorn's comments incorporated. > Made separate patch renaming generic pci_err.c > Introduce pci_err.h to contain all the error types and recovery > removed all the dependencies on pci.h Changes since v2: Based on feedback from Keith: " When DPC is triggered due to receipt of an uncorrectable error Message, the Requester ID from the Message is recorded in the DPC Error Source ID register and that Message is discarded and not forwarded Upstream. 
" Removed the patch where AER checks if DPC service is active Changes since v1: Kbuild errors fixed: > pci_find_dpc_dev made static > ras_event.h updated > pci_find_aer_service call with CONFIG check > pci_find_dpc_service call with CONFIG check Oza Pawandeep (9): PCI: Unify wait for link active into generic PCI pci-error-recovery: Add AER_FATAL handling PCI/AER: Handle ERRR_FATAL with removal and re-enumeration of devices PCI/AER: Rename error recovery to generic PCI naming PCI/AER: Factor out error reporting from AER PCI/PORTDRV: Implement generic find service PCI/PORTDRV: Implement generic find device PCI/DPC: Unify and plumb error handling into DPC PCI/DPC: Disable ERR_NONFATAL and enable ERR_FATAL for DPC Documentation/PCI/pci-error-recovery.txt | 35 ++- drivers/pci/hotplug/pciehp_hpc.c | 20 +- drivers/pci/pci.c| 29 +++ drivers/pci/pci.h| 4 + drivers/pci/pcie/Makefile| 2 +- drivers/pci/pcie/aer/aerdrv.c| 2 + drivers/pci/pcie/aer/aerdrv.h| 30 --- drivers/pci/pcie/aer/aerdrv_core.c | 317 +- drivers/pci/pcie/dpc.c | 58 +++-- drivers/pci/pcie/err.c | 374 +++
[PATCH v15 3/9] PCI/AER: Handle ERR_FATAL with removal and re-enumeration of devices
This patch alters the behavior of handling of ERR_FATAL, where removal of devices is initiated, followed by reset link, followed by re-enumeration. So the errors are handled in a different way as follows: ERR_NONFATAL => call driver recovery entry points ERR_FATAL=> remove and re-enumerate please refer to Documentation/PCI/pci-error-recovery.txt for more details. Signed-off-by: Oza Pawandeep diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 779b387..206f590 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -330,6 +330,13 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); + /* +* This function is called only on ERR_FATAL now, and since +* the pci_report_resume is called only in ERR_NONFATAL case, +* the clearing part has to be taken care here. +*/ + aer_error_resume(dev); + return PCI_ERS_RESULT_RECOVERED; } diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 0ea5acc..655d4e8 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -20,6 +20,7 @@ #include #include #include "aerdrv.h" +#include "../../pci.h" #definePCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -474,6 +475,44 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) return status; } +static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) +{ + struct pci_dev *udev; + struct pci_bus *parent; + struct pci_dev *pdev, *temp; + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + + if (severity == AER_FATAL) + pci_cleanup_aer_uncorrect_error_status(dev); + + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + udev = dev; + else + udev = dev->bus->self; + + parent = udev->subordinate; + pci_lock_rescan_remove(); + list_for_each_entry_safe_reverse(pdev, temp, >devices, +bus_list) 
{ + pci_dev_get(pdev); + pci_dev_set_disconnected(pdev, NULL); + if (pci_has_subordinate(pdev)) + pci_walk_bus(pdev->subordinate, +pci_dev_set_disconnected, NULL); + pci_stop_and_remove_bus_device(pdev); + pci_dev_put(pdev); + } + + result = reset_link(udev); + if (result == PCI_ERS_RESULT_RECOVERED) + if (pcie_wait_for_link(udev, true)) + pci_rescan_bus(udev->bus); + + pci_unlock_rescan_remove(); + + return result; +} + /** * do_recovery - handle nonfatal/fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error @@ -485,11 +524,15 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) */ static void do_recovery(struct pci_dev *dev, int severity) { - pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; + pci_ers_result_t status; enum pci_channel_state state; - if (severity == AER_FATAL) - state = pci_channel_io_frozen; + if (severity == AER_FATAL) { + status = do_fatal_recovery(dev, severity); + if (status != PCI_ERS_RESULT_RECOVERED) + goto failed; + return; + } else state = pci_channel_io_normal; @@ -498,12 +541,6 @@ static void do_recovery(struct pci_dev *dev, int severity) "error_detected", report_error_detected); - if (severity == AER_FATAL) { - result = reset_link(dev); - if (result != PCI_ERS_RESULT_RECOVERED) - goto failed; - } - if (status == PCI_ERS_RESULT_CAN_RECOVER) status = broadcast_error_message(dev, state, -- 2.7.4
[PATCH v15 0/9] Address error and recovery for AER and DPC
This patch set brings in error handling support for DPC. The current implementation of AER and error message broadcasting to the EP driver is tightly coupled and limited to AER service driver. It is important to factor out broadcasting and other link handling callbacks. So that not only when AER gets triggered, but also when DPC gets triggered (for e.g. ERR_FATAL), callbacks are handled appropriately. The goal of the patch-set is: DPC should handle the error handling and recovery similar to AER, because finally both are attempting recovery in some or the other way, and for that error handling and recovery framework has to be loosely coupled. It achieves uniformity and transparency to the error handling agents such as AER, DPC, with respect to recovery and error handling. So, this patch-set tries to unify lot of things between error agents and make them behave in a well defined way. (be it error (FATAL, NON_FATAL) handling or recovery). The FATAL error handling is handled with remove/reset_link/re-enumerate sequence while the NON_FATAL follows the default path. Documentation/PCI/pci-error-recovery.txt talks more on that. Changes since v14: Bjorn's comments addressed > simplified the patch set, and moved AER_FATAL handling in the beginning. > rebase the code to 4.17-rc1. Changes since v13: Bjorn's comments addressed > handle FATAL errors with remove devices followed by re-enumeration. > changes in AER and DPC along with required Documentation. Changes since v12: Bjorn's and Keith's Comments addressed. > Made DPC and AER error handling identical > handled cases for hotplug enabled system differently. Changes since v11: Bjorn's comments addressed. > rename pcie-err.c to err.c > removed EXPORT_SYMBOL > made generic find_service function in port driver.
> removed mutex patch as no need to have mutex in pcie_do_recovery > brought in DPC_FATAL in aer.h > so now all the error codes (AER and DPC) are unified in aer.h Changes since v10: Christoph Hellwig's, David Laight's and Randy Dunlap's comments addressed. > renamed pci_do_recovery to pcie_do_recovery > removed inner braces in conditional statements. > restructured the code in pci_wait_for_link > EXPORT_SYMBOL_GPL Changes since v9: Sinan's comments addressed. > bool active = true; unnecessary variable removed. Changes since v8: Fixed Kbuild errors. Changes since v7: Rebased the code on pci master > https://kernel.googlesource.com/pub/scm/linux/kernel/git/helgaas/pci Changes since v6: Sinan's and Stefan's comments implemented. > reordered patch 6 and 7 > cleaned up Changes since v5: Sinan's and Keith's comments incorporated. > made separate patch for mutex > unified error reporting codes into driver/pci/pci.h > got rid of wait link active/inactive and made generic function in driver/pci/pci.c Changes since v4: Bjorn's comments incorporated. > Renamed only do_recovery. > moved the things more locally to drivers/pci/pci.h Changes since v3: Bjorn's comments incorporated. > Made separate patch renaming generic pci_err.c > Introduce pci_err.h to contain all the error types and recovery > removed all the dependencies on pci.h Changes since v2: Based on feedback from Keith: " When DPC is triggered due to receipt of an uncorrectable error Message, the Requester ID from the Message is recorded in the DPC Error Source ID register and that Message is discarded and not forwarded Upstream.
" Removed the patch where AER checks if DPC service is active Changes since v1: Kbuild errors fixed: > pci_find_dpc_dev made static > ras_event.h updated > pci_find_aer_service call with CONFIG check > pci_find_dpc_service call with CONFIG check Oza Pawandeep (9): PCI: Unify wait for link active into generic PCI pci-error-recovery: Add AER_FATAL handling PCI/AER: Handle ERRR_FATAL with removal and re-enumeration of devices PCI/AER: Rename error recovery to generic PCI naming PCI/AER: Factor out error reporting from AER PCI/PORTDRV: Implement generic find service PCI/PORTDRV: Implement generic find device PCI/DPC: Unify and plumb error handling into DPC PCI/DPC: Disable ERR_NONFATAL and enable ERR_FATAL for DPC Documentation/PCI/pci-error-recovery.txt | 35 ++- drivers/pci/hotplug/pciehp_hpc.c | 20 +- drivers/pci/pci.c| 29 +++ drivers/pci/pci.h| 4 + drivers/pci/pcie/Makefile| 2 +- drivers/pci/pcie/aer/aerdrv.c| 2 + drivers/pci/pcie/aer/aerdrv.h| 30 --- drivers/pci/pcie/aer/aerdrv_core.c | 317 +- drivers/pci/pcie/dpc.c | 58 +++-- drivers/pci/pcie/err.c | 374 +++
[PATCH v15 4/9] PCI/AER: Rename error recovery to generic PCI naming
This patch renames error recovery to generic name with pcie prefix Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index cec9d8c..22a9589 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,9 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +/* PCI error reporting and recovery */ +void pcie_do_recovery(struct pci_dev *dev, int severity); + bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 655d4e8..be4ee3b 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -475,7 +475,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) return status; } -static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) +static pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev, int severity) { struct pci_dev *udev; struct pci_bus *parent; @@ -514,7 +514,7 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) } /** - * do_recovery - handle nonfatal/fatal error recovery process + * pcie_do_recovery - handle nonfatal/fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error * @severity: error severity type * @@ -522,13 +522,13 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) * error detected message to all downstream drivers within a hierarchy in * question and return the returned code.
*/ -static void do_recovery(struct pci_dev *dev, int severity) +void pcie_do_recovery(struct pci_dev *dev, int severity) { pci_ers_result_t status; enum pci_channel_state state; if (severity == AER_FATAL) { - status = do_fatal_recovery(dev, severity); + status = pcie_do_fatal_recovery(dev, severity); if (status != PCI_ERS_RESULT_RECOVERED) goto failed; return; @@ -600,7 +600,7 @@ static void handle_error_source(struct pcie_device *aerdev, pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); } else - do_recovery(dev, info->severity); + pcie_do_recovery(dev, info->severity); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -665,7 +665,7 @@ static void aer_recover_work_func(struct work_struct *work) } cper_print_aer(pdev, entry.severity, entry.regs); if (entry.severity != AER_CORRECTABLE) - do_recovery(pdev, entry.severity); + pcie_do_recovery(pdev, entry.severity); pci_dev_put(pdev); } } -- 2.7.4
[PATCH v15 4/9] PCI/AER: Rename error recovery to generic PCI naming
This patch renames error recovery to generic name with pcie prefix Signed-off-by: Oza Pawandeep Reviewed-by: Keith Busch diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index cec9d8c..22a9589 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -353,6 +353,9 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, void pci_enable_acs(struct pci_dev *dev); +/* PCI error reporting and recovery */ +void pcie_do_recovery(struct pci_dev *dev, int severity); + bool pcie_wait_for_link(struct pci_dev *pdev, bool active); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 655d4e8..be4ee3b 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -475,7 +475,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev) return status; } -static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) +static pci_ers_result_t pcie_do_fatal_recovery(struct pci_dev *dev, int severity) { struct pci_dev *udev; struct pci_bus *parent; @@ -514,7 +514,7 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) } /** - * do_recovery - handle nonfatal/fatal error recovery process + * pcie_do_recovery - handle nonfatal/fatal error recovery process * @dev: pointer to a pci_dev data structure of agent detecting an error * @severity: error severity type * @@ -522,13 +522,13 @@ static pci_ers_result_t do_fatal_recovery(struct pci_dev *dev, int severity) * error detected message to all downstream drivers within a hierarchy in * question and return the returned code. 
*/ -static void do_recovery(struct pci_dev *dev, int severity) +void pcie_do_recovery(struct pci_dev *dev, int severity) { pci_ers_result_t status; enum pci_channel_state state; if (severity == AER_FATAL) { - status = do_fatal_recovery(dev, severity); + status = pcie_do_fatal_recovery(dev, severity); if (status != PCI_ERS_RESULT_RECOVERED) goto failed; return; @@ -600,7 +600,7 @@ static void handle_error_source(struct pcie_device *aerdev, pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); } else - do_recovery(dev, info->severity); + pcie_do_recovery(dev, info->severity); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -665,7 +665,7 @@ static void aer_recover_work_func(struct work_struct *work) } cper_print_aer(pdev, entry.severity, entry.regs); if (entry.severity != AER_CORRECTABLE) - do_recovery(pdev, entry.severity); + pcie_do_recovery(pdev, entry.severity); pci_dev_put(pdev); } } -- 2.7.4
Re: [v2 PATCH 1/1] tg3: fix meaningless hw_stats reading after tg3_halt memset 0 hw_stats
On Wed, May 2, 2018 at 5:30 PM, Zumeng Chen wrote: > On 2018年05月03日 01:32, Michael Chan wrote: >> >> On Wed, May 2, 2018 at 3:27 AM, Zumeng Chen wrote: >>> >>> On 2018年05月02日 13:12, Michael Chan wrote: On Tue, May 1, 2018 at 5:42 PM, Zumeng Chen wrote: > diff --git a/drivers/net/ethernet/broadcom/tg3.h > b/drivers/net/ethernet/broadcom/tg3.h > index 3b5e98e..c61d83c 100644 > --- a/drivers/net/ethernet/broadcom/tg3.h > +++ b/drivers/net/ethernet/broadcom/tg3.h > @@ -3102,6 +3102,7 @@ enum TG3_FLAGS { > TG3_FLAG_ROBOSWITCH, > TG3_FLAG_ONE_DMA_AT_ONCE, > TG3_FLAG_RGMII_MODE, > + TG3_FLAG_HALT, I think you should be able to use the existing INIT_COMPLETE flag >>> >>> >>> No, it will bring the uncertain factors into the existed complicate >>> logic >>> of INIT_COMPLETE. >>> And I think it's very simple logic here to fix the meaningless hw_stats >>> reading and the problem >>> of commit f5992b72. I even suspect if you have read INIT_COMPLETE related >>> codes carefully. >>> >> We should use an existing flag whenever appropriate > > > I disagree. This is sort of blahblah... >> I don't want to see another flag added that is practically the same as !INIT_COMPLETE. The driver already has close to one hundred flags. Adding a new flag that is similar to an existing flag will just make the code more difficult to understand and maintain. If you don't want to fix it the cleaner way, Siva or I will fix it.
Re: [v2 PATCH 1/1] tg3: fix meaningless hw_stats reading after tg3_halt memset 0 hw_stats
On Wed, May 2, 2018 at 5:30 PM, Zumeng Chen wrote: > On 2018年05月03日 01:32, Michael Chan wrote: >> >> On Wed, May 2, 2018 at 3:27 AM, Zumeng Chen wrote: >>> >>> On 2018年05月02日 13:12, Michael Chan wrote: On Tue, May 1, 2018 at 5:42 PM, Zumeng Chen wrote: > diff --git a/drivers/net/ethernet/broadcom/tg3.h > b/drivers/net/ethernet/broadcom/tg3.h > index 3b5e98e..c61d83c 100644 > --- a/drivers/net/ethernet/broadcom/tg3.h > +++ b/drivers/net/ethernet/broadcom/tg3.h > @@ -3102,6 +3102,7 @@ enum TG3_FLAGS { > TG3_FLAG_ROBOSWITCH, > TG3_FLAG_ONE_DMA_AT_ONCE, > TG3_FLAG_RGMII_MODE, > + TG3_FLAG_HALT, I think you should be able to use the existing INIT_COMPLETE flag >>> >>> >>> No, it will bring the uncertain factors into the existed complicate >>> logic >>> of INIT_COMPLETE. >>> And I think it's very simple logic here to fix the meaningless hw_stats >>> reading and the problem >>> of commit f5992b72. I even suspect if you have read INIT_COMPLETE related >>> codes carefully. >>> >> We should use an existing flag whenever appropriate > > > I disagree. This is sort of blahblah... >> I don't want to see another flag added that is practically the same as !INIT_COMPLETE. The driver already has close to one hundred flags. Adding a new flag that is similar to an existing flag will just make the code more difficult to understand and maintain. If you don't want to fix it the cleaner way, Siva or I will fix it.
Re: [PATCH v2 4/4] vsprintf: Add command line option debug_early_boot
On Wed, May 2, 2018 at 3:50 PM, Tobin C. Harding wrote: > Currently printing [hashed] pointers requires either a hw RNG or enough > entropy to be available. Early in the boot sequence these conditions > may not be met resulting in a dummy string '(ptrval)' being > printed. This makes debugging the early boot sequence difficult. We > can relax the requirement to use cryptographically secure hashing during > debugging. This enables debugging while keeping development/production > kernel behaviour the same. > > If new command line option debug_early_boot is enabled use > cryptographically insecure hashing and hash pointer value immediately. > > Signed-off-by: Tobin C. Harding > --- > Documentation/admin-guide/kernel-parameters.txt | 8 > lib/vsprintf.c | 18 ++ > 2 files changed, 26 insertions(+) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > b/Documentation/admin-guide/kernel-parameters.txt > index b8d1379aa039..ab619c4ccbf2 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -748,6 +748,14 @@ > > debug [KNL] Enable kernel debugging (events log level). > > + debug_early_boot > + [KNL] Enable debugging early in the boot sequence. If > + enabled, we use a weak hash instead of siphash to hash > + pointers. Use this option if you need to see pointer > + values during early boot (i.e you are seeing instances > + of '(___ptrval___)') - cryptographically insecure, > + please do not use on production kernels. > + > debug_locks_verbose= > [KNL] verbose self-tests > Format=<0|1> > diff --git a/lib/vsprintf.c b/lib/vsprintf.c > index 3697a19c2b25..6c139b442267 100644 > --- a/lib/vsprintf.c > +++ b/lib/vsprintf.c > @@ -1654,6 +1654,18 @@ char *device_node_string(char *buf, char *end, struct > device_node *dn, > return widen_string(buf, buf - buf_start, end, spec); > } > > +/* Make pointers available for printing early in the boot sequence.
*/ > +static int debug_early_boot; Please make this __ro_after_init too. -Kees > +EXPORT_SYMBOL(debug_early_boot); > + > +static int __init debug_early_boot_enable(char *str) > +{ > + debug_early_boot = 1; > + pr_info("debug_early_boot enabled\n"); > + return 0; > +} > +early_param("debug_early_boot", debug_early_boot_enable); > + > static bool have_filled_random_ptr_key __read_mostly; > static siphash_key_t ptr_key __read_mostly; > > @@ -1707,6 +1719,12 @@ static char *ptr_to_id(char *buf, char *end, void > *ptr, struct printf_spec spec) > const char *str = sizeof(ptr) == 8 ? "(ptrval)" : "(ptrval)"; > unsigned long hashval; > > + /* When debugging early boot use non-cryptographically secure hash */ > + if (unlikely(debug_early_boot)) { > + hashval = hash_long((unsigned long)ptr, 32); > + return pointer_string(buf, end, (const void *)hashval, spec); > + } > + > if (unlikely(!have_filled_random_ptr_key)) { > spec.field_width = 2 * sizeof(ptr); > /* string length must be less than default_width */ > -- > 2.7.4 > -- Kees Cook Pixel Security
Re: [PATCH v2 4/4] vsprintf: Add command line option debug_early_boot
On Wed, May 2, 2018 at 3:50 PM, Tobin C. Harding wrote: > Currently printing [hashed] pointers requires either a hw RNG or enough > entropy to be available. Early in the boot sequence these conditions > may not be met resulting in a dummy string '(ptrval)' being > printed. This makes debugging the early boot sequence difficult. We > can relax the requirement to use cryptographically secure hashing during > debugging. This enables debugging while keeping development/production > kernel behaviour the same. > > If new command line option debug_early_boot is enabled use > cryptographically insecure hashing and hash pointer value immediately. > > Signed-off-by: Tobin C. Harding > --- > Documentation/admin-guide/kernel-parameters.txt | 8 > lib/vsprintf.c | 18 ++ > 2 files changed, 26 insertions(+) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt > b/Documentation/admin-guide/kernel-parameters.txt > index b8d1379aa039..ab619c4ccbf2 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -748,6 +748,14 @@ > > debug [KNL] Enable kernel debugging (events log level). > > + debug_early_boot > + [KNL] Enable debugging early in the boot sequence. If > + enabled, we use a weak hash instead of siphash to hash > + pointers. Use this option if you need to see pointer > + values during early boot (i.e you are seeing instances > + of '(___ptrval___)') - cryptographically insecure, > + please do not use on production kernels. > + > debug_locks_verbose= > [KNL] verbose self-tests > Format=<0|1> > diff --git a/lib/vsprintf.c b/lib/vsprintf.c > index 3697a19c2b25..6c139b442267 100644 > --- a/lib/vsprintf.c > +++ b/lib/vsprintf.c > @@ -1654,6 +1654,18 @@ char *device_node_string(char *buf, char *end, struct > device_node *dn, > return widen_string(buf, buf - buf_start, end, spec); > } > > +/* Make pointers available for printing early in the boot sequence. 
*/ > +static int debug_early_boot; Please make this __ro_after_init too. -Kees > +EXPORT_SYMBOL(debug_early_boot); > + > +static int __init debug_early_boot_enable(char *str) > +{ > + debug_early_boot = 1; > + pr_info("debug_early_boot enabled\n"); > + return 0; > +} > +early_param("debug_early_boot", debug_early_boot_enable); > + > static bool have_filled_random_ptr_key __read_mostly; > static siphash_key_t ptr_key __read_mostly; > > @@ -1707,6 +1719,12 @@ static char *ptr_to_id(char *buf, char *end, void > *ptr, struct printf_spec spec) > const char *str = sizeof(ptr) == 8 ? "(ptrval)" : "(ptrval)"; > unsigned long hashval; > > + /* When debugging early boot use non-cryptographically secure hash */ > + if (unlikely(debug_early_boot)) { > + hashval = hash_long((unsigned long)ptr, 32); > + return pointer_string(buf, end, (const void *)hashval, spec); > + } > + > if (unlikely(!have_filled_random_ptr_key)) { > spec.field_width = 2 * sizeof(ptr); > /* string length must be less than default_width */ > -- > 2.7.4 > -- Kees Cook Pixel Security
Re: [PATCH] kernel/exit.c: pointer sighand could be uninitialized
On Wed, May 2, 2018 at 6:48 PM, Yizhuo Zhai wrote: > Variable 'sighand' could be uninitialized if probe_kernel_address fails > (-EFAULT). The later use in the if statement may lead to undefined behavior. > > Signed-off-by: yzhai...@ucr.edu > --- > kernel/exit.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/kernel/exit.c b/kernel/exit.c > index f6cad39..a353bd1 100644 > --- a/kernel/exit.c > +++ b/kernel/exit.c > @@ -232,7 +232,7 @@ void release_task(struct task_struct *p) > */ > struct task_struct *task_rcu_dereference(struct task_struct **ptask) > { > - struct sighand_struct *sighand; > + struct sighand_struct *sighand = NULL; > struct task_struct *task; > > /* Better would probably be to check the return of probe_kernel_address() and take appropriate action... -Kees -- Kees Cook Pixel Security
Re: [PATCH] kernel/exit.c: pointer sighand could be uninitialized
On Wed, May 2, 2018 at 6:48 PM, Yizhuo Zhai wrote: > Variable 'sighand' could be uninitialized if probe_kernel_address fails > (-EFAULT). The later use in the if statement may lead to undefined behavior. > > Signed-off-by: yzhai...@ucr.edu > --- > kernel/exit.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/kernel/exit.c b/kernel/exit.c > index f6cad39..a353bd1 100644 > --- a/kernel/exit.c > +++ b/kernel/exit.c > @@ -232,7 +232,7 @@ void release_task(struct task_struct *p) > */ > struct task_struct *task_rcu_dereference(struct task_struct **ptask) > { > - struct sighand_struct *sighand; > + struct sighand_struct *sighand = NULL; > struct task_struct *task; > > /* Better would probably be to check the return of probe_kernel_address() and take appropriate action... -Kees -- Kees Cook Pixel Security
[PATCH 1/3] staging: Android: vsoc: Create wc kernel mapping for region shm.
Map the region shm as write-combining instead of uncachable. Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/TODO | 1 - drivers/staging/android/vsoc.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 2ea6f97b8f0f..ebd6ba3ae02e 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -18,7 +18,6 @@ vsoc.c, uapi/vsoc_shm.h waiting threads. We should eventually use multiple queues and select the queue based on the region. - Add debugfs support for examining the permissions of regions. - - Use ioremap_wc instead of ioremap_nocache. - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been superseded by the futex and is there for legacy reasons. diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 587c66d709b9..794137b7751f 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -802,9 +802,7 @@ static int vsoc_probe_device(struct pci_dev *pdev, dev_info(>dev, "shared memory @ DMA %p size=0x%zx\n", (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); - /* TODO(ghartman): ioremap_wc should work here */ - vsoc_dev.kernel_mapped_shm = ioremap_nocache( - vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); if (!vsoc_dev.kernel_mapped_shm) { dev_err(_dev.dev->dev, "cannot iomap region\n"); vsoc_remove_device(pdev);
[PATCH 1/3] staging: Android: vsoc: Create wc kernel mapping for region shm.
Map the region shm as write-combining instead of uncachable. Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/TODO | 1 - drivers/staging/android/vsoc.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/staging/android/TODO b/drivers/staging/android/TODO index 2ea6f97b8f0f..ebd6ba3ae02e 100644 --- a/drivers/staging/android/TODO +++ b/drivers/staging/android/TODO @@ -18,7 +18,6 @@ vsoc.c, uapi/vsoc_shm.h waiting threads. We should eventually use multiple queues and select the queue based on the region. - Add debugfs support for examining the permissions of regions. - - Use ioremap_wc instead of ioremap_nocache. - Remove VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl. This functionality has been superseded by the futex and is there for legacy reasons. diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 587c66d709b9..794137b7751f 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -802,9 +802,7 @@ static int vsoc_probe_device(struct pci_dev *pdev, dev_info(>dev, "shared memory @ DMA %p size=0x%zx\n", (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); - /* TODO(ghartman): ioremap_wc should work here */ - vsoc_dev.kernel_mapped_shm = ioremap_nocache( - vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); if (!vsoc_dev.kernel_mapped_shm) { dev_err(_dev.dev->dev, "cannot iomap region\n"); vsoc_remove_device(pdev);
[PATCH 2/3] staging: Android: vsoc: Fix a i386-randconfig warning.
Fix "warning: cast to pointer from integer of different size" when printing the region shm physical address. Use the %pa conversion specifier and pass the resource by reference. Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/vsoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 794137b7751f..3e6e4af7d6a1 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -800,8 +800,8 @@ static int vsoc_probe_device(struct pci_dev *pdev, vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); - dev_info(>dev, "shared memory @ DMA %p size=0x%zx\n", -(void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + dev_info(>dev, "shared memory @ DMA %pa size=0x%zx\n", +_dev.shm_phys_start, vsoc_dev.shm_size); vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); if (!vsoc_dev.kernel_mapped_shm) { dev_err(_dev.dev->dev, "cannot iomap region\n");
[PATCH 2/3] staging: Android: vsoc: Fix a i386-randconfig warning.
Fix "warning: cast to pointer from integer of different size" when printing the region shm physical address. Use the %pa conversion specifier and pass the resource by reference. Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/vsoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 794137b7751f..3e6e4af7d6a1 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -800,8 +800,8 @@ static int vsoc_probe_device(struct pci_dev *pdev, vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); - dev_info(>dev, "shared memory @ DMA %p size=0x%zx\n", -(void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); + dev_info(>dev, "shared memory @ DMA %pa size=0x%zx\n", +_dev.shm_phys_start, vsoc_dev.shm_size); vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0); if (!vsoc_dev.kernel_mapped_shm) { dev_err(_dev.dev->dev, "cannot iomap region\n");
[PATCH 3/3] staging: Android: Fix sparse warnings in vsoc driver.
Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/vsoc.c | 100 - 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 3e6e4af7d6a1..954ed2c5d807 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -81,8 +81,8 @@ struct vsoc_region_data { atomic_t *incoming_signalled; /* Flag indicating the guest has signalled the host. */ atomic_t *outgoing_signalled; - int irq_requested; - int device_created; + bool irq_requested; + bool device_created; }; struct vsoc_device { @@ -91,7 +91,7 @@ struct vsoc_device { /* Physical address of SHARED_MEMORY_BAR. */ phys_addr_t shm_phys_start; /* Kernel virtual address of SHARED_MEMORY_BAR. */ - void *kernel_mapped_shm; + void __iomem *kernel_mapped_shm; /* Size of the entire shared memory window in bytes. */ size_t shm_size; /* @@ -116,22 +116,23 @@ struct vsoc_device { * vsoc_region_data because the kernel deals with them as an array. */ struct msix_entry *msix_entries; - /* -* Flags that indicate what we've initialzied. These are used to do an -* orderly cleanup of the device. -*/ - char enabled_device; - char requested_regions; - char cdev_added; - char class_added; - char msix_enabled; /* Mutex that protectes the permission list */ struct mutex mtx; /* Major number assigned by the kernel */ int major; - + /* Character device assigned by the kernel */ struct cdev cdev; + /* Device class assigned by the kernel */ struct class *class; + /* +* Flags that indicate what we've initialized. These are used to do an +* orderly cleanup of the device.
+*/ + bool enabled_device; + bool requested_regions; + bool cdev_added; + bool class_added; + bool msix_enabled; }; static struct vsoc_device vsoc_dev; @@ -153,13 +154,13 @@ static long vsoc_ioctl(struct file *, unsigned int, unsigned long); static int vsoc_mmap(struct file *, struct vm_area_struct *); static int vsoc_open(struct inode *, struct file *); static int vsoc_release(struct inode *, struct file *); -static ssize_t vsoc_read(struct file *, char *, size_t, loff_t *); -static ssize_t vsoc_write(struct file *, const char *, size_t, loff_t *); +static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *); static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); static int do_create_fd_scoped_permission( struct vsoc_device_region *region_p, struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg *__user arg); + struct fd_scoped_permission_arg __user *arg); static void do_destroy_fd_scoped_permission( struct vsoc_device_region *owner_region_p, struct fd_scoped_permission *perm); @@ -198,7 +199,7 @@ inline int vsoc_validate_filep(struct file *filp) /* Converts from shared memory offset to virtual address */ static inline void *shm_off_to_virtual_addr(__u32 offset) { - return vsoc_dev.kernel_mapped_shm + offset; + return (void __force *)vsoc_dev.kernel_mapped_shm + offset; } /* Converts from shared memory offset to physical address */ @@ -261,7 +262,7 @@ static struct pci_driver vsoc_pci_driver = { static int do_create_fd_scoped_permission( struct vsoc_device_region *region_p, struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg *__user arg) + struct fd_scoped_permission_arg __user *arg) { struct file *managed_filp; s32 managed_fd; @@ -632,11 +633,11 @@ static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return 0; } -static ssize_t vsoc_read(struct file *filp, char *buffer, size_t len, +static 
ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len, loff_t *poffset) { __u32 area_off; - void *area_p; + const void *area_p; ssize_t area_len; int retval = vsoc_validate_filep(filp); @@ -706,7 +707,7 @@ static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) return offset; } -static ssize_t vsoc_write(struct file *filp, const char *buffer, +static ssize_t vsoc_write(struct file *filp, const char __user *buffer, size_t len, loff_t *poffset) { __u32 area_off; @@
[PATCH 3/3] staging: Android: Fix sparse warnings in vsoc driver.
Cc: Greg Kroah-Hartman Cc: Arve Hjønnevåg Cc: Todd Kjos Cc: Martijn Coenen Cc: Greg Hartman Cc: de...@driverdev.osuosl.org Cc: kernel-t...@android.com Signed-off-by: Alistair Strachan --- drivers/staging/android/vsoc.c | 100 - 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c index 3e6e4af7d6a1..954ed2c5d807 100644 --- a/drivers/staging/android/vsoc.c +++ b/drivers/staging/android/vsoc.c @@ -81,8 +81,8 @@ struct vsoc_region_data { atomic_t *incoming_signalled; /* Flag indicating the guest has signalled the host. */ atomic_t *outgoing_signalled; - int irq_requested; - int device_created; + bool irq_requested; + bool device_created; }; struct vsoc_device { @@ -91,7 +91,7 @@ struct vsoc_device { /* Physical address of SHARED_MEMORY_BAR. */ phys_addr_t shm_phys_start; /* Kernel virtual address of SHARED_MEMORY_BAR. */ - void *kernel_mapped_shm; + void __iomem *kernel_mapped_shm; /* Size of the entire shared memory window in bytes. */ size_t shm_size; /* @@ -116,22 +116,23 @@ struct vsoc_device { * vsoc_region_data because the kernel deals with them as an array. */ struct msix_entry *msix_entries; - /* -* Flags that indicate what we've initialzied. These are used to do an -* orderly cleanup of the device. -*/ - char enabled_device; - char requested_regions; - char cdev_added; - char class_added; - char msix_enabled; /* Mutex that protectes the permission list */ struct mutex mtx; /* Major number assigned by the kernel */ int major; - + /* Character device assigned by the kernel */ struct cdev cdev; + /* Device class assigned by the kernel */ struct class *class; + /* +* Flags that indicate what we've initialized. These are used to do an +* orderly cleanup of the device. 
+*/ + bool enabled_device; + bool requested_regions; + bool cdev_added; + bool class_added; + bool msix_enabled; }; static struct vsoc_device vsoc_dev; @@ -153,13 +154,13 @@ static long vsoc_ioctl(struct file *, unsigned int, unsigned long); static int vsoc_mmap(struct file *, struct vm_area_struct *); static int vsoc_open(struct inode *, struct file *); static int vsoc_release(struct inode *, struct file *); -static ssize_t vsoc_read(struct file *, char *, size_t, loff_t *); -static ssize_t vsoc_write(struct file *, const char *, size_t, loff_t *); +static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *); static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin); static int do_create_fd_scoped_permission( struct vsoc_device_region *region_p, struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg *__user arg); + struct fd_scoped_permission_arg __user *arg); static void do_destroy_fd_scoped_permission( struct vsoc_device_region *owner_region_p, struct fd_scoped_permission *perm); @@ -198,7 +199,7 @@ inline int vsoc_validate_filep(struct file *filp) /* Converts from shared memory offset to virtual address */ static inline void *shm_off_to_virtual_addr(__u32 offset) { - return vsoc_dev.kernel_mapped_shm + offset; + return (void __force *)vsoc_dev.kernel_mapped_shm + offset; } /* Converts from shared memory offset to physical address */ @@ -261,7 +262,7 @@ static struct pci_driver vsoc_pci_driver = { static int do_create_fd_scoped_permission( struct vsoc_device_region *region_p, struct fd_scoped_permission_node *np, - struct fd_scoped_permission_arg *__user arg) + struct fd_scoped_permission_arg __user *arg) { struct file *managed_filp; s32 managed_fd; @@ -632,11 +633,11 @@ static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return 0; } -static ssize_t vsoc_read(struct file *filp, char *buffer, size_t len, +static 
ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len, loff_t *poffset) { __u32 area_off; - void *area_p; + const void *area_p; ssize_t area_len; int retval = vsoc_validate_filep(filp); @@ -706,7 +707,7 @@ static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin) return offset; } -static ssize_t vsoc_write(struct file *filp, const char *buffer, +static ssize_t vsoc_write(struct file *filp, const char __user *buffer, size_t len, loff_t *poffset) { __u32 area_off; @@ -772,14 +773,14 @@ static int vsoc_probe_device(struct pci_dev *pdev, pci_name(pdev), result);
[PATCH RFC v2 net-next 3/4] bpfilter: add iptable get/set parsing
From: "David S. Miller"parse iptable binary blobs into bpfilter internal data structures bpfilter.ko only passing the [gs]etsockopt commands from kernel to umh All parsing is done inside umh Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpfilter.h | 179 ++ net/bpfilter/Makefile | 2 +- net/bpfilter/bpfilter_mod.h | 96 ++ net/bpfilter/ctor.c | 80 +++ net/bpfilter/init.c | 33 net/bpfilter/main.c | 51 net/bpfilter/sockopt.c| 153 net/bpfilter/tables.c | 70 + net/bpfilter/targets.c| 51 net/bpfilter/tgts.c | 25 ++ 10 files changed, 739 insertions(+), 1 deletion(-) create mode 100644 net/bpfilter/bpfilter_mod.h create mode 100644 net/bpfilter/ctor.c create mode 100644 net/bpfilter/init.c create mode 100644 net/bpfilter/sockopt.c create mode 100644 net/bpfilter/tables.c create mode 100644 net/bpfilter/targets.c create mode 100644 net/bpfilter/tgts.c diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h index 2ec3cc99ea4c..38d54e9947a1 100644 --- a/include/uapi/linux/bpfilter.h +++ b/include/uapi/linux/bpfilter.h @@ -18,4 +18,183 @@ enum { BPFILTER_IPT_GET_MAX, }; +enum { + BPFILTER_XT_TABLE_MAXNAMELEN = 32, +}; + +enum { + BPFILTER_NF_DROP = 0, + BPFILTER_NF_ACCEPT = 1, + BPFILTER_NF_STOLEN = 2, + BPFILTER_NF_QUEUE = 3, + BPFILTER_NF_REPEAT = 4, + BPFILTER_NF_STOP = 5, + BPFILTER_NF_MAX_VERDICT = BPFILTER_NF_STOP, +}; + +enum { + BPFILTER_INET_HOOK_PRE_ROUTING = 0, + BPFILTER_INET_HOOK_LOCAL_IN = 1, + BPFILTER_INET_HOOK_FORWARD = 2, + BPFILTER_INET_HOOK_LOCAL_OUT= 3, + BPFILTER_INET_HOOK_POST_ROUTING = 4, + BPFILTER_INET_HOOK_MAX, +}; + +enum { + BPFILTER_PROTO_UNSPEC = 0, + BPFILTER_PROTO_INET = 1, + BPFILTER_PROTO_IPV4 = 2, + BPFILTER_PROTO_ARP = 3, + BPFILTER_PROTO_NETDEV = 5, + BPFILTER_PROTO_BRIDGE = 7, + BPFILTER_PROTO_IPV6 = 10, + BPFILTER_PROTO_DECNET = 12, + BPFILTER_PROTO_NUMPROTO, +}; + +#ifndef INT_MAX +#define INT_MAX((int)(~0U>>1)) +#endif +#ifndef INT_MIN +#define INT_MIN (-INT_MAX - 1) +#endif + 
+enum { + BPFILTER_IP_PRI_FIRST = INT_MIN, + BPFILTER_IP_PRI_CONNTRACK_DEFRAG= -400, + BPFILTER_IP_PRI_RAW = -300, + BPFILTER_IP_PRI_SELINUX_FIRST = -225, + BPFILTER_IP_PRI_CONNTRACK = -200, + BPFILTER_IP_PRI_MANGLE = -150, + BPFILTER_IP_PRI_NAT_DST = -100, + BPFILTER_IP_PRI_FILTER = 0, + BPFILTER_IP_PRI_SECURITY= 50, + BPFILTER_IP_PRI_NAT_SRC = 100, + BPFILTER_IP_PRI_SELINUX_LAST= 225, + BPFILTER_IP_PRI_CONNTRACK_HELPER= 300, + BPFILTER_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, + BPFILTER_IP_PRI_LAST= INT_MAX, +}; + +#define BPFILTER_FUNCTION_MAXNAMELEN 30 +#define BPFILTER_EXTENSION_MAXNAMELEN 29 +#define BPFILTER_TABLE_MAXNAMELEN 32 + +struct bpfilter_match; +struct bpfilter_entry_match { + union { + struct { + __u16 match_size; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; + __u8revision; + } user; + struct { + __u16 match_size; + struct bpfilter_match *match; + } kernel; + __u16 match_size; + } u; + unsigned char data[0]; +}; + +struct bpfilter_target; +struct bpfilter_entry_target { + union { + struct { + __u16 target_size; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; + __u8revision; + } user; + struct { + __u16 target_size; + struct bpfilter_target *target; + } kernel; + __u16 target_size; + } u; + unsigned char data[0]; +}; + +struct bpfilter_standard_target { + struct bpfilter_entry_targettarget; + int verdict; +}; + +struct bpfilter_error_target { + struct bpfilter_entry_targettarget; + char error_name[BPFILTER_FUNCTION_MAXNAMELEN]; +}; + +#define __ALIGN_KERNEL(x, a)__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
[PATCH RFC v2 net-next 3/4] bpfilter: add iptable get/set parsing
From: "David S. Miller" parse iptable binary blobs into bpfilter internal data structures bpfilter.ko only passing the [gs]etsockopt commands from kernel to umh All parsing is done inside umh Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpfilter.h | 179 ++ net/bpfilter/Makefile | 2 +- net/bpfilter/bpfilter_mod.h | 96 ++ net/bpfilter/ctor.c | 80 +++ net/bpfilter/init.c | 33 net/bpfilter/main.c | 51 net/bpfilter/sockopt.c| 153 net/bpfilter/tables.c | 70 + net/bpfilter/targets.c| 51 net/bpfilter/tgts.c | 25 ++ 10 files changed, 739 insertions(+), 1 deletion(-) create mode 100644 net/bpfilter/bpfilter_mod.h create mode 100644 net/bpfilter/ctor.c create mode 100644 net/bpfilter/init.c create mode 100644 net/bpfilter/sockopt.c create mode 100644 net/bpfilter/tables.c create mode 100644 net/bpfilter/targets.c create mode 100644 net/bpfilter/tgts.c diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h index 2ec3cc99ea4c..38d54e9947a1 100644 --- a/include/uapi/linux/bpfilter.h +++ b/include/uapi/linux/bpfilter.h @@ -18,4 +18,183 @@ enum { BPFILTER_IPT_GET_MAX, }; +enum { + BPFILTER_XT_TABLE_MAXNAMELEN = 32, +}; + +enum { + BPFILTER_NF_DROP = 0, + BPFILTER_NF_ACCEPT = 1, + BPFILTER_NF_STOLEN = 2, + BPFILTER_NF_QUEUE = 3, + BPFILTER_NF_REPEAT = 4, + BPFILTER_NF_STOP = 5, + BPFILTER_NF_MAX_VERDICT = BPFILTER_NF_STOP, +}; + +enum { + BPFILTER_INET_HOOK_PRE_ROUTING = 0, + BPFILTER_INET_HOOK_LOCAL_IN = 1, + BPFILTER_INET_HOOK_FORWARD = 2, + BPFILTER_INET_HOOK_LOCAL_OUT= 3, + BPFILTER_INET_HOOK_POST_ROUTING = 4, + BPFILTER_INET_HOOK_MAX, +}; + +enum { + BPFILTER_PROTO_UNSPEC = 0, + BPFILTER_PROTO_INET = 1, + BPFILTER_PROTO_IPV4 = 2, + BPFILTER_PROTO_ARP = 3, + BPFILTER_PROTO_NETDEV = 5, + BPFILTER_PROTO_BRIDGE = 7, + BPFILTER_PROTO_IPV6 = 10, + BPFILTER_PROTO_DECNET = 12, + BPFILTER_PROTO_NUMPROTO, +}; + +#ifndef INT_MAX +#define INT_MAX((int)(~0U>>1)) +#endif +#ifndef INT_MIN +#define INT_MIN (-INT_MAX - 1) +#endif + 
+enum { + BPFILTER_IP_PRI_FIRST = INT_MIN, + BPFILTER_IP_PRI_CONNTRACK_DEFRAG= -400, + BPFILTER_IP_PRI_RAW = -300, + BPFILTER_IP_PRI_SELINUX_FIRST = -225, + BPFILTER_IP_PRI_CONNTRACK = -200, + BPFILTER_IP_PRI_MANGLE = -150, + BPFILTER_IP_PRI_NAT_DST = -100, + BPFILTER_IP_PRI_FILTER = 0, + BPFILTER_IP_PRI_SECURITY= 50, + BPFILTER_IP_PRI_NAT_SRC = 100, + BPFILTER_IP_PRI_SELINUX_LAST= 225, + BPFILTER_IP_PRI_CONNTRACK_HELPER= 300, + BPFILTER_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, + BPFILTER_IP_PRI_LAST= INT_MAX, +}; + +#define BPFILTER_FUNCTION_MAXNAMELEN 30 +#define BPFILTER_EXTENSION_MAXNAMELEN 29 +#define BPFILTER_TABLE_MAXNAMELEN 32 + +struct bpfilter_match; +struct bpfilter_entry_match { + union { + struct { + __u16 match_size; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; + __u8revision; + } user; + struct { + __u16 match_size; + struct bpfilter_match *match; + } kernel; + __u16 match_size; + } u; + unsigned char data[0]; +}; + +struct bpfilter_target; +struct bpfilter_entry_target { + union { + struct { + __u16 target_size; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; + __u8revision; + } user; + struct { + __u16 target_size; + struct bpfilter_target *target; + } kernel; + __u16 target_size; + } u; + unsigned char data[0]; +}; + +struct bpfilter_standard_target { + struct bpfilter_entry_targettarget; + int verdict; +}; + +struct bpfilter_error_target { + struct bpfilter_entry_targettarget; + char error_name[BPFILTER_FUNCTION_MAXNAMELEN]; +}; + +#define __ALIGN_KERNEL(x, a)__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL_MASK(x, mask)(((x) + (mask)) &
[PATCH v2 net-next 1/4] umh: introduce fork_usermode_blob() helper
Introduce helper: int fork_usermode_blob(void *data, size_t len, struct umh_info *info); struct umh_info { struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; }; that GPLed kernel modules (signed or unsigned) can use to execute part of their own data as a swappable user mode process. The kernel will do: - mount "tmpfs" - allocate a unique file in tmpfs - populate that file with [data, data + len] bytes - user-mode-helper code will do_execve that file and, before the process starts, the kernel will create two unix pipes for bidirectional communication between kernel module and umh - close tmpfs file, effectively deleting it - the fork_usermode_blob will return zero on success and populate 'struct umh_info' with two unix pipes and the pid of the user process As the first step in the development of the bpfilter project the fork_usermode_blob() helper is introduced to allow user mode code to be invoked from a kernel module. The idea is that user mode code plus normal kernel module code are built as part of the kernel build and installed as a traditional kernel module into a distro-specified location, such that from a distribution point of view, there is no difference between regular kernel modules and kernel modules + umh code. Such modules can be signed, modprobed, rmmod, etc. The use of this new helper by a kernel module doesn't make it special in any way from the kernel and user space tooling point of view. Such an approach enables the kernel to delegate functionality traditionally done by the kernel modules into the user space processes (either root or !root) and reduces the security attack surface of the new code. The buggy umh code would crash the user process, but not the kernel. Another advantage is that umh code of the kernel module can be debugged and tested out of user space (e.g. opening the possibility to run clang sanitizers, fuzzers or user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane to be done in the user space while bpf based data plane stays in the kernel. Since umh can crash, can be oom-ed by the kernel, killed by the admin, the kernel module that uses them (like bpfilter) needs to manage life time of umh on its own via two unix pipes and the pid of umh. The exit code of such kernel module should kill the umh it started, so that rmmod of the kernel module will cleanup the corresponding umh. Just like if the kernel module does kmalloc() it should kfree() it in the exit code. Signed-off-by: Alexei Starovoitov--- fs/exec.c | 38 --- include/linux/binfmts.h | 1 + include/linux/umh.h | 12 kernel/umh.c| 176 +++- 4 files changed, 215 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 183059c427b9..30a36c2a39bf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ -static int do_execveat_common(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags) +static int __do_execve_file(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags, struct file *file) { char *pathbuf = NULL; struct linux_binprm *bprm; - struct file *file; struct files_struct *displaced; int retval; @@ -1752,7 +1751,8 @@ static int do_execveat_common(int fd, struct filename *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = do_open_execat(fd, filename, flags); + if (!file) + file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (fd == AT_FDCWD || filename->name[0] == '/') { + if (!filename) { + bprm->filename = "none"; + } else if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = 
filename->name; } else { if (filename->name[0] == '\0') @@ -1826,7 +1828,8 @@ static int do_execveat_common(int fd, struct filename *filename, task_numa_free(current); free_bprm(bprm); kfree(pathbuf); - putname(filename); + if (filename) + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename, if (displaced) reset_files_struct(displaced); out_ret: -
[PATCH v2 net-next 1/4] umh: introduce fork_usermode_blob() helper
Introduce helper: int fork_usermode_blob(void *data, size_t len, struct umh_info *info); struct umh_info { struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; }; that GPLed kernel modules (signed or unsigned) can use to execute part of their own data as a swappable user mode process. The kernel will do: - mount "tmpfs" - allocate a unique file in tmpfs - populate that file with [data, data + len] bytes - user-mode-helper code will do_execve that file and, before the process starts, the kernel will create two unix pipes for bidirectional communication between kernel module and umh - close tmpfs file, effectively deleting it - the fork_usermode_blob will return zero on success and populate 'struct umh_info' with two unix pipes and the pid of the user process As the first step in the development of the bpfilter project the fork_usermode_blob() helper is introduced to allow user mode code to be invoked from a kernel module. The idea is that user mode code plus normal kernel module code are built as part of the kernel build and installed as a traditional kernel module into a distro-specified location, such that from a distribution point of view, there is no difference between regular kernel modules and kernel modules + umh code. Such modules can be signed, modprobed, rmmod, etc. The use of this new helper by a kernel module doesn't make it special in any way from the kernel and user space tooling point of view. Such an approach enables the kernel to delegate functionality traditionally done by the kernel modules into the user space processes (either root or !root) and reduces the security attack surface of the new code. The buggy umh code would crash the user process, but not the kernel. Another advantage is that umh code of the kernel module can be debugged and tested out of user space (e.g. opening the possibility to run clang sanitizers, fuzzers or user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane to be done in the user space while bpf based data plane stays in the kernel. Since umh can crash, can be oom-ed by the kernel, killed by the admin, the kernel module that uses them (like bpfilter) needs to manage life time of umh on its own via two unix pipes and the pid of umh. The exit code of such kernel module should kill the umh it started, so that rmmod of the kernel module will cleanup the corresponding umh. Just like if the kernel module does kmalloc() it should kfree() it in the exit code. Signed-off-by: Alexei Starovoitov --- fs/exec.c | 38 --- include/linux/binfmts.h | 1 + include/linux/umh.h | 12 kernel/umh.c| 176 +++- 4 files changed, 215 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 183059c427b9..30a36c2a39bf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ -static int do_execveat_common(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags) +static int __do_execve_file(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags, struct file *file) { char *pathbuf = NULL; struct linux_binprm *bprm; - struct file *file; struct files_struct *displaced; int retval; @@ -1752,7 +1751,8 @@ static int do_execveat_common(int fd, struct filename *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = do_open_execat(fd, filename, flags); + if (!file) + file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (fd == AT_FDCWD || filename->name[0] == '/') { + if (!filename) { + bprm->filename = "none"; + } else if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = 
filename->name; } else { if (filename->name[0] == '\0') @@ -1826,7 +1828,8 @@ static int do_execveat_common(int fd, struct filename *filename, task_numa_free(current); free_bprm(bprm); kfree(pathbuf); - putname(filename); + if (filename) + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename, if (displaced) reset_files_struct(displaced); out_ret: -
[PATCH RFC v2 net-next 4/4] bpfilter: rough bpfilter codegen example hack
From: Daniel BorkmannSigned-off-by: Daniel Borkmann --- net/bpfilter/Makefile | 2 +- net/bpfilter/bpfilter_mod.h | 285 ++- net/bpfilter/ctor.c | 57 + net/bpfilter/gen.c | 290 net/bpfilter/init.c | 11 +- net/bpfilter/main.c | 15 ++- net/bpfilter/sockopt.c | 137 - net/bpfilter/tables.c | 5 +- net/bpfilter/tgts.c | 1 + 9 files changed, 737 insertions(+), 66 deletions(-) create mode 100644 net/bpfilter/gen.c diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index bec6181de995..3796651c76cb 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -4,7 +4,7 @@ # hostprogs-y := bpfilter_umh -bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o +bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o HOSTCFLAGS += -I. -Itools/include/ # a bit of elf magic to convert bpfilter_umh binary into a binary blob diff --git a/net/bpfilter/bpfilter_mod.h b/net/bpfilter/bpfilter_mod.h index f0de41b20793..b4209985efff 100644 --- a/net/bpfilter/bpfilter_mod.h +++ b/net/bpfilter/bpfilter_mod.h @@ -21,8 +21,8 @@ struct bpfilter_table_info { unsigned intinitial_entries; unsigned inthook_entry[BPFILTER_INET_HOOK_MAX]; unsigned intunderflow[BPFILTER_INET_HOOK_MAX]; - unsigned intstacksize; - void***jumpstack; +// unsigned intstacksize; +// void***jumpstack; unsigned char entries[0] __aligned(8); }; @@ -64,22 +64,55 @@ struct bpfilter_ipt_error { struct bpfilter_target { struct list_headall_target_list; - const char name[BPFILTER_EXTENSION_MAXNAMELEN]; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; unsigned intsize; int hold; u16 family; u8 rev; }; +struct bpfilter_gen_ctx { + struct bpf_insn *img; + u32 len_cur; + u32 len_max; + u32 default_verdict; + int fd; + int ifindex; + booloffloaded; +}; + +union bpf_attr; +int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); + +int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx); +int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx); +int bpfilter_gen_epilogue(struct 
bpfilter_gen_ctx *ctx); +int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx, + struct bpfilter_ipt_ip *ent, int verdict); +int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx); +void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx); + struct bpfilter_target *bpfilter_target_get_by_name(const char *name); void bpfilter_target_put(struct bpfilter_target *tgt); int bpfilter_target_add(struct bpfilter_target *tgt); -struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl); +struct bpfilter_table_info * +bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl, __u32 size_ents); +struct bpfilter_table_info * +bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl, +struct bpfilter_table_info *info, +__u32 size_ents, __u32 num_ents); +struct bpfilter_table_info * +bpfilter_ipv4_table_finalize2(struct bpfilter_table *tbl, + struct bpfilter_table_info *info, + __u32 size_ents, __u32 num_ents); + int bpfilter_ipv4_register_targets(void); void bpfilter_tables_init(void); int bpfilter_get_info(void *addr, int len); int bpfilter_get_entries(void *cmd, int len); +int bpfilter_set_replace(void *cmd, int len); +int bpfilter_set_add_counters(void *cmd, int len); int bpfilter_ipv4_init(void); int copy_from_user(void *dst, void *addr, int len); @@ -93,4 +126,248 @@ extern int pid; extern int debug_fd; #define ENOTSUPP524 +/* Helper macros for filter block array initializers. */ + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC)\ + ((struct bpf_insn) {\ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X,\ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC)\ + ((struct bpf_insn) {\ + .code = BPF_ALU | BPF_OP(OP) |
[PATCH v2 net-next 0/4] bpfilter
Hi All, v1->v2: this patch set is almost a full rewrite of the earlier umh modules approach. The v1 patches and follow-up discussion were covered by LWN: https://lwn.net/Articles/749108/ I believe the v2 addresses all issues brought up by Andy and others. Mainly there are zero changes to kernel/module.c. Instead of teaching module loading logic to recognize a special umh module, let normal kernel modules execute part of their own .init.rodata as a new user space process (Andy's idea). Patch 1 introduces this new helper: int fork_usermode_blob(void *data, size_t len, struct umh_info *info); Input: data + len == executable file Output: struct umh_info { struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; }; Advantages vs v1: - the embedded user mode executable is stored as .init.rodata inside a normal kernel module. These pages are freed when the .ko finishes loading - the elf file is copied into a tmpfs file. The user mode process is swappable. - the communication between the user mode process and the 'parent' kernel module is done via two unix pipes, hence the protocol is not exposed to user space - impossible to launch umh on its own (that was the main issue of v1) and impossible to be man-in-the-middle due to pipes - bpfilter.ko consists of a tiny kernel part that passes the data between kernel and umh via pipes and a much bigger umh part that does all the work - 'lsmod' shows bpfilter.ko as usual. 'rmmod bpfilter' removes the kernel module and kills the corresponding umh - signed bpfilter.ko covers the whole image including umh code A few issues: - architecturally bpfilter.ko can be builtin, but that doesn't work yet. Still debugging. Kinda cool to have user mode executables be part of vmlinux - the user can still attach to the process and debug it with 'gdb /proc/pid/exe pid', but 'gdb -p pid' doesn't work.
(a bit worse comparing to v1) - tinyconfig will notice a small increase in .text +766 | TEXT | 7c8b94806bec umh: introduce fork_usermode_blob() helper More details in patches 1 and 2 that are ready to land. Patches 3 and 4 are still rough. They were mainly used for testing and to demonstrate how bpfilter is building on top. The patch 4 approach of converting one iptable rule to few bpf instructions will certainly change in the future, since it doesn't scale to thousands of rules. Alexei Starovoitov (2): umh: introduce fork_usermode_blob() helper net: add skeleton of bpfilter kernel module Daniel Borkmann (1): bpfilter: rough bpfilter codegen example hack David S. Miller (1): bpfilter: add iptable get/set parsing fs/exec.c | 38 - include/linux/binfmts.h | 1 + include/linux/bpfilter.h | 15 ++ include/linux/umh.h | 12 ++ include/uapi/linux/bpfilter.h | 200 ++ kernel/umh.c | 176 +++- net/Kconfig | 2 + net/Makefile | 1 + net/bpfilter/Kconfig | 17 ++ net/bpfilter/Makefile | 24 +++ net/bpfilter/bpfilter_kern.c | 93 +++ net/bpfilter/bpfilter_mod.h | 373 ++ net/bpfilter/ctor.c | 91 +++ net/bpfilter/gen.c| 290 net/bpfilter/init.c | 36 net/bpfilter/main.c | 117 + net/bpfilter/msgfmt.h | 17 ++ net/bpfilter/sockopt.c| 236 ++ net/bpfilter/tables.c | 73 + net/bpfilter/targets.c| 51 ++ net/bpfilter/tgts.c | 26 +++ net/ipv4/Makefile | 2 + net/ipv4/bpfilter/Makefile| 2 + net/ipv4/bpfilter/sockopt.c | 42 + net/ipv4/ip_sockglue.c| 17 ++ 25 files changed, 1940 insertions(+), 12 deletions(-) create mode 100644 include/linux/bpfilter.h create mode 100644 include/uapi/linux/bpfilter.h create mode 100644 net/bpfilter/Kconfig create mode 100644 net/bpfilter/Makefile create mode 100644 net/bpfilter/bpfilter_kern.c create mode 100644 net/bpfilter/bpfilter_mod.h create mode 100644 net/bpfilter/ctor.c create mode 100644 net/bpfilter/gen.c create mode 100644 net/bpfilter/init.c create mode 100644 net/bpfilter/main.c create mode 100644 net/bpfilter/msgfmt.h create mode 100644 
net/bpfilter/sockopt.c create mode 100644 net/bpfilter/tables.c create mode 100644 net/bpfilter/targets.c create mode 100644 net/bpfilter/tgts.c create mode 100644 net/ipv4/bpfilter/Makefile create mode 100644 net/ipv4/bpfilter/sockopt.c -- 2.9.5
[PATCH RFC v2 net-next 4/4] bpfilter: rough bpfilter codegen example hack
From: Daniel Borkmann Signed-off-by: Daniel Borkmann --- net/bpfilter/Makefile | 2 +- net/bpfilter/bpfilter_mod.h | 285 ++- net/bpfilter/ctor.c | 57 + net/bpfilter/gen.c | 290 net/bpfilter/init.c | 11 +- net/bpfilter/main.c | 15 ++- net/bpfilter/sockopt.c | 137 - net/bpfilter/tables.c | 5 +- net/bpfilter/tgts.c | 1 + 9 files changed, 737 insertions(+), 66 deletions(-) create mode 100644 net/bpfilter/gen.c diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index bec6181de995..3796651c76cb 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -4,7 +4,7 @@ # hostprogs-y := bpfilter_umh -bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o +bpfilter_umh-objs := main.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o HOSTCFLAGS += -I. -Itools/include/ # a bit of elf magic to convert bpfilter_umh binary into a binary blob diff --git a/net/bpfilter/bpfilter_mod.h b/net/bpfilter/bpfilter_mod.h index f0de41b20793..b4209985efff 100644 --- a/net/bpfilter/bpfilter_mod.h +++ b/net/bpfilter/bpfilter_mod.h @@ -21,8 +21,8 @@ struct bpfilter_table_info { unsigned intinitial_entries; unsigned inthook_entry[BPFILTER_INET_HOOK_MAX]; unsigned intunderflow[BPFILTER_INET_HOOK_MAX]; - unsigned intstacksize; - void***jumpstack; +// unsigned intstacksize; +// void***jumpstack; unsigned char entries[0] __aligned(8); }; @@ -64,22 +64,55 @@ struct bpfilter_ipt_error { struct bpfilter_target { struct list_headall_target_list; - const char name[BPFILTER_EXTENSION_MAXNAMELEN]; + charname[BPFILTER_EXTENSION_MAXNAMELEN]; unsigned intsize; int hold; u16 family; u8 rev; }; +struct bpfilter_gen_ctx { + struct bpf_insn *img; + u32 len_cur; + u32 len_max; + u32 default_verdict; + int fd; + int ifindex; + booloffloaded; +}; + +union bpf_attr; +int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); + +int bpfilter_gen_init(struct bpfilter_gen_ctx *ctx); +int bpfilter_gen_prologue(struct bpfilter_gen_ctx *ctx); +int bpfilter_gen_epilogue(struct 
bpfilter_gen_ctx *ctx); +int bpfilter_gen_append(struct bpfilter_gen_ctx *ctx, + struct bpfilter_ipt_ip *ent, int verdict); +int bpfilter_gen_commit(struct bpfilter_gen_ctx *ctx); +void bpfilter_gen_destroy(struct bpfilter_gen_ctx *ctx); + struct bpfilter_target *bpfilter_target_get_by_name(const char *name); void bpfilter_target_put(struct bpfilter_target *tgt); int bpfilter_target_add(struct bpfilter_target *tgt); -struct bpfilter_table_info *bpfilter_ipv4_table_ctor(struct bpfilter_table *tbl); +struct bpfilter_table_info * +bpfilter_ipv4_table_alloc(struct bpfilter_table *tbl, __u32 size_ents); +struct bpfilter_table_info * +bpfilter_ipv4_table_finalize(struct bpfilter_table *tbl, +struct bpfilter_table_info *info, +__u32 size_ents, __u32 num_ents); +struct bpfilter_table_info * +bpfilter_ipv4_table_finalize2(struct bpfilter_table *tbl, + struct bpfilter_table_info *info, + __u32 size_ents, __u32 num_ents); + int bpfilter_ipv4_register_targets(void); void bpfilter_tables_init(void); int bpfilter_get_info(void *addr, int len); int bpfilter_get_entries(void *cmd, int len); +int bpfilter_set_replace(void *cmd, int len); +int bpfilter_set_add_counters(void *cmd, int len); int bpfilter_ipv4_init(void); int copy_from_user(void *dst, void *addr, int len); @@ -93,4 +126,248 @@ extern int pid; extern int debug_fd; #define ENOTSUPP524 +/* Helper macros for filter block array initializers. */ + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC)\ + ((struct bpf_insn) {\ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X,\ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC)\ + ((struct bpf_insn) {\ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg =
[PATCH v2 net-next 0/4] bpfilter
Hi All, v1->v2: this patch set is almost a full rewrite of the earlier umh modules approach. The v1 patches and follow-up discussion were covered by LWN: https://lwn.net/Articles/749108/ I believe the v2 addresses all issues brought up by Andy and others. Mainly there are zero changes to kernel/module.c. Instead of teaching module loading logic to recognize a special umh module, let normal kernel modules execute part of their own .init.rodata as a new user space process (Andy's idea). Patch 1 introduces this new helper: int fork_usermode_blob(void *data, size_t len, struct umh_info *info); Input: data + len == executable file Output: struct umh_info { struct file *pipe_to_umh; struct file *pipe_from_umh; pid_t pid; }; Advantages vs v1: - the embedded user mode executable is stored as .init.rodata inside a normal kernel module. These pages are freed when the .ko finishes loading - the elf file is copied into a tmpfs file. The user mode process is swappable. - the communication between the user mode process and the 'parent' kernel module is done via two unix pipes, hence the protocol is not exposed to user space - impossible to launch umh on its own (that was the main issue of v1) and impossible to be man-in-the-middle due to pipes - bpfilter.ko consists of a tiny kernel part that passes the data between kernel and umh via pipes and a much bigger umh part that does all the work - 'lsmod' shows bpfilter.ko as usual. 'rmmod bpfilter' removes the kernel module and kills the corresponding umh - signed bpfilter.ko covers the whole image including umh code A few issues: - architecturally bpfilter.ko can be builtin, but that doesn't work yet. Still debugging. Kinda cool to have user mode executables be part of vmlinux - the user can still attach to the process and debug it with 'gdb /proc/pid/exe pid', but 'gdb -p pid' doesn't work.
(a bit worse comparing to v1) - tinyconfig will notice a small increase in .text +766 | TEXT | 7c8b94806bec umh: introduce fork_usermode_blob() helper More details in patches 1 and 2 that are ready to land. Patches 3 and 4 are still rough. They were mainly used for testing and to demonstrate how bpfilter is building on top. The patch 4 approach of converting one iptable rule to few bpf instructions will certainly change in the future, since it doesn't scale to thousands of rules. Alexei Starovoitov (2): umh: introduce fork_usermode_blob() helper net: add skeleton of bpfilter kernel module Daniel Borkmann (1): bpfilter: rough bpfilter codegen example hack David S. Miller (1): bpfilter: add iptable get/set parsing fs/exec.c | 38 - include/linux/binfmts.h | 1 + include/linux/bpfilter.h | 15 ++ include/linux/umh.h | 12 ++ include/uapi/linux/bpfilter.h | 200 ++ kernel/umh.c | 176 +++- net/Kconfig | 2 + net/Makefile | 1 + net/bpfilter/Kconfig | 17 ++ net/bpfilter/Makefile | 24 +++ net/bpfilter/bpfilter_kern.c | 93 +++ net/bpfilter/bpfilter_mod.h | 373 ++ net/bpfilter/ctor.c | 91 +++ net/bpfilter/gen.c| 290 net/bpfilter/init.c | 36 net/bpfilter/main.c | 117 + net/bpfilter/msgfmt.h | 17 ++ net/bpfilter/sockopt.c| 236 ++ net/bpfilter/tables.c | 73 + net/bpfilter/targets.c| 51 ++ net/bpfilter/tgts.c | 26 +++ net/ipv4/Makefile | 2 + net/ipv4/bpfilter/Makefile| 2 + net/ipv4/bpfilter/sockopt.c | 42 + net/ipv4/ip_sockglue.c| 17 ++ 25 files changed, 1940 insertions(+), 12 deletions(-) create mode 100644 include/linux/bpfilter.h create mode 100644 include/uapi/linux/bpfilter.h create mode 100644 net/bpfilter/Kconfig create mode 100644 net/bpfilter/Makefile create mode 100644 net/bpfilter/bpfilter_kern.c create mode 100644 net/bpfilter/bpfilter_mod.h create mode 100644 net/bpfilter/ctor.c create mode 100644 net/bpfilter/gen.c create mode 100644 net/bpfilter/init.c create mode 100644 net/bpfilter/main.c create mode 100644 net/bpfilter/msgfmt.h create mode 100644 
net/bpfilter/sockopt.c create mode 100644 net/bpfilter/tables.c create mode 100644 net/bpfilter/targets.c create mode 100644 net/bpfilter/tgts.c create mode 100644 net/ipv4/bpfilter/Makefile create mode 100644 net/ipv4/bpfilter/sockopt.c -- 2.9.5
[PATCH v2 net-next 2/4] net: add skeleton of bpfilter kernel module
bpfilter.ko consists of bpfilter_kern.c (normal kernel module code) and user mode helper code that is embedded into bpfilter.ko The steps to build bpfilter.ko are the following: - main.c is compiled by HOSTCC into the bpfilter_umh elf executable file - with quite a bit of objcopy and Makefile magic the bpfilter_umh elf file is converted into bpfilter_umh.o object file with _binary_net_bpfilter_bpfilter_umh_start and _end symbols Example: $ nm ./bld_x64/net/bpfilter/bpfilter_umh.o 4cf8 T _binary_net_bpfilter_bpfilter_umh_end 4cf8 A _binary_net_bpfilter_bpfilter_umh_size T _binary_net_bpfilter_bpfilter_umh_start - bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko bpfilter_kern.c is a normal kernel module code that calls the fork_usermode_blob() helper to execute part of its own data as a user mode process. Notice that _binary_net_bpfilter_bpfilter_umh_start - end is placed into .init.rodata section, so it's freed as soon as __init function of bpfilter.ko is finished. As part of __init the bpfilter.ko does first request/reply action via two unix pipe provided by fork_usermode_blob() helper to make sure that umh is healthy. If not it will kill it via pid. Later bpfilter_process_sockopt() will be called from bpfilter hooks in get/setsockopt() to pass iptable commands into umh via bpfilter.ko If admin does 'rmmod bpfilter' the __exit code bpfilter.ko will kill umh as well. 
Signed-off-by: Alexei Starovoitov--- include/linux/bpfilter.h | 15 +++ include/uapi/linux/bpfilter.h | 21 ++ net/Kconfig | 2 + net/Makefile | 1 + net/bpfilter/Kconfig | 17 net/bpfilter/Makefile | 24 +++ net/bpfilter/bpfilter_kern.c | 93 +++ net/bpfilter/main.c | 63 + net/bpfilter/msgfmt.h | 17 net/ipv4/Makefile | 2 + net/ipv4/bpfilter/Makefile| 2 + net/ipv4/bpfilter/sockopt.c | 42 +++ net/ipv4/ip_sockglue.c| 17 13 files changed, 316 insertions(+) create mode 100644 include/linux/bpfilter.h create mode 100644 include/uapi/linux/bpfilter.h create mode 100644 net/bpfilter/Kconfig create mode 100644 net/bpfilter/Makefile create mode 100644 net/bpfilter/bpfilter_kern.c create mode 100644 net/bpfilter/main.c create mode 100644 net/bpfilter/msgfmt.h create mode 100644 net/ipv4/bpfilter/Makefile create mode 100644 net/ipv4/bpfilter/sockopt.c diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h new file mode 100644 index ..687b1760bb9f --- /dev/null +++ b/include/linux/bpfilter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BPFILTER_H +#define _LINUX_BPFILTER_H + +#include + +struct sock; +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval, + unsigned int optlen); +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval, + int *optlen); +extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); +#endif diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h new file mode 100644 index ..2ec3cc99ea4c --- /dev/null +++ b/include/uapi/linux/bpfilter.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_LINUX_BPFILTER_H +#define _UAPI_LINUX_BPFILTER_H + +#include + +enum { + BPFILTER_IPT_SO_SET_REPLACE = 64, + BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, + BPFILTER_IPT_SET_MAX, +}; + +enum { + BPFILTER_IPT_SO_GET_INFO = 64, + BPFILTER_IPT_SO_GET_ENTRIES = 65, + BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, + 
BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, + BPFILTER_IPT_GET_MAX, +}; + +#endif /* _UAPI_LINUX_BPFILTER_H */ diff --git a/net/Kconfig b/net/Kconfig index b62089fb1332..ed6368b306fa 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -201,6 +201,8 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/bpfilter/Kconfig" + source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index a6147c61b174..7f982b7682bd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig new file mode 100644 index ..782a732b9a5c --- /dev/null +++ b/net/bpfilter/Kconfig @@ -0,0 +1,17 @@
[PATCH v2 net-next 2/4] net: add skeleton of bpfilter kernel module
bpfilter.ko consists of bpfilter_kern.c (normal kernel module code) and user mode helper code that is embedded into bpfilter.ko The steps to build bpfilter.ko are the following: - main.c is compiled by HOSTCC into the bpfilter_umh elf executable file - with quite a bit of objcopy and Makefile magic the bpfilter_umh elf file is converted into bpfilter_umh.o object file with _binary_net_bpfilter_bpfilter_umh_start and _end symbols Example: $ nm ./bld_x64/net/bpfilter/bpfilter_umh.o 4cf8 T _binary_net_bpfilter_bpfilter_umh_end 4cf8 A _binary_net_bpfilter_bpfilter_umh_size T _binary_net_bpfilter_bpfilter_umh_start - bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko bpfilter_kern.c is a normal kernel module code that calls the fork_usermode_blob() helper to execute part of its own data as a user mode process. Notice that _binary_net_bpfilter_bpfilter_umh_start - end is placed into .init.rodata section, so it's freed as soon as __init function of bpfilter.ko is finished. As part of __init the bpfilter.ko does first request/reply action via two unix pipe provided by fork_usermode_blob() helper to make sure that umh is healthy. If not it will kill it via pid. Later bpfilter_process_sockopt() will be called from bpfilter hooks in get/setsockopt() to pass iptable commands into umh via bpfilter.ko If admin does 'rmmod bpfilter' the __exit code bpfilter.ko will kill umh as well. 
Signed-off-by: Alexei Starovoitov --- include/linux/bpfilter.h | 15 +++ include/uapi/linux/bpfilter.h | 21 ++ net/Kconfig | 2 + net/Makefile | 1 + net/bpfilter/Kconfig | 17 net/bpfilter/Makefile | 24 +++ net/bpfilter/bpfilter_kern.c | 93 +++ net/bpfilter/main.c | 63 + net/bpfilter/msgfmt.h | 17 net/ipv4/Makefile | 2 + net/ipv4/bpfilter/Makefile| 2 + net/ipv4/bpfilter/sockopt.c | 42 +++ net/ipv4/ip_sockglue.c| 17 13 files changed, 316 insertions(+) create mode 100644 include/linux/bpfilter.h create mode 100644 include/uapi/linux/bpfilter.h create mode 100644 net/bpfilter/Kconfig create mode 100644 net/bpfilter/Makefile create mode 100644 net/bpfilter/bpfilter_kern.c create mode 100644 net/bpfilter/main.c create mode 100644 net/bpfilter/msgfmt.h create mode 100644 net/ipv4/bpfilter/Makefile create mode 100644 net/ipv4/bpfilter/sockopt.c diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h new file mode 100644 index ..687b1760bb9f --- /dev/null +++ b/include/linux/bpfilter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BPFILTER_H +#define _LINUX_BPFILTER_H + +#include + +struct sock; +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval, + unsigned int optlen); +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval, + int *optlen); +extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); +#endif diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h new file mode 100644 index ..2ec3cc99ea4c --- /dev/null +++ b/include/uapi/linux/bpfilter.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_LINUX_BPFILTER_H +#define _UAPI_LINUX_BPFILTER_H + +#include + +enum { + BPFILTER_IPT_SO_SET_REPLACE = 64, + BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, + BPFILTER_IPT_SET_MAX, +}; + +enum { + BPFILTER_IPT_SO_GET_INFO = 64, + BPFILTER_IPT_SO_GET_ENTRIES = 65, + BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, 
+ BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, + BPFILTER_IPT_GET_MAX, +}; + +#endif /* _UAPI_LINUX_BPFILTER_H */ diff --git a/net/Kconfig b/net/Kconfig index b62089fb1332..ed6368b306fa 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -201,6 +201,8 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/bpfilter/Kconfig" + source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index a6147c61b174..7f982b7682bd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig new file mode 100644 index ..782a732b9a5c --- /dev/null +++ b/net/bpfilter/Kconfig @@ -0,0 +1,17 @@ +menuconfig
[PATCH net] macsonic: Set platform device coherent_dma_mask
Set the device's coherent_dma_mask to avoid a WARNING splat. Please see commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask"). Cc: linux-m...@lists.linux-m68k.org Signed-off-by: Finn Thain --- drivers/net/ethernet/natsemi/macsonic.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 0937fc2a928e..37b1ffa8bb61 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -523,6 +523,10 @@ static int mac_sonic_platform_probe(struct platform_device *pdev) struct sonic_local *lp; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + return err; + dev = alloc_etherdev(sizeof(struct sonic_local)); if (!dev) return -ENOMEM; -- 2.16.1
[PATCH net] macsonic: Set platform device coherent_dma_mask
Set the device's coherent_dma_mask to avoid a WARNING splat. Please see commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask"). Cc: linux-m...@lists.linux-m68k.org Signed-off-by: Finn Thain --- drivers/net/ethernet/natsemi/macsonic.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 0937fc2a928e..37b1ffa8bb61 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -523,6 +523,10 @@ static int mac_sonic_platform_probe(struct platform_device *pdev) struct sonic_local *lp; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + return err; + dev = alloc_etherdev(sizeof(struct sonic_local)); if (!dev) return -ENOMEM; -- 2.16.1
[PATCH net] macmace: Set platform device coherent_dma_mask
Set the device's coherent_dma_mask to avoid a WARNING splat. Please see commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask"). Cc: linux-m...@lists.linux-m68k.org Tested-by: Stan Johnson Signed-off-by: Finn Thain --- drivers/net/ethernet/apple/macmace.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 137cbb470af2..98292c49ecf0 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -203,6 +203,10 @@ static int mace_probe(struct platform_device *pdev) unsigned char checksum = 0; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + return err; + dev = alloc_etherdev(PRIV_BYTES); if (!dev) return -ENOMEM; -- 2.16.1
[PATCH net] macmace: Set platform device coherent_dma_mask
Set the device's coherent_dma_mask to avoid a WARNING splat. Please see commit 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask"). Cc: linux-m...@lists.linux-m68k.org Tested-by: Stan Johnson Signed-off-by: Finn Thain --- drivers/net/ethernet/apple/macmace.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index 137cbb470af2..98292c49ecf0 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -203,6 +203,10 @@ static int mace_probe(struct platform_device *pdev) unsigned char checksum = 0; int err; + err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (err) + return err; + dev = alloc_etherdev(PRIV_BYTES); if (!dev) return -ENOMEM; -- 2.16.1
[PATCH] clk: qcom: Add support for RCG to register for DFS
In the cases where a RCG requires a Dynamic Frequency switch support requires to register which would at runtime read the clock perf level registers to identify the frequencies supported and update the frequency table accordingly. Signed-off-by: Taniya Das--- drivers/clk/qcom/clk-rcg.h | 7 +- drivers/clk/qcom/clk-rcg2.c | 172 drivers/clk/qcom/common.c | 23 +- drivers/clk/qcom/common.h | 14 +++- 4 files changed, 213 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h index 2a7489a..06de69f 100644 --- a/drivers/clk/qcom/clk-rcg.h +++ b/drivers/clk/qcom/clk-rcg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2013, 2018, The Linux Foundation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -144,6 +144,7 @@ struct clk_dyn_rcg { * @cmd_rcgr: corresponds to *_CMD_RCGR * @mnd_width: number of bits in m/n/d values * @hid_width: number of bits in half integer divider + * @flags: additional flag parameters for the RCG * @parent_map: map from software's parent index to hardware's src_sel field * @freq_tbl: frequency table * @clkr: regmap clock handle @@ -153,6 +154,8 @@ struct clk_rcg2 { u32 cmd_rcgr; u8 mnd_width; u8 hid_width; + u8 flags; +#define DFS_ENABLE_RCG BIT(0) const struct parent_map *parent_map; const struct freq_tbl *freq_tbl; struct clk_regmap clkr; @@ -168,4 +171,6 @@ struct clk_rcg2 { extern const struct clk_ops clk_pixel_ops; extern const struct clk_ops clk_gfx3d_ops; +extern int clk_rcg2_get_dfs_clock_rate(struct clk_rcg2 *clk, + struct device *dev); #endif diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e63db10..7c35bca 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,14 @@ #define N_REG 0xc #define 
D_REG 0x10 +/* Dynamic Frequency Scaling */ +#define MAX_PERF_LEVEL 16 +#define SE_CMD_DFSR_OFFSET 0x14 +#define SE_CMD_DFS_EN BIT(0) +#define SE_PERF_DFSR(level)(0x1c + 0x4 * (level)) +#define SE_PERF_M_DFSR(level) (0x5c + 0x4 * (level)) +#define SE_PERF_N_DFSR(level) (0x9c + 0x4 * (level)) + enum freq_policy { FLOOR, CEIL, @@ -122,6 +131,10 @@ static int clk_rcg2_set_parent(struct clk_hw *hw, u8 index) int ret; u32 cfg = rcg->parent_map[index].cfg << CFG_SRC_SEL_SHIFT; + /* In DFS mode skip updating the RCG CFG */ + if (rcg->flags & DFS_ENABLE_RCG) + return 0; + ret = regmap_update_bits(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, CFG_SRC_SEL_MASK, cfg); if (ret) @@ -296,6 +309,9 @@ static int __clk_rcg2_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); const struct freq_tbl *f; + if (rcg->flags & DFS_ENABLE_RCG) + return -EPERM; + switch (policy) { case FLOOR: f = qcom_find_freq_floor(rcg->freq_tbl, rate); @@ -790,3 +806,159 @@ static int clk_gfx3d_set_rate(struct clk_hw *hw, unsigned long rate, .determine_rate = clk_gfx3d_determine_rate, }; EXPORT_SYMBOL_GPL(clk_gfx3d_ops); + +/* Common APIs to be used for DFS based RCGR */ +static u8 clk_parent_index_pre_div_and_mode(struct clk_hw *hw, u32 offset, + u32 *mode, u32 *pre_div) +{ + struct clk_rcg2 *rcg; + int num_parents; + u32 cfg, mask; + int i, ret; + + if (!hw) + return -EINVAL; + + num_parents = clk_hw_get_num_parents(hw); + + rcg = to_clk_rcg2(hw); + + ret = regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + offset, ); + if (ret) + goto err; + + mask = BIT(rcg->hid_width) - 1; + *pre_div = cfg & mask ? 
(cfg & mask) : 1; + + *mode = cfg & CFG_MODE_MASK; + *mode >>= CFG_MODE_SHIFT; + + cfg &= CFG_SRC_SEL_MASK; + cfg >>= CFG_SRC_SEL_SHIFT; + + for (i = 0; i < num_parents; i++) + if (cfg == rcg->parent_map[i].cfg) + return i; +err: + return 0; +} + +static int calculate_m_and_n(struct clk_hw *hw, u32 m_offset, u32 n_offset, + u32 mode, u32 *m, u32 *n) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + u32 val, mask; + int ret = 0; + + if (!hw) + return -EINVAL; + + *m = *n = 0; + + if (mode) { + /* Calculate M & N values */ + mask =
[PATCH] clk: qcom: Add support for RCG to register for DFS
In the cases where a RCG requires a Dynamic Frequency switch support requires to register which would at runtime read the clock perf level registers to identify the frequencies supported and update the frequency table accordingly. Signed-off-by: Taniya Das --- drivers/clk/qcom/clk-rcg.h | 7 +- drivers/clk/qcom/clk-rcg2.c | 172 drivers/clk/qcom/common.c | 23 +- drivers/clk/qcom/common.h | 14 +++- 4 files changed, 213 insertions(+), 3 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h index 2a7489a..06de69f 100644 --- a/drivers/clk/qcom/clk-rcg.h +++ b/drivers/clk/qcom/clk-rcg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, The Linux Foundation. All rights reserved. + * Copyright (c) 2013, 2018, The Linux Foundation. All rights reserved. * * This software is licensed under the terms of the GNU General Public * License version 2, as published by the Free Software Foundation, and @@ -144,6 +144,7 @@ struct clk_dyn_rcg { * @cmd_rcgr: corresponds to *_CMD_RCGR * @mnd_width: number of bits in m/n/d values * @hid_width: number of bits in half integer divider + * @flags: additional flag parameters for the RCG * @parent_map: map from software's parent index to hardware's src_sel field * @freq_tbl: frequency table * @clkr: regmap clock handle @@ -153,6 +154,8 @@ struct clk_rcg2 { u32 cmd_rcgr; u8 mnd_width; u8 hid_width; + u8 flags; +#define DFS_ENABLE_RCG BIT(0) const struct parent_map *parent_map; const struct freq_tbl *freq_tbl; struct clk_regmap clkr; @@ -168,4 +171,6 @@ struct clk_rcg2 { extern const struct clk_ops clk_pixel_ops; extern const struct clk_ops clk_gfx3d_ops; +extern int clk_rcg2_get_dfs_clock_rate(struct clk_rcg2 *clk, + struct device *dev); #endif diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index e63db10..7c35bca 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,14 @@ #define N_REG 0xc #define 
D_REG 0x10 +/* Dynamic Frequency Scaling */ +#define MAX_PERF_LEVEL 16 +#define SE_CMD_DFSR_OFFSET 0x14 +#define SE_CMD_DFS_EN BIT(0) +#define SE_PERF_DFSR(level)(0x1c + 0x4 * (level)) +#define SE_PERF_M_DFSR(level) (0x5c + 0x4 * (level)) +#define SE_PERF_N_DFSR(level) (0x9c + 0x4 * (level)) + enum freq_policy { FLOOR, CEIL, @@ -122,6 +131,10 @@ static int clk_rcg2_set_parent(struct clk_hw *hw, u8 index) int ret; u32 cfg = rcg->parent_map[index].cfg << CFG_SRC_SEL_SHIFT; + /* In DFS mode skip updating the RCG CFG */ + if (rcg->flags & DFS_ENABLE_RCG) + return 0; + ret = regmap_update_bits(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, CFG_SRC_SEL_MASK, cfg); if (ret) @@ -296,6 +309,9 @@ static int __clk_rcg2_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); const struct freq_tbl *f; + if (rcg->flags & DFS_ENABLE_RCG) + return -EPERM; + switch (policy) { case FLOOR: f = qcom_find_freq_floor(rcg->freq_tbl, rate); @@ -790,3 +806,159 @@ static int clk_gfx3d_set_rate(struct clk_hw *hw, unsigned long rate, .determine_rate = clk_gfx3d_determine_rate, }; EXPORT_SYMBOL_GPL(clk_gfx3d_ops); + +/* Common APIs to be used for DFS based RCGR */ +static u8 clk_parent_index_pre_div_and_mode(struct clk_hw *hw, u32 offset, + u32 *mode, u32 *pre_div) +{ + struct clk_rcg2 *rcg; + int num_parents; + u32 cfg, mask; + int i, ret; + + if (!hw) + return -EINVAL; + + num_parents = clk_hw_get_num_parents(hw); + + rcg = to_clk_rcg2(hw); + + ret = regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + offset, ); + if (ret) + goto err; + + mask = BIT(rcg->hid_width) - 1; + *pre_div = cfg & mask ? 
(cfg & mask) : 1; + + *mode = cfg & CFG_MODE_MASK; + *mode >>= CFG_MODE_SHIFT; + + cfg &= CFG_SRC_SEL_MASK; + cfg >>= CFG_SRC_SEL_SHIFT; + + for (i = 0; i < num_parents; i++) + if (cfg == rcg->parent_map[i].cfg) + return i; +err: + return 0; +} + +static int calculate_m_and_n(struct clk_hw *hw, u32 m_offset, u32 n_offset, + u32 mode, u32 *m, u32 *n) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + u32 val, mask; + int ret = 0; + + if (!hw) + return -EINVAL; + + *m = *n = 0; + + if (mode) { + /* Calculate M & N values */ + mask = BIT(rcg->mnd_width) -
Re: [PATCH V4 5/8] soc: mediatek: pwrap: add pwrap for mt6797 SoCs
Hi, Argus On Wed, 2018-05-02 at 17:21 +0800, argus@mediatek.com wrote: > From: Argus Lin> > mt6797 is a highly integrated SoCs, it uses mt6351 for power > management. We need to add pwrap support to access mt6351. > Pwrap of mt6797 support new feature include starvation and channel > request exception interrupt, dynamic starvation priority > adjustment mechanism. suggest line wrapping closely at 75 columns > > Signed-off-by: Argus Lin > --- > drivers/soc/mediatek/mtk-pmic-wrap.c | 110 > --- > 1 file changed, 102 insertions(+), 8 deletions(-) > > diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c > b/drivers/soc/mediatek/mtk-pmic-wrap.c > index a6366f147b79..0d4a2dae6912 100644 > --- a/drivers/soc/mediatek/mtk-pmic-wrap.c > +++ b/drivers/soc/mediatek/mtk-pmic-wrap.c > @@ -284,6 +284,12 @@ enum pwrap_regs { > PWRAP_DVFS_WDATA7, > PWRAP_SPMINF_STA, > PWRAP_CIPHER_EN, > + > + /* MT6797 series regs */ > + PWRAP_INT1_EN, > + PWRAP_INT1_FLG_RAW, > + PWRAP_INT1_FLG, > + PWRAP_INT1_CLR, > }; > > static int mt2701_regs[] = { > @@ -372,6 +378,43 @@ static int mt2701_regs[] = { > [PWRAP_ADC_RDATA_ADDR2] = 0x154, > }; > > +static int mt6797_regs[] = { > + [PWRAP_MUX_SEL] = 0x0, > + [PWRAP_WRAP_EN] = 0x4, > + [PWRAP_DIO_EN] =0x8, > + [PWRAP_SIDLY] = 0xC, > + [PWRAP_RDDMY] = 0x10, > + [PWRAP_CSHEXT_WRITE] = 0x18, > + [PWRAP_CSHEXT_READ] = 0x1C, > + [PWRAP_CSLEXT_START] = 0x20, > + [PWRAP_CSLEXT_END] =0x24, > + [PWRAP_STAUPD_PRD] =0x28, > + [PWRAP_HARB_HPRIO] =0x50, > + [PWRAP_HIPRIO_ARB_EN] = 0x54, > + [PWRAP_MAN_EN] =0x60, > + [PWRAP_MAN_CMD] = 0x64, > + [PWRAP_WACS0_EN] = 0x70, > + [PWRAP_WACS1_EN] = 0x84, > + [PWRAP_WACS2_EN] = 0x98, > + [PWRAP_INIT_DONE2] =0x9C, > + [PWRAP_WACS2_CMD] = 0xA0, > + [PWRAP_WACS2_RDATA] = 0xA4, > + [PWRAP_WACS2_VLDCLR] = 0xA8, > + [PWRAP_INT_EN] =0xC0, > + [PWRAP_INT_FLG_RAW] = 0xC4, > + [PWRAP_INT_FLG] = 0xC8, > + [PWRAP_INT_CLR] = 0xCC, > + [PWRAP_INT1_EN] = 0xD0, > + [PWRAP_INT1_FLG_RAW] = 0xD4, > + [PWRAP_INT1_FLG] = 0xD8, > + [PWRAP_INT1_CLR] 
= 0xDC, > + [PWRAP_TIMER_EN] = 0xF4, > + [PWRAP_WDT_UNIT] = 0xFC, > + [PWRAP_WDT_SRC_EN] =0x100, > + [PWRAP_DCM_EN] =0x1CC, > + [PWRAP_DCM_DBC_PRD] = 0x1D4, > +}; > + trim unused registers if any > static int mt7622_regs[] = { > [PWRAP_MUX_SEL] = 0x0, > [PWRAP_WRAP_EN] = 0x4, > @@ -647,6 +690,7 @@ enum pmic_type { > > enum pwrap_type { > PWRAP_MT2701, > + PWRAP_MT6797, > PWRAP_MT7622, > PWRAP_MT8135, > PWRAP_MT8173, > @@ -1006,6 +1050,12 @@ static void pwrap_init_chip_select_ext(struct > pmic_wrapper *wrp, u8 hext_write, > static int pwrap_common_init_reg_clock(struct pmic_wrapper *wrp) > { > switch (wrp->master->type) { > + case PWRAP_MT6797: > + pwrap_writel(wrp, 0x8, PWRAP_RDDMY); > + pwrap_write(wrp, wrp->slave->dew_regs[PWRAP_DEW_RDDMY_NO], > + 0x8); > + pwrap_init_chip_select_ext(wrp, 0x88, 0x55, 3, 0); > + break; the setup for timing is much similar to mt2701 + mt6323 so we can merge the both logic into one, and then hope to eliminate specific pwrap_mt2701_init_reg_clock totally > case PWRAP_MT8173: > pwrap_init_chip_select_ext(wrp, 0, 4, 2, 2); > break; > @@ -1076,11 +1126,14 @@ static int pwrap_init_cipher(struct pmic_wrapper *wrp) > break; > case PWRAP_MT2701: > case PWRAP_MT8173: > + case PWRAP_MT6797: need to be listed in alphabetical order > pwrap_writel(wrp, 1, PWRAP_CIPHER_EN); > break; > case PWRAP_MT7622: > pwrap_writel(wrp, 0, PWRAP_CIPHER_EN); > break; > + default: > + break; > } > > /* Config cipher mode @PMIC */ > @@ -1325,6 +1378,15 @@ static irqreturn_t pwrap_interrupt(int irqno, void > *dev_id) > > pwrap_writel(wrp, 0x, PWRAP_INT_CLR); > > + /* If we support INT1 interrupt, we also need to clear it */ > + if (HAS_CAP(wrp->master->caps, PWRAP_CAP_INT1_EN)) { > + rdata = pwrap_readl(wrp, PWRAP_INT1_FLG); > + > + dev_err(wrp->dev, "unexpected interrupt int1=0x%x\n", rdata); > + > + pwrap_writel(wrp, rdata, PWRAP_INT1_CLR); > + } > + it
Re: [PATCH V4 5/8] soc: mediatek: pwrap: add pwrap for mt6797 SoCs
Hi, Argus On Wed, 2018-05-02 at 17:21 +0800, argus@mediatek.com wrote: > From: Argus Lin > > mt6797 is a highly integrated SoCs, it uses mt6351 for power > management. We need to add pwrap support to access mt6351. > Pwrap of mt6797 support new feature include starvation and channel > request exception interrupt, dynamic starvation priority > adjustment mechanism. suggest line wrapping closely at 75 columns > > Signed-off-by: Argus Lin > --- > drivers/soc/mediatek/mtk-pmic-wrap.c | 110 > --- > 1 file changed, 102 insertions(+), 8 deletions(-) > > diff --git a/drivers/soc/mediatek/mtk-pmic-wrap.c > b/drivers/soc/mediatek/mtk-pmic-wrap.c > index a6366f147b79..0d4a2dae6912 100644 > --- a/drivers/soc/mediatek/mtk-pmic-wrap.c > +++ b/drivers/soc/mediatek/mtk-pmic-wrap.c > @@ -284,6 +284,12 @@ enum pwrap_regs { > PWRAP_DVFS_WDATA7, > PWRAP_SPMINF_STA, > PWRAP_CIPHER_EN, > + > + /* MT6797 series regs */ > + PWRAP_INT1_EN, > + PWRAP_INT1_FLG_RAW, > + PWRAP_INT1_FLG, > + PWRAP_INT1_CLR, > }; > > static int mt2701_regs[] = { > @@ -372,6 +378,43 @@ static int mt2701_regs[] = { > [PWRAP_ADC_RDATA_ADDR2] = 0x154, > }; > > +static int mt6797_regs[] = { > + [PWRAP_MUX_SEL] = 0x0, > + [PWRAP_WRAP_EN] = 0x4, > + [PWRAP_DIO_EN] =0x8, > + [PWRAP_SIDLY] = 0xC, > + [PWRAP_RDDMY] = 0x10, > + [PWRAP_CSHEXT_WRITE] = 0x18, > + [PWRAP_CSHEXT_READ] = 0x1C, > + [PWRAP_CSLEXT_START] = 0x20, > + [PWRAP_CSLEXT_END] =0x24, > + [PWRAP_STAUPD_PRD] =0x28, > + [PWRAP_HARB_HPRIO] =0x50, > + [PWRAP_HIPRIO_ARB_EN] = 0x54, > + [PWRAP_MAN_EN] =0x60, > + [PWRAP_MAN_CMD] = 0x64, > + [PWRAP_WACS0_EN] = 0x70, > + [PWRAP_WACS1_EN] = 0x84, > + [PWRAP_WACS2_EN] = 0x98, > + [PWRAP_INIT_DONE2] =0x9C, > + [PWRAP_WACS2_CMD] = 0xA0, > + [PWRAP_WACS2_RDATA] = 0xA4, > + [PWRAP_WACS2_VLDCLR] = 0xA8, > + [PWRAP_INT_EN] =0xC0, > + [PWRAP_INT_FLG_RAW] = 0xC4, > + [PWRAP_INT_FLG] = 0xC8, > + [PWRAP_INT_CLR] = 0xCC, > + [PWRAP_INT1_EN] = 0xD0, > + [PWRAP_INT1_FLG_RAW] = 0xD4, > + [PWRAP_INT1_FLG] = 0xD8, > + [PWRAP_INT1_CLR] 
= 0xDC, > + [PWRAP_TIMER_EN] = 0xF4, > + [PWRAP_WDT_UNIT] = 0xFC, > + [PWRAP_WDT_SRC_EN] =0x100, > + [PWRAP_DCM_EN] =0x1CC, > + [PWRAP_DCM_DBC_PRD] = 0x1D4, > +}; > + trim unused registers if any > static int mt7622_regs[] = { > [PWRAP_MUX_SEL] = 0x0, > [PWRAP_WRAP_EN] = 0x4, > @@ -647,6 +690,7 @@ enum pmic_type { > > enum pwrap_type { > PWRAP_MT2701, > + PWRAP_MT6797, > PWRAP_MT7622, > PWRAP_MT8135, > PWRAP_MT8173, > @@ -1006,6 +1050,12 @@ static void pwrap_init_chip_select_ext(struct > pmic_wrapper *wrp, u8 hext_write, > static int pwrap_common_init_reg_clock(struct pmic_wrapper *wrp) > { > switch (wrp->master->type) { > + case PWRAP_MT6797: > + pwrap_writel(wrp, 0x8, PWRAP_RDDMY); > + pwrap_write(wrp, wrp->slave->dew_regs[PWRAP_DEW_RDDMY_NO], > + 0x8); > + pwrap_init_chip_select_ext(wrp, 0x88, 0x55, 3, 0); > + break; the setup for timing is much similar to mt2701 + mt6323 so we can merge the both logic into one, and then hope to eliminate specific pwrap_mt2701_init_reg_clock totally > case PWRAP_MT8173: > pwrap_init_chip_select_ext(wrp, 0, 4, 2, 2); > break; > @@ -1076,11 +1126,14 @@ static int pwrap_init_cipher(struct pmic_wrapper *wrp) > break; > case PWRAP_MT2701: > case PWRAP_MT8173: > + case PWRAP_MT6797: need to be listed in alphabetical order > pwrap_writel(wrp, 1, PWRAP_CIPHER_EN); > break; > case PWRAP_MT7622: > pwrap_writel(wrp, 0, PWRAP_CIPHER_EN); > break; > + default: > + break; > } > > /* Config cipher mode @PMIC */ > @@ -1325,6 +1378,15 @@ static irqreturn_t pwrap_interrupt(int irqno, void > *dev_id) > > pwrap_writel(wrp, 0x, PWRAP_INT_CLR); > > + /* If we support INT1 interrupt, we also need to clear it */ > + if (HAS_CAP(wrp->master->caps, PWRAP_CAP_INT1_EN)) { > + rdata = pwrap_readl(wrp, PWRAP_INT1_FLG); > + > + dev_err(wrp->dev, "unexpected interrupt int1=0x%x\n", rdata); > + > + pwrap_writel(wrp, rdata, PWRAP_INT1_CLR); > + } > + it seems no required to add PWRAP_CAP_INT1_EN: the CAP