On Wed, 11 Oct 2023, John Harrison <john.c.harri...@intel.com> wrote:
> On 10/11/2023 09:38, Jani Nikula wrote:
>> Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt
>> conditional compilation, as i915_gpu_error.c is only built with
>> DRM_I915_CAPTURE_ERROR=y.
>>
>> With this, we can also make i915_first_error_state() static.
>>
>> Signed-off-by: Jani Nikula <jani.nik...@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 75 ++++++++++++++++++++++++-
>>   drivers/gpu/drm/i915/i915_gpu_error.h | 17 +++---
>>   drivers/gpu/drm/i915/i915_sysfs.c     | 79 +--------------------------
>>   3 files changed, 86 insertions(+), 85 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
>> b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index b4c8459deb7b..f9e750217f18 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -57,6 +57,7 @@
>>   #include "i915_memcpy.h"
>>   #include "i915_reg.h"
>>   #include "i915_scatterlist.h"
>> +#include "i915_sysfs.h"
>>   #include "i915_utils.h"
>>   
>>   #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | 
>> __GFP_NOWARN)
>> @@ -2208,7 +2209,7 @@ void i915_capture_error_state(struct intel_gt *gt,
>>      i915_gpu_coredump_put(error);
>>   }
>>   
>> -struct i915_gpu_coredump *
>> +static struct i915_gpu_coredump *
>>   i915_first_error_state(struct drm_i915_private *i915)
>>   {
>>      struct i915_gpu_coredump *error;
>> @@ -2484,3 +2485,75 @@ void i915_gpu_error_debugfs_register(struct 
>> drm_i915_private *i915)
>>      debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
>>                          &i915_gpu_info_fops);
>>   }
>> +
>> +static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
>> +                            struct bin_attribute *attr, char *buf,
>> +                            loff_t off, size_t count)
>> +{
>> +
>> +    struct device *kdev = kobj_to_dev(kobj);
>> +    struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
>> +    struct i915_gpu_coredump *gpu;
>> +    ssize_t ret = 0;
>> +
>> +    /*
>> +     * FIXME: Concurrent clients triggering resets and reading + clearing
>> +     * dumps can cause inconsistent sysfs reads when a user calls in with a
>> +     * non-zero offset to complete a prior partial read but the
>> +     * gpu_coredump has been cleared or replaced.
>> +     */
>> +
>> +    gpu = i915_first_error_state(i915);
>> +    if (IS_ERR(gpu)) {
>> +            ret = PTR_ERR(gpu);
>> +    } else if (gpu) {
>> +            ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
>> +            i915_gpu_coredump_put(gpu);
>> +    } else {
>> +            const char *str = "No error state collected\n";
>> +            size_t len = strlen(str);
>> +
>> +            if (off < len) {
>> +                    ret = min_t(size_t, count, len - off);
>> +                    memcpy(buf, str + off, ret);
>> +            }
>> +    }
> Can this and the debugfs equivalent not be common code? It seems like 
> the implementations are conceptually the same even if the code currently 
> looks quite different.

They probably can, but this is just the code movement part. I initially
sent a bigger refactoring series [1], but decided to chop it up and send
it in smaller pieces, to not burden the reviewers. The first part [2]
has already been merged, and this is the follow-up.

BR,
Jani.


[1] https://lore.kernel.org/r/cover.1695924021.git.jani.nik...@intel.com
[2] https://lore.kernel.org/r/cover.1696236329.git.jani.nik...@intel.com

>
> John.
>
>> +
>> +    return ret;
>> +}
>> +
>> +static ssize_t error_state_write(struct file *file, struct kobject *kobj,
>> +                             struct bin_attribute *attr, char *buf,
>> +                             loff_t off, size_t count)
>> +{
>> +    struct device *kdev = kobj_to_dev(kobj);
>> +    struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>> +
>> +    drm_dbg(&dev_priv->drm, "Resetting error state\n");
>> +    i915_reset_error_state(dev_priv);
>> +
>> +    return count;
>> +}
>> +
>> +static const struct bin_attribute error_state_attr = {
>> +    .attr.name = "error",
>> +    .attr.mode = S_IRUSR | S_IWUSR,
>> +    .size = 0,
>> +    .read = error_state_read,
>> +    .write = error_state_write,
>> +};
>> +
>> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
>> +{
>> +    struct device *kdev = i915->drm.primary->kdev;
>> +
>> +    if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
>> +            drm_err(&i915->drm, "error_state sysfs setup failed\n");
>> +}
>> +
>> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
>> +{
>> +    struct device *kdev = i915->drm.primary->kdev;
>> +
>> +    sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h 
>> b/drivers/gpu/drm/i915/i915_gpu_error.h
>> index a6f2a7518cf0..68c964d6720a 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
>> @@ -323,11 +323,12 @@ static inline void i915_gpu_coredump_put(struct 
>> i915_gpu_coredump *gpu)
>>              kref_put(&gpu->ref, __i915_gpu_coredump_free);
>>   }
>>   
>> -struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private 
>> *i915);
>>   void i915_reset_error_state(struct drm_i915_private *i915);
>>   void i915_disable_error_state(struct drm_i915_private *i915, int err);
>>   
>>   void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
>> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
>> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
>>   
>>   #else
>>   
>> @@ -396,12 +397,6 @@ static inline void i915_gpu_coredump_put(struct 
>> i915_gpu_coredump *gpu)
>>   {
>>   }
>>   
>> -static inline struct i915_gpu_coredump *
>> -i915_first_error_state(struct drm_i915_private *i915)
>> -{
>> -    return ERR_PTR(-ENODEV);
>> -}
>> -
>>   static inline void i915_reset_error_state(struct drm_i915_private *i915)
>>   {
>>   }
>> @@ -415,6 +410,14 @@ static inline void 
>> i915_gpu_error_debugfs_register(struct drm_i915_private *i915
>>   {
>>   }
>>   
>> +static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
>> +{
>> +}
>> +
>> +static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private 
>> *i915)
>> +{
>> +}
>> +
>>   #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
>>   
>>   #endif /* _I915_GPU_ERROR_H_ */
>> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c 
>> b/drivers/gpu/drm/i915/i915_sysfs.c
>> index e88bb4f04305..613decd47760 100644
>> --- a/drivers/gpu/drm/i915/i915_sysfs.c
>> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
>> @@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = {
>>      .private = (void *)1
>>   };
>>   
>> -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
>> -
>> -static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
>> -                            struct bin_attribute *attr, char *buf,
>> -                            loff_t off, size_t count)
>> -{
>> -
>> -    struct device *kdev = kobj_to_dev(kobj);
>> -    struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
>> -    struct i915_gpu_coredump *gpu;
>> -    ssize_t ret = 0;
>> -
>> -    /*
>> -     * FIXME: Concurrent clients triggering resets and reading + clearing
>> -     * dumps can cause inconsistent sysfs reads when a user calls in with a
>> -     * non-zero offset to complete a prior partial read but the
>> -     * gpu_coredump has been cleared or replaced.
>> -     */
>> -
>> -    gpu = i915_first_error_state(i915);
>> -    if (IS_ERR(gpu)) {
>> -            ret = PTR_ERR(gpu);
>> -    } else if (gpu) {
>> -            ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
>> -            i915_gpu_coredump_put(gpu);
>> -    } else {
>> -            const char *str = "No error state collected\n";
>> -            size_t len = strlen(str);
>> -
>> -            if (off < len) {
>> -                    ret = min_t(size_t, count, len - off);
>> -                    memcpy(buf, str + off, ret);
>> -            }
>> -    }
>> -
>> -    return ret;
>> -}
>> -
>> -static ssize_t error_state_write(struct file *file, struct kobject *kobj,
>> -                             struct bin_attribute *attr, char *buf,
>> -                             loff_t off, size_t count)
>> -{
>> -    struct device *kdev = kobj_to_dev(kobj);
>> -    struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
>> -
>> -    drm_dbg(&dev_priv->drm, "Resetting error state\n");
>> -    i915_reset_error_state(dev_priv);
>> -
>> -    return count;
>> -}
>> -
>> -static const struct bin_attribute error_state_attr = {
>> -    .attr.name = "error",
>> -    .attr.mode = S_IRUSR | S_IWUSR,
>> -    .size = 0,
>> -    .read = error_state_read,
>> -    .write = error_state_write,
>> -};
>> -
>> -static void i915_setup_error_capture(struct device *kdev)
>> -{
>> -    if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
>> -            drm_err(&kdev_minor_to_i915(kdev)->drm,
>> -                    "error_state sysfs setup failed\n");
>> -}
>> -
>> -static void i915_teardown_error_capture(struct device *kdev)
>> -{
>> -    sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
>> -}
>> -#else
>> -static void i915_setup_error_capture(struct device *kdev) {}
>> -static void i915_teardown_error_capture(struct device *kdev) {}
>> -#endif
>> -
>>   void i915_setup_sysfs(struct drm_i915_private *dev_priv)
>>   {
>>      struct device *kdev = dev_priv->drm.primary->kdev;
>> @@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
>>              drm_warn(&dev_priv->drm,
>>                       "failed to register GT sysfs directory\n");
>>   
>> -    i915_setup_error_capture(kdev);
>> +    i915_gpu_error_sysfs_setup(dev_priv);
>>   
>>      intel_engines_add_sysfs(dev_priv);
>>   }
>> @@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private 
>> *dev_priv)
>>   {
>>      struct device *kdev = dev_priv->drm.primary->kdev;
>>   
>> -    i915_teardown_error_capture(kdev);
>> +    i915_gpu_error_sysfs_teardown(dev_priv);
>>   
>>      device_remove_bin_file(kdev,  &dpf_attrs_1);
>>      device_remove_bin_file(kdev,  &dpf_attrs);
>

-- 
Jani Nikula, Intel

Reply via email to