Am 20.04.2014 19:29, schrieb Lauri Kasanen:
> This was originally un-inlined by Andi Kleen in 2011 citing size concerns.
> Indeed, a first attempt at inlining it grew radeon.ko by 7%.
>
> However, 2% of cpu is spent in this function. Simply inlining it gave 1% more fps
> in Urban Terror.
>
> v2: We know the minimum MMIO size. Adding it to the if allows the compiler to
> optimize the branch out, improving both performance and size.
>
> The v2 patch decreases radeon.ko size by 2%. I didn't re-benchmark, but common sense
> says perf is now more than 1% better.
>
> v3: Also change _wreg, make the threshold a define.
>
> Inlining _wreg increased the size a bit compared to v2, so now radeon.ko
> is only 1% smaller.
>
> Signed-off-by: Lauri Kasanen <cand at gmx.com>

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/radeon/r100.c   | 33 ---------------------------------
>   drivers/gpu/drm/radeon/radeon.h | 40 ++++++++++++++++++++++++++++++++++++----
>   2 files changed, 36 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
> index b6c3264..a4e7871 100644
> --- a/drivers/gpu/drm/radeon/r100.c
> +++ b/drivers/gpu/drm/radeon/r100.c
> @@ -4086,39 +4086,6 @@ int r100_init(struct radeon_device *rdev)
>       return 0;
>   }
>   
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -                   bool always_indirect)
> -{
> -     if (reg < rdev->rmmio_size && !always_indirect)
> -             return readl(((void __iomem *)rdev->rmmio) + reg);
> -     else {
> -             unsigned long flags;
> -             uint32_t ret;
> -
> -             spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -             writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> -             ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> -             spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -
> -             return ret;
> -     }
> -}
> -
> -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
> -               bool always_indirect)
> -{
> -     if (reg < rdev->rmmio_size && !always_indirect)
> -             writel(v, ((void __iomem *)rdev->rmmio) + reg);
> -     else {
> -             unsigned long flags;
> -
> -             spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> -             writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> -             writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> -             spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> -     }
> -}
> -
>   u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
>   {
>       if (reg < rdev->rio_mem_size)
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index f21db7a..a749b6c 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -2328,10 +2328,42 @@ int radeon_device_init(struct radeon_device *rdev,
>   void radeon_device_fini(struct radeon_device *rdev);
>   int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
>   
> -uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> -                   bool always_indirect);
> -void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
> -               bool always_indirect);
> +#define RADEON_MIN_MMIO_SIZE 0x10000
> +
> +static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
> +                                 bool always_indirect)
> +{
> +     /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
> +     if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
> +             return readl(((void __iomem *)rdev->rmmio) + reg);
> +     else {
> +             unsigned long flags;
> +             uint32_t ret;
> +
> +             spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +             writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> +             ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +             spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +
> +             return ret;
> +     }
> +}
> +
> +static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
> +                             bool always_indirect)
> +{
> +     if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
> +             writel(v, ((void __iomem *)rdev->rmmio) + reg);
> +     else {
> +             unsigned long flags;
> +
> +             spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
> +             writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
> +             writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
> +             spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
> +     }
> +}
> +
>   u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
>   void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
>   

Reply via email to