On Wed, Jan 6, 2010 at 1:29 PM, Jerome Glisse <jgli...@redhat.com> wrote:
> R300 family will hard lockup if host path read cache flush is
> done through MMIO to HOST_PATH_CNTL. But scheduling same flush
> through ring seems harmless. This patch remove the hdp_flush
> callback and add a flush after each fence emission which means
> a flush after each IB schedule. Thus we should have same behavior
> without the hard lockup.

We really only need to flush the HDP cache after rendering to vram (or
UMA in the IGP case).  Wouldn't it be better to just flush in those
cases?  We may want to use the hdp flush callback after sw access to
vram as well, so having a separate hdp callback might be better.  See
other comments inline below.

Alex

>
> Tested on R100,R200,R300,R400,R500,R600,R700 family.
>
> Signed-off-by: Jerome Glisse <jgli...@redhat.com>
> ---
>  drivers/gpu/drm/radeon/r100.c        |   14 ++++++--------
>  drivers/gpu/drm/radeon/r300.c        |   16 +++++++++++++++-
>  drivers/gpu/drm/radeon/r420.c        |    1 +
>  drivers/gpu/drm/radeon/r520.c        |    1 +
>  drivers/gpu/drm/radeon/r600.c        |    7 ++-----
>  drivers/gpu/drm/radeon/radeon.h      |    4 ++--
>  drivers/gpu/drm/radeon/radeon_asic.h |   12 ------------
>  drivers/gpu/drm/radeon/radeon_gem.c  |    2 --
>  drivers/gpu/drm/radeon/rs400.c       |    1 +
>  drivers/gpu/drm/radeon/rs600.c       |    1 +
>  drivers/gpu/drm/radeon/rs690.c       |    1 +
>  drivers/gpu/drm/radeon/rv515.c       |    1 +
>  12 files changed, 31 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
> index 44d599a..dc68493 100644
> --- a/drivers/gpu/drm/radeon/r100.c
> +++ b/drivers/gpu/drm/radeon/r100.c
> @@ -356,6 +356,11 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
>        /* Wait until IDLE & CLEAN */
>        radeon_ring_write(rdev, PACKET0(0x1720, 0));
>        radeon_ring_write(rdev, (1 << 16) | (1 << 17));
> +       radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
> +       radeon_ring_write(rdev, rdev->config.r100.hdp_cntl |
> +                               RADEON_HDP_READ_BUFFER_INVALIDATE);
> +       radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
> +       radeon_ring_write(rdev, rdev->config.r100.hdp_cntl);
>        /* Emit fence sequence & fire IRQ */
>        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
>        radeon_ring_write(rdev, fence->seq);
> @@ -1707,14 +1712,6 @@ void r100_gpu_init(struct radeon_device *rdev)
>        r100_hdp_reset(rdev);
>  }
>
> -void r100_hdp_flush(struct radeon_device *rdev)
> -{
> -       u32 tmp;
> -       tmp = RREG32(RADEON_HOST_PATH_CNTL);
> -       tmp |= RADEON_HDP_READ_BUFFER_INVALIDATE;
> -       WREG32(RADEON_HOST_PATH_CNTL, tmp);
> -}
> -
>  void r100_hdp_reset(struct radeon_device *rdev)
>  {
>        uint32_t tmp;
> @@ -3265,6 +3262,7 @@ static int r100_startup(struct radeon_device *rdev)
>        }
>        /* Enable IRQ */
>        r100_irq_set(rdev);
> +       rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
> index e47af52..f8a959a 100644
> --- a/drivers/gpu/drm/radeon/r300.c
> +++ b/drivers/gpu/drm/radeon/r300.c
> @@ -36,7 +36,15 @@
>  #include "rv350d.h"
>  #include "r300_reg_safe.h"
>
> -/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380 */
> +/* This files gather functions specifics to: r300,r350,rv350,rv370,rv380
> + *
> + * GPU Errata:
> + * - HOST_PATH_CNTL: r300 family seems to dislike write to HOST_PATH_CNTL
> + *   using MMIO to flush host path read cache, this lead to HARDLOCKUP.
> + *   However, scheduling such write to the ring seems harmless, i suspect
> + *   the CP read collide with the flush somehow, or maybe the MC, hard to
> + *   tell. (Jerome Glisse)
> + */
>
>  /*
>  * rv370,rv380 PCIE GART
> @@ -178,6 +186,11 @@ void r300_fence_ring_emit(struct radeon_device *rdev,
>        /* Wait until IDLE & CLEAN */
>        radeon_ring_write(rdev, PACKET0(0x1720, 0));
>        radeon_ring_write(rdev, (1 << 17) | (1 << 16)  | (1 << 9));
> +       radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
> +       radeon_ring_write(rdev, rdev->config.r300.hdp_cntl |
> +                               RADEON_HDP_READ_BUFFER_INVALIDATE);
> +       radeon_ring_write(rdev, PACKET0(RADEON_HOST_PATH_CNTL, 0));
> +       radeon_ring_write(rdev, rdev->config.r300.hdp_cntl);
>        /* Emit fence sequence & fire IRQ */
>        radeon_ring_write(rdev, PACKET0(rdev->fence_drv.scratch_reg, 0));
>        radeon_ring_write(rdev, fence->seq);
> @@ -1214,6 +1227,7 @@ static int r300_startup(struct radeon_device *rdev)
>        }
>        /* Enable IRQ */
>        r100_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
> index f46502a..1d4d16e 100644
> --- a/drivers/gpu/drm/radeon/r420.c
> +++ b/drivers/gpu/drm/radeon/r420.c
> @@ -219,6 +219,7 @@ static int r420_startup(struct radeon_device *rdev)
>        r420_pipes_init(rdev);
>        /* Enable IRQ */
>        r100_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
> index 0f3843b..9a18907 100644
> --- a/drivers/gpu/drm/radeon/r520.c
> +++ b/drivers/gpu/drm/radeon/r520.c
> @@ -186,6 +186,7 @@ static int r520_startup(struct radeon_device *rdev)
>        }
>        /* Enable IRQ */
>        rs600_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
> index 5c6058c..debf069 100644
> --- a/drivers/gpu/drm/radeon/r600.c
> +++ b/drivers/gpu/drm/radeon/r600.c
> @@ -1384,11 +1384,6 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 
> reg, u32 v)
>        (void)RREG32(PCIE_PORT_DATA);
>  }
>
> -void r600_hdp_flush(struct radeon_device *rdev)
> -{
> -       WREG32(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
> -}
> -

On r6xx+, mmio should be ok.

>  /*
>  * CP & Ring
>  */
> @@ -1785,6 +1780,8 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
>        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
>        radeon_ring_write(rdev, ((rdev->fence_drv.scratch_reg - 
> PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
>        radeon_ring_write(rdev, fence->seq);
> +       radeon_ring_write(rdev, 
> PACKET0(R_005480_HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
> +       radeon_ring_write(rdev, 1);

If you add additional packets here, you'll need to adjust the fence
counts in r600_blit_prepare_copy() in r600_blit_kms.c as well.

>        /* CP_INTERRUPT packet 3 no longer exists, use packet 0 */
>        radeon_ring_write(rdev, PACKET0(CP_INT_STATUS, 0));
>        radeon_ring_write(rdev, RB_INT_STAT);
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index b1ad408..b2e3734 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -652,7 +652,6 @@ struct radeon_asic {
>                               uint32_t offset, uint32_t obj_size);
>        int (*clear_surface_reg)(struct radeon_device *rdev, int reg);
>        void (*bandwidth_update)(struct radeon_device *rdev);
> -       void (*hdp_flush)(struct radeon_device *rdev);
>        void (*hpd_init)(struct radeon_device *rdev);
>        void (*hpd_fini)(struct radeon_device *rdev);
>        bool (*hpd_sense)(struct radeon_device *rdev, enum radeon_hpd_id hpd);
> @@ -665,12 +664,14 @@ struct radeon_asic {
>  struct r100_asic {
>        const unsigned  *reg_safe_bm;
>        unsigned        reg_safe_bm_size;
> +       u32             hdp_cntl;
>  };
>
>  struct r300_asic {
>        const unsigned  *reg_safe_bm;
>        unsigned        reg_safe_bm_size;
>        u32             resync_scratch;
> +       u32             hdp_cntl;
>  };
>
>  struct r600_asic {
> @@ -1006,7 +1007,6 @@ static inline void radeon_ring_write(struct 
> radeon_device *rdev, uint32_t v)
>  #define radeon_set_surface_reg(rdev, r, f, p, o, s) 
> ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s)))
>  #define radeon_clear_surface_reg(rdev, r) 
> ((rdev)->asic->clear_surface_reg((rdev), (r)))
>  #define radeon_bandwidth_update(rdev) (rdev)->asic->bandwidth_update((rdev))
> -#define radeon_hdp_flush(rdev) (rdev)->asic->hdp_flush((rdev))
>  #define radeon_hpd_init(rdev) (rdev)->asic->hpd_init((rdev))
>  #define radeon_hpd_fini(rdev) (rdev)->asic->hpd_fini((rdev))
>  #define radeon_hpd_sense(rdev, hpd) (rdev)->asic->hpd_sense((rdev), (hpd))
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
> b/drivers/gpu/drm/radeon/radeon_asic.h
> index 636116b..78fe792 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
> @@ -76,7 +76,6 @@ int r100_clear_surface_reg(struct radeon_device *rdev, int 
> reg);
>  void r100_bandwidth_update(struct radeon_device *rdev);
>  void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
>  int r100_ring_test(struct radeon_device *rdev);
> -void r100_hdp_flush(struct radeon_device *rdev);
>  void r100_hpd_init(struct radeon_device *rdev);
>  void r100_hpd_fini(struct radeon_device *rdev);
>  bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
> @@ -113,7 +112,6 @@ static struct radeon_asic r100_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &r100_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &r100_hpd_init,
>        .hpd_fini = &r100_hpd_fini,
>        .hpd_sense = &r100_hpd_sense,
> @@ -173,7 +171,6 @@ static struct radeon_asic r300_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &r100_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &r100_hpd_init,
>        .hpd_fini = &r100_hpd_fini,
>        .hpd_sense = &r100_hpd_sense,
> @@ -217,7 +214,6 @@ static struct radeon_asic r420_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &r100_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &r100_hpd_init,
>        .hpd_fini = &r100_hpd_fini,
>        .hpd_sense = &r100_hpd_sense,
> @@ -266,7 +262,6 @@ static struct radeon_asic rs400_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &r100_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &r100_hpd_init,
>        .hpd_fini = &r100_hpd_fini,
>        .hpd_sense = &r100_hpd_sense,
> @@ -323,7 +318,6 @@ static struct radeon_asic rs600_asic = {
>        .set_pcie_lanes = NULL,
>        .set_clock_gating = &radeon_atom_set_clock_gating,
>        .bandwidth_update = &rs600_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &rs600_hpd_init,
>        .hpd_fini = &rs600_hpd_fini,
>        .hpd_sense = &rs600_hpd_sense,
> @@ -371,7 +365,6 @@ static struct radeon_asic rs690_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &rs690_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &rs600_hpd_init,
>        .hpd_fini = &rs600_hpd_fini,
>        .hpd_sense = &rs600_hpd_sense,
> @@ -423,7 +416,6 @@ static struct radeon_asic rv515_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &rv515_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &rs600_hpd_init,
>        .hpd_fini = &rs600_hpd_fini,
>        .hpd_sense = &rs600_hpd_sense,
> @@ -466,7 +458,6 @@ static struct radeon_asic r520_asic = {
>        .set_surface_reg = r100_set_surface_reg,
>        .clear_surface_reg = r100_clear_surface_reg,
>        .bandwidth_update = &rv515_bandwidth_update,
> -       .hdp_flush = &r100_hdp_flush,
>        .hpd_init = &rs600_hpd_init,
>        .hpd_fini = &rs600_hpd_fini,
>        .hpd_sense = &rs600_hpd_sense,
> @@ -507,7 +498,6 @@ int r600_ring_test(struct radeon_device *rdev);
>  int r600_copy_blit(struct radeon_device *rdev,
>                   uint64_t src_offset, uint64_t dst_offset,
>                   unsigned num_pages, struct radeon_fence *fence);
> -void r600_hdp_flush(struct radeon_device *rdev);
>  void r600_hpd_init(struct radeon_device *rdev);
>  void r600_hpd_fini(struct radeon_device *rdev);
>  bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
> @@ -543,7 +533,6 @@ static struct radeon_asic r600_asic = {
>        .set_surface_reg = r600_set_surface_reg,
>        .clear_surface_reg = r600_clear_surface_reg,
>        .bandwidth_update = &rv515_bandwidth_update,
> -       .hdp_flush = &r600_hdp_flush,
>        .hpd_init = &r600_hpd_init,
>        .hpd_fini = &r600_hpd_fini,
>        .hpd_sense = &r600_hpd_sense,
> @@ -588,7 +577,6 @@ static struct radeon_asic rv770_asic = {
>        .set_surface_reg = r600_set_surface_reg,
>        .clear_surface_reg = r600_clear_surface_reg,
>        .bandwidth_update = &rv515_bandwidth_update,
> -       .hdp_flush = &r600_hdp_flush,
>        .hpd_init = &r600_hpd_init,
>        .hpd_fini = &r600_hpd_fini,
>        .hpd_sense = &r600_hpd_sense,
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
> b/drivers/gpu/drm/radeon/radeon_gem.c
> index 60df2d7..0e1325e 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -131,7 +131,6 @@ int radeon_gem_set_domain(struct drm_gem_object *gobj,
>                        printk(KERN_ERR "Failed to wait for object !\n");
>                        return r;
>                }
> -               radeon_hdp_flush(robj->rdev);
>        }
>        return 0;
>  }
> @@ -312,7 +311,6 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, 
> void *data,
>        mutex_lock(&dev->struct_mutex);
>        drm_gem_object_unreference(gobj);
>        mutex_unlock(&dev->struct_mutex);
> -       radeon_hdp_flush(robj->rdev);
>        return r;
>  }
>
> diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
> index 368415d..1eb775e 100644
> --- a/drivers/gpu/drm/radeon/rs400.c
> +++ b/drivers/gpu/drm/radeon/rs400.c
> @@ -395,6 +395,7 @@ static int rs400_startup(struct radeon_device *rdev)
>                return r;
>        /* Enable IRQ */
>        r100_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
> index 4f8ea42..d1f2d08 100644
> --- a/drivers/gpu/drm/radeon/rs600.c
> +++ b/drivers/gpu/drm/radeon/rs600.c
> @@ -553,6 +553,7 @@ static int rs600_startup(struct radeon_device *rdev)
>                return r;
>        /* Enable IRQ */
>        rs600_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
> index 1e22f52..dcb78b3 100644
> --- a/drivers/gpu/drm/radeon/rs690.c
> +++ b/drivers/gpu/drm/radeon/rs690.c
> @@ -625,6 +625,7 @@ static int rs690_startup(struct radeon_device *rdev)
>                return r;
>        /* Enable IRQ */
>        rs600_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
> index 59632a5..6275671 100644
> --- a/drivers/gpu/drm/radeon/rv515.c
> +++ b/drivers/gpu/drm/radeon/rv515.c
> @@ -479,6 +479,7 @@ static int rv515_startup(struct radeon_device *rdev)
>        }
>        /* Enable IRQ */
>        rs600_irq_set(rdev);
> +       rdev->config.r300.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
>        /* 1M ring buffer */
>        r = r100_cp_init(rdev, 1024 * 1024);
>        if (r) {
> --
> 1.6.5.2
>
>
> ------------------------------------------------------------------------------
> This SF.Net email is sponsored by the Verizon Developer Community
> Take advantage of Verizon's best-in-class app development support
> A streamlined, 14 day to market process makes app distribution fast and easy
> Join now and get one step closer to millions of Verizon customers
> http://p.sf.net/sfu/verizon-dev2dev
> --
> _______________________________________________
> Dri-devel mailing list
> Dri-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/dri-devel
>

------------------------------------------------------------------------------
This SF.Net email is sponsored by the Verizon Developer Community
Take advantage of Verizon's best-in-class app development support
A streamlined, 14 day to market process makes app distribution fast and easy
Join now and get one step closer to millions of Verizon customers
http://p.sf.net/sfu/verizon-dev2dev 
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Reply via email to