[PATCH 08/10] drm/radeon: add set_uvd_clocks callback for r7xx v3
v2: avoid 64bit divide v3: rv740 uses the evegreen upll configuration Signed-off-by: Christian K?nig Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c |1 + drivers/gpu/drm/radeon/radeon_asic.h |1 + drivers/gpu/drm/radeon/rv770.c | 156 ++ drivers/gpu/drm/radeon/rv770d.h | 24 ++ 4 files changed, 182 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 03228cb..19bf122 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1183,6 +1183,7 @@ static struct radeon_asic rv770_asic = { .get_pcie_lanes = &r600_get_pcie_lanes, .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_uvd_clocks = &rv770_set_uvd_clocks, }, .pflip = { .pre_page_flip = &rs600_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 365c964..2add526 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -424,6 +424,7 @@ int rv770_copy_dma(struct radeon_device *rdev, struct radeon_fence **fence); u32 rv770_get_xclk(struct radeon_device *rdev); int rv770_uvd_resume(struct radeon_device *rdev); +int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); /* * evergreen diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 5a78cce..8149219 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -42,6 +42,162 @@ static void rv770_gpu_init(struct radeon_device *rdev); void rv770_fini(struct radeon_device *rdev); static void rv770_pcie_gen2_enable(struct radeon_device *rdev); +int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); + +static int rv770_uvd_calc_post_div(unsigned target_freq, + unsigned vco_freq, + unsigned *div) +{ + /* Fclk = Fvco / PDIV */ + *div = vco_freq / target_freq; + + /* we alway need a frequency less than or equal the target */ + if ((vco_freq / *div) > target_freq) + *div += 1; + + /* out of range ? */ + if (*div > 30) + return -1; /* forget it */ + + *div -= 1; + return vco_freq / (*div + 1); +} + +static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev) +{ + unsigned i; + + /* assert UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < 100; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + /* deassert UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + return 0; +} + +int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) +{ + /* start off with something large */ + int optimal_diff_score = 0x7FF; + unsigned optimal_fb_div = 0, optimal_vclk_div = 0; + unsigned optimal_dclk_div = 0, optimal_vco_freq = 0; + unsigned vco_freq, vco_min = 5, vco_max = 16; + unsigned ref_freq = rdev->clock.spll.reference_freq; + int r; + + /* RV740 uses evergreen uvd clk programming */ + if (rdev->family == CHIP_RV740) + return evergreen_set_uvd_clocks(rdev, vclk, dclk); + + /* loop through vco from low to high */ + vco_min = max(max(vco_min, vclk), dclk); + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) { + uint64_t fb_div = (uint64_t)vco_freq * 43663; + int calc_clk, diff_score, diff_vclk, diff_dclk; + unsigned vclk_div, dclk_div; + + do_div(fb_div, ref_freq); + fb_div |= 1; + + /* fb div out of range ? */ + if (fb_div > 0x03FF) + break; /* it can oly get worse */ + + /* calc vclk with current vco freq. */ + calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div); + if (calc_clk == -1) + break; /* vco is too big, it has to stop. */ + diff_vclk = vclk - calc_clk; + + /* calc dclk with current vco freq. */ + calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div); + if (calc_clk == -1) + break; /* vco is too big, it has to stop. */ + diff_dclk = dclk - calc_clk; + + /* determine if this vco setting is better than current optimal settings */ +
[PATCH 08/10] drm/radeon: add set_uvd_clocks callback for r7xx v3
v2: avoid 64bit divide v3: rv740 uses the evegreen upll configuration Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_asic.c |1 + drivers/gpu/drm/radeon/radeon_asic.h |1 + drivers/gpu/drm/radeon/rv770.c | 156 ++ drivers/gpu/drm/radeon/rv770d.h | 24 ++ 4 files changed, 182 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 03228cb..19bf122 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1183,6 +1183,7 @@ static struct radeon_asic rv770_asic = { .get_pcie_lanes = &r600_get_pcie_lanes, .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_uvd_clocks = &rv770_set_uvd_clocks, }, .pflip = { .pre_page_flip = &rs600_pre_page_flip, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 365c964..2add526 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -424,6 +424,7 @@ int rv770_copy_dma(struct radeon_device *rdev, struct radeon_fence **fence); u32 rv770_get_xclk(struct radeon_device *rdev); int rv770_uvd_resume(struct radeon_device *rdev); +int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); /* * evergreen diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 5a78cce..8149219 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -42,6 +42,162 @@ static void rv770_gpu_init(struct radeon_device *rdev); void rv770_fini(struct radeon_device *rdev); static void rv770_pcie_gen2_enable(struct radeon_device *rdev); +int evergreen_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); + +static int rv770_uvd_calc_post_div(unsigned target_freq, + unsigned vco_freq, + unsigned *div) +{ + /* Fclk = Fvco / PDIV */ + *div = vco_freq / target_freq; + + /* we alway need a frequency less than or equal the target */ + if ((vco_freq / *div) > target_freq) + *div += 1; + + /* out of range ? */ + if (*div > 30) + return -1; /* forget it */ + + *div -= 1; + return vco_freq / (*div + 1); +} + +static int rv770_uvd_send_upll_ctlreq(struct radeon_device *rdev) +{ + unsigned i; + + /* assert UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < 100; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + if ((RREG32(CG_UPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + /* deassert UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + return 0; +} + +int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) +{ + /* start off with something large */ + int optimal_diff_score = 0x7FF; + unsigned optimal_fb_div = 0, optimal_vclk_div = 0; + unsigned optimal_dclk_div = 0, optimal_vco_freq = 0; + unsigned vco_freq, vco_min = 5, vco_max = 16; + unsigned ref_freq = rdev->clock.spll.reference_freq; + int r; + + /* RV740 uses evergreen uvd clk programming */ + if (rdev->family == CHIP_RV740) + return evergreen_set_uvd_clocks(rdev, vclk, dclk); + + /* loop through vco from low to high */ + vco_min = max(max(vco_min, vclk), dclk); + for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 500) { + uint64_t fb_div = (uint64_t)vco_freq * 43663; + int calc_clk, diff_score, diff_vclk, diff_dclk; + unsigned vclk_div, dclk_div; + + do_div(fb_div, ref_freq); + fb_div |= 1; + + /* fb div out of range ? */ + if (fb_div > 0x03FF) + break; /* it can oly get worse */ + + /* calc vclk with current vco freq. */ + calc_clk = rv770_uvd_calc_post_div(vclk, vco_freq, &vclk_div); + if (calc_clk == -1) + break; /* vco is too big, it has to stop. */ + diff_vclk = vclk - calc_clk; + + /* calc dclk with current vco freq. */ + calc_clk = rv770_uvd_calc_post_div(dclk, vco_freq, &dclk_div); + if (calc_clk == -1) + break; /* vco is too big, it has to stop. */ + diff_dclk = dclk - calc_clk; + + /* determine if this vco setting is better than current optimal settings */ +