perf results for:
"perf record -F 99 ./ffmpeg_g -i Tango2_3840x2160_60_10_420_27_LD.266 -f null -"

before: 5.24% in total
1.87%  ffmpeg_g  [.] vvc_deblock_bs_chroma
1.72%  ffmpeg_g  [.] ff_vvc_deblock_bs
1.65%  ffmpeg_g  [.] vvc_deblock_bs_luma

after: 3.48% in total
1.84%  ffmpeg_g  [.] vvc_deblock_bs_chroma
1.64%  ffmpeg_g  [.] ff_vvc_deblock_bs (vvc_deblock_bs_luma is now inlined into it)
---
 libavcodec/vvc/ctu.c    |  2 +
 libavcodec/vvc/ctu.h    |  3 ++
 libavcodec/vvc/filter.c | 90 +++++++++++++++++------------------------
 3 files changed, 42 insertions(+), 53 deletions(-)

diff --git a/libavcodec/vvc/ctu.c b/libavcodec/vvc/ctu.c
index 8210ab520f..e49976c66b 100644
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@@ -241,6 +241,7 @@ static TransformUnit* add_tu(VVCFrameContext *fc, 
CodingUnit *cu, const int x0,
     tu->height = tu_height;
     tu->joint_cbcr_residual_flag = 0;
     memset(tu->coded_flag, 0, sizeof(tu->coded_flag));
+    tu->avail[LUMA] = tu->avail[CHROMA] = 0;
     tu->nb_tbs = 0;
 
     return tu;
@@ -267,6 +268,7 @@ static TransformBlock* add_tb(TransformUnit *tu, 
VVCLocalContext *lc,
     tb->ts = 0;
     tb->coeffs = lc->coeffs;
     lc->coeffs += tb_width * tb_height;
+    tu->avail[!!c_idx] = true;
     return tb;
 }
 
diff --git a/libavcodec/vvc/ctu.h b/libavcodec/vvc/ctu.h
index eab4612561..c5533c1ad0 100644
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@@ -23,6 +23,8 @@
 #ifndef AVCODEC_VVC_CTU_H
 #define AVCODEC_VVC_CTU_H
 
+#include <stdbool.h>
+
 #include "libavcodec/cabac.h"
 #include "libavutil/mem_internal.h"
 
@@ -172,6 +174,7 @@ typedef struct TransformUnit {
     int y0;
     int width;
     int height;
+    bool avail[CHROMA + 1];                             // true if the TU contains a luma/chroma transform block
 
     uint8_t joint_cbcr_residual_flag;                   ///< 
tu_joint_cbcr_residual_flag
 
diff --git a/libavcodec/vvc/filter.c b/libavcodec/vvc/filter.c
index 9a45a735e0..a7f102bc64 100644
--- a/libavcodec/vvc/filter.c
+++ b/libavcodec/vvc/filter.c
@@ -451,15 +451,15 @@ static int boundary_strength(const VVCLocalContext *lc, 
const MvField *curr, con
 
 //part of 8.8.3.3 Derivation process of transform block boundary
 static void derive_max_filter_length_luma(const VVCFrameContext *fc, const int 
qx, const int qy,
-                                          const int is_intra, const int 
has_subblock, const int vertical, uint8_t *max_len_p, uint8_t *max_len_q)
+    const int size_q, const int has_subblock, const int vertical, uint8_t 
*max_len_p, uint8_t *max_len_q)
 {
     const int px =  vertical ? qx - 1 : qx;
     const int py = !vertical ? qy - 1 : qy;
     const uint8_t *tb_size = vertical ? fc->tab.tb_width[LUMA] : 
fc->tab.tb_height[LUMA];
     const int size_p = tb_size[(py >> MIN_TU_LOG2) * fc->ps.pps->min_tu_width 
+ (px >> MIN_TU_LOG2)];
-    const int size_q = tb_size[(qy >> MIN_TU_LOG2) * fc->ps.pps->min_tu_width 
+ (qx >> MIN_TU_LOG2)];
     const int min_cb_log2 = fc->ps.sps->min_cb_log2_size_y;
     const int off_p = (py >> min_cb_log2) * fc->ps.pps->min_cb_width + (px >> 
min_cb_log2);
+
     if (size_p <= 4 || size_q <= 4) {
         *max_len_p = *max_len_q = 1;
     } else {
@@ -525,7 +525,7 @@ static void vvc_deblock_subblock_bs(const VVCLocalContext 
*lc,
 }
 
 static av_always_inline int deblock_bs(const VVCLocalContext *lc,
-    const int x_p, const int y_p, const int x_q, const int y_q,
+    const int x_p, const int y_p, const int x_q, const int y_q, const 
CodingUnit *cu, const TransformUnit *tu,
     const RefPicList *rpl_p, const int c_idx, const int off_to_cb, const 
uint8_t has_sub_block)
 {
     const VVCFrameContext *fc  = lc->fc;
@@ -542,12 +542,10 @@ static av_always_inline int deblock_bs(const 
VVCLocalContext *lc,
     const MvField *mvf_q       = &tab_mvf[pu_q];
     const uint8_t chroma       = !!c_idx;
     const int tu_p             = (y_p >> log2_min_tu_size) * min_tu_width  + 
(x_p >>  log2_min_tu_size);
-    const int tu_q             = (y_q >> log2_min_tu_size) * min_tu_width  + 
(x_q >>  log2_min_tu_size);
     const int cb_p             = (y_p >> log2_min_cb_size) * min_cb_width  + 
(x_p >>  log2_min_cb_size);
-    const int cb_q             = (y_q >> log2_min_cb_size) * min_cb_width  + 
(x_q >>  log2_min_cb_size);
-    const uint8_t pcmf         = fc->tab.pcmf[chroma][cb_p] && 
fc->tab.pcmf[chroma][cb_q];
-    const uint8_t intra        = fc->tab.cpm[chroma][cb_p] == MODE_INTRA || 
fc->tab.cpm[chroma][cb_q] == MODE_INTRA;
-    const uint8_t same_mode    = fc->tab.cpm[chroma][cb_p] == 
fc->tab.cpm[chroma][cb_q];
+    const uint8_t pcmf         = fc->tab.pcmf[chroma][cb_p] && 
cu->bdpcm_flag[chroma];
+    const uint8_t intra        = fc->tab.cpm[chroma][cb_p] == MODE_INTRA || 
cu->pred_mode == MODE_INTRA;
+    const uint8_t same_mode    = fc->tab.cpm[chroma][cb_p] == cu->pred_mode;
 
     if (pcmf)
         return 0;
@@ -557,12 +555,12 @@ static av_always_inline int deblock_bs(const 
VVCLocalContext *lc,
 
     if (chroma) {
         return fc->tab.tu_coded_flag[c_idx][tu_p] ||
-               fc->tab.tu_coded_flag[c_idx][tu_q] ||
                fc->tab.tu_joint_cbcr_residual_flag[tu_p] ||
-               fc->tab.tu_joint_cbcr_residual_flag[tu_q];
+               tu->coded_flag[c_idx] ||
+               tu->joint_cbcr_residual_flag;
     }
 
-    if (fc->tab.tu_coded_flag[LUMA][tu_p] || fc->tab.tu_coded_flag[LUMA][tu_q])
+    if (fc->tab.tu_coded_flag[LUMA][tu_p] || tu->coded_flag[LUMA])
         return 1;
 
     if ((off_to_cb && ((off_to_cb % 8) || !has_sub_block)))
@@ -606,27 +604,23 @@ static int deblock_is_boundary(const VVCLocalContext *lc, 
const int boundary,
 }
 
 static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
-    const int x0, const int y0, const int width, const int height, const int 
rs, const int vertical)
+    const int x0, const int y0, const int width, const int height,
+    const CodingUnit *cu, const TransformUnit *tu,  int rs, const int vertical)
 {
-    const VVCFrameContext *fc  = lc->fc;
-    const MvField *tab_mvf     = fc->tab.mvf;
-    const int mask             = LUMA_GRID - 1;
-    const int log2_min_pu_size = MIN_PU_LOG2;
-    const int min_pu_width     = fc->ps.pps->min_pu_width;
-    const int min_cb_log2      = fc->ps.sps->min_cb_log2_size_y;
-    const int min_cb_width     = fc->ps.pps->min_cb_width;
-    const int pos              = vertical ? x0 : y0;
-    const int off_q            = (y0 >> min_cb_log2) * min_cb_width + (x0 >> 
min_cb_log2);
-    const int cb               = (vertical ? fc->tab.cb_pos_x : 
fc->tab.cb_pos_y )[LUMA][off_q];
-    const int is_intra         = tab_mvf[(y0 >> log2_min_pu_size) * 
min_pu_width +
-            (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
+    const VVCFrameContext *fc = lc->fc;
+    const PredictionUnit *pu  = &cu->pu;
+    const int mask            = LUMA_GRID - 1;
+    const int pos             = vertical ? x0 : y0;
+    const int cb              = vertical ? cu->x0 : cu->y0;
+    const int is_intra        = cu->pred_mode == MODE_INTRA;
+    const int cb_size         = vertical ? cu->cb_width : cu->cb_height;
+    const int has_sb          = !is_intra && (pu->merge_subblock_flag || 
pu->inter_affine_flag) && cb_size > 8;
 
     if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) {
         const int is_vb         = is_virtual_boundary(fc, pos, vertical);
         const int size          = vertical ? height : width;
+        const int size_q        = vertical ? width  : height;
         const int off           = cb - pos;
-        const int cb_size       = (vertical ? fc->tab.cb_width : 
fc->tab.cb_height)[LUMA][off_q];
-        const int has_sb        = !is_intra && (fc->tab.msf[off_q] || 
fc->tab.iaf[off_q]) && cb_size > 8;
         const int flag          = vertical ? BOUNDARY_LEFT_SLICE : 
BOUNDARY_UPPER_SLICE;
         const RefPicList *rpl_p =
             (lc->boundary_flags & flag) ? ff_vvc_get_ref_list(fc, fc->ref, x0 
- vertical, y0 - !vertical) : lc->sc->rpl;
@@ -635,24 +629,23 @@ static void vvc_deblock_bs_luma(const VVCLocalContext *lc,
             const int x = x0 + i * !vertical;
             const int y = y0 + i * vertical;
             uint8_t max_len_p, max_len_q;
-            const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, rpl_p, LUMA, off, has_sb);
+            const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, cu, tu, rpl_p, LUMA, off, has_sb);
 
             TAB_BS(fc->tab.bs[vertical][LUMA], x, y) = bs;
 
-            derive_max_filter_length_luma(fc, x, y, is_intra, has_sb, 
vertical, &max_len_p, &max_len_q);
+            derive_max_filter_length_luma(fc, x, y, size_q, has_sb, vertical, 
&max_len_p, &max_len_q);
             TAB_MAX_LEN(fc->tab.max_len_p[vertical], x, y) = max_len_p;
             TAB_MAX_LEN(fc->tab.max_len_q[vertical], x, y) = max_len_q;
         }
     }
 
-    if (!is_intra) {
-        if (fc->tab.msf[off_q] || fc->tab.iaf[off_q])
-            vvc_deblock_subblock_bs(lc, cb, x0, y0, width, height, vertical);
-    }
+    if (has_sb)
+        vvc_deblock_subblock_bs(lc, cb, x0, y0, width, height, vertical);
 }
 
 static void vvc_deblock_bs_chroma(const VVCLocalContext *lc,
-    const int x0, const int y0, const int width, const int height, const int 
rs, const int vertical)
+    const int x0, const int y0, const int width, const int height,
+    const CodingUnit *cu, const TransformUnit *tu, const int rs, const int 
vertical)
 {
     const VVCFrameContext *fc = lc->fc;
     const int shift           = (vertical ? fc->ps.sps->hshift : 
fc->ps.sps->vshift)[CHROMA];
@@ -667,7 +660,7 @@ static void vvc_deblock_bs_chroma(const VVCLocalContext *lc,
             for (int i = 0; i < size; i += 2) {
                 const int x  = x0 + i * !vertical;
                 const int y  = y0 + i * vertical;
-                const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, NULL, c_idx, 0, 0);
+                const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - 
!vertical, x, y, cu, tu, NULL, c_idx, 0, 0);
 
                 TAB_BS(fc->tab.bs[vertical][c_idx], x, y) = bs;
             }
@@ -682,29 +675,20 @@ void ff_vvc_deblock_bs(VVCLocalContext *lc, const int rx, 
const int ry, const in
 {
     const VVCFrameContext *fc  = lc->fc;
     const VVCSPS *sps          = fc->ps.sps;
-    const VVCPPS *pps          = fc->ps.pps;
-    const int ctb_size         = sps->ctb_size_y;
     const int x0               = rx << sps->ctb_log2_size_y;
     const int y0               = ry << sps->ctb_log2_size_y;
-    const int x_end            = FFMIN(x0 + ctb_size, pps->width) >> 
MIN_TU_LOG2;
-    const int y_end            = FFMIN(y0 + ctb_size, pps->height) >> 
MIN_TU_LOG2;
-    const int has_chroma       = !!sps->r->sps_chroma_format_idc;
-    deblock_bs_fn deblock_bs[] = {
-        vvc_deblock_bs_luma, vvc_deblock_bs_chroma
-    };
 
     ff_vvc_decode_neighbour(lc, x0, y0, rx, ry, rs);
-    for (int vertical = 0; vertical <= 1; vertical++) {
-        for (int is_chroma = 0; is_chroma <= has_chroma; is_chroma++) {
-            const int hs = sps->hshift[is_chroma];
-            const int vs = sps->vshift[is_chroma];
-            for (int y = y0 >> MIN_TU_LOG2; y < y_end; y++) {
-                for (int x = x0 >> MIN_TU_LOG2; x < x_end; x++) {
-                    const int off = y * fc->ps.pps->min_tu_width + x;
-                    if ((fc->tab.tb_pos_x0[is_chroma][off] >> MIN_TU_LOG2) == 
x && (fc->tab.tb_pos_y0[is_chroma][off] >> MIN_TU_LOG2) == y) {
-                        deblock_bs[is_chroma](lc, x << MIN_TU_LOG2, y << 
MIN_TU_LOG2,
-                            fc->tab.tb_width[is_chroma][off] << hs, 
fc->tab.tb_height[is_chroma][off] << vs, rs, vertical);
-                    }
+    for (const CodingUnit *cu = fc->tab.cus[rs]; cu; cu = cu->next) {
+        for (const TransformUnit *tu = cu->tus.head; tu; tu = tu->next) {
+            for (int vertical = 0; vertical <= 1; vertical++) {
+                if (tu->avail[LUMA])
+                    vvc_deblock_bs_luma(lc, tu->x0, tu->y0, tu->width, 
tu->height, cu, tu, rs, vertical);
+                if (tu->avail[CHROMA]) {
+                    if (cu->isp_split_type != ISP_NO_SPLIT && cu->tree_type == 
SINGLE_TREE)
+                        vvc_deblock_bs_chroma(lc, cu->x0, cu->y0, 
cu->cb_width, cu->cb_height, cu, tu, rs, vertical);
+                    else
+                        vvc_deblock_bs_chroma(lc, tu->x0, tu->y0, tu->width, 
tu->height, cu, tu, rs, vertical);
                 }
             }
         }
-- 
2.34.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to