[Libva] [PATCH intel-driver v2 3/8] vpp: factor out calculation of AVS coefficients.

Gwenole Beauchesne Tue, 28 Oct 2014 10:53:03 -0700

Split the calculation of AVS coefficients into separate helper functions
with a view to supporting alternate algorithms; we stick to bilinear
interpolation for now.


Actually fix bilinear filtering coefficients if the sharp 8x8 filter
is used. This is still disabled by default though.

Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com>
---
 src/Makefile.am            |   2 +
 src/gen8_post_processing.c | 101 +++++++++++++--------
 src/i965_post_processing.c | 218 ++++++++++++++++++++++++++++++---------------
 src/i965_vpp_avs.c         | 110 +++++++++++++++++++++++
 src/i965_vpp_avs.h         |  85 ++++++++++++++++++
 5 files changed, 408 insertions(+), 108 deletions(-)
 create mode 100644 src/i965_vpp_avs.c
 create mode 100644 src/i965_vpp_avs.h

diff --git a/src/Makefile.am b/src/Makefile.am
index acfa849..3f02391 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -81,6 +81,7 @@ source_c = \
        i965_post_processing.c  \
        gen8_post_processing.c  \
        i965_render.c           \
+       i965_vpp_avs.c          \
        gen8_render.c           \
        intel_batchbuffer.c     \
        intel_batchbuffer_dump.c\
@@ -117,6 +118,7 @@ source_h = \
        i965_post_processing.h  \
        i965_render.h           \
        i965_structs.h          \
+       i965_vpp_avs.h          \
        intel_batchbuffer.h     \
        intel_batchbuffer_dump.h\
        intel_compiler.h        \
diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
index eeae32f..d36c061 100644
--- a/src/gen8_post_processing.c
+++ b/src/gen8_post_processing.c
@@ -39,6 +39,7 @@
 #include "i965_drv_video.h"
 #include "i965_post_processing.h"
 #include "i965_render.h"
+#include "i965_vpp_avs.h"
 #include "intel_media.h"
 
 #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
@@ -739,6 +740,14 @@ static void 
gen7_update_src_surface_uv_offset(VADriverContextP    ctx,
     }
 }
 
+static const AVSConfig gen8_avs_config = {
+    .coeff_frac_bits = 6,
+    .coeff_epsilon = 1.0f / (1U << 6),
+    .num_phases = 16,
+    .num_luma_coeffs = 8,
+    .num_chroma_coeffs = 4,
+};
+
 static VAStatus
 gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_context *pp_context,
                            const struct i965_surface *src_surface,
@@ -755,6 +764,8 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_con
     int width[3], height[3], pitch[3], offset[3];
     int src_width, src_height;
     unsigned char *cc_ptr;
+    AVSState avs;
+    float sx, sy;
 
     memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
 
@@ -868,55 +879,75 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_con
     sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
     sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
 
-    for (i = 0; i < 17; i++) {
-        struct gen8_sampler_8x8_avs_coefficients * const sampler_8x8_state =
-            &sampler_8x8->coefficients[i];
-
-       float coff;
-       coff = i;
-       coff = coff / 16;
+    avs_init_state(&avs, &gen8_avs_config);
 
-        sampler_8x8_state->dw0.table_0x_filter_c0 = 0;
-        sampler_8x8_state->dw0.table_0y_filter_c0 = 0;
-        sampler_8x8_state->dw0.table_0x_filter_c1 = 0;
-        sampler_8x8_state->dw0.table_0y_filter_c1 = 0;
+    sx = (float)dst_rect->width / src_rect->width;
+    sy = (float)dst_rect->height / src_rect->height;
+    avs_update_coefficients(&avs, sx, sy, 0);
 
-        sampler_8x8_state->dw1.table_0x_filter_c2 = 0;
-        sampler_8x8_state->dw1.table_0y_filter_c2 = 0;
+    assert(avs.config->num_phases == 16);
+    for (i = 0; i <= 16; i++) {
+        struct gen8_sampler_8x8_avs_coefficients * const sampler_8x8_state =
+            &sampler_8x8->coefficients[i];
+        const AVSCoeffs * const coeffs = &avs.coeffs[i];
+
+        sampler_8x8_state->dw0.table_0x_filter_c0 =
+            intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
+        sampler_8x8_state->dw0.table_0y_filter_c0 =
+            intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
+        sampler_8x8_state->dw0.table_0x_filter_c1 =
+            intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
+        sampler_8x8_state->dw0.table_0y_filter_c1 =
+            intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
+
+        sampler_8x8_state->dw1.table_0x_filter_c2 =
+            intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
+        sampler_8x8_state->dw1.table_0y_filter_c2 =
+            intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
         sampler_8x8_state->dw1.table_0x_filter_c3 =
-            intel_format_convert(1 - coff, 1, 6, 0);
+            intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
         sampler_8x8_state->dw1.table_0y_filter_c3 =
-            intel_format_convert(1 - coff, 1, 6, 0);
+            intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
 
         sampler_8x8_state->dw2.table_0x_filter_c4 =
-            intel_format_convert(coff, 1, 6, 0);
+            intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
         sampler_8x8_state->dw2.table_0y_filter_c4 =
-            intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->dw2.table_0x_filter_c5 = 0;
-        sampler_8x8_state->dw2.table_0y_filter_c5 = 0;
-
-        sampler_8x8_state->dw3.table_0x_filter_c6 = 0;
-        sampler_8x8_state->dw3.table_0y_filter_c6 = 0;
-        sampler_8x8_state->dw3.table_0x_filter_c7 = 0;
-        sampler_8x8_state->dw3.table_0y_filter_c7 = 0;
+            intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
+        sampler_8x8_state->dw2.table_0x_filter_c5 =
+            intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
+        sampler_8x8_state->dw2.table_0y_filter_c5 =
+            intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
+
+        sampler_8x8_state->dw3.table_0x_filter_c6 =
+            intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
+        sampler_8x8_state->dw3.table_0y_filter_c6 =
+            intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
+        sampler_8x8_state->dw3.table_0x_filter_c7 =
+            intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
+        sampler_8x8_state->dw3.table_0y_filter_c7 =
+            intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
 
         sampler_8x8_state->dw4.pad0 = 0;
         sampler_8x8_state->dw5.pad0 = 0;
-        sampler_8x8_state->dw4.table_1x_filter_c2 = 0;
+        sampler_8x8_state->dw4.table_1x_filter_c2 =
+            intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
         sampler_8x8_state->dw4.table_1x_filter_c3 =
-            intel_format_convert(1 - coff, 1, 6, 0);
+            intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
         sampler_8x8_state->dw5.table_1x_filter_c4 =
-            intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->dw5.table_1x_filter_c5 = 0;
-
-        sampler_8x8_state->dw6.pad0 = 0;
-        sampler_8x8_state->dw7.pad0 = 0;
-        sampler_8x8_state->dw6.table_1y_filter_c2 = 0;
+            intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
+        sampler_8x8_state->dw5.table_1x_filter_c5 =
+            intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
+
+        sampler_8x8_state->dw6.pad0 = 0; /* zero padding, like dw4/dw5 */
+        sampler_8x8_state->dw7.pad0 = 0; /* not chained to the coefficient */
+        sampler_8x8_state->dw6.table_1y_filter_c2 =
+            intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
         sampler_8x8_state->dw6.table_1y_filter_c3 =
-            intel_format_convert(1 - coff, 1, 6, 0);
+            intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
         sampler_8x8_state->dw7.table_1y_filter_c4 =
-            intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->dw7.table_1y_filter_c5 = 0;
+            intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
+        sampler_8x8_state->dw7.table_1y_filter_c5 =
+            intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
     }
 
     sampler_8x8->dw152.default_sharpness_level = 0;
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
index 4d3c9b1..12cd1ae 100755
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -38,6 +38,7 @@
 #include "i965_drv_video.h"
 #include "i965_post_processing.h"
 #include "i965_render.h"
+#include "i965_vpp_avs.h"
 #include "intel_media.h"
 
 extern VAStatus
@@ -2363,6 +2364,22 @@ pp_avs_set_block_parameter(struct 
i965_post_processing_context *pp_context, int
     return 0;
 }
 
+static const AVSConfig gen5_avs_config = {
+    .coeff_frac_bits = 6,
+    .coeff_epsilon = 1.0f / (1U << 6),
+    .num_phases = 16,
+    .num_luma_coeffs = 8,
+    .num_chroma_coeffs = 4,
+};
+
+static const AVSConfig gen6_avs_config = {
+    .coeff_frac_bits = 6,
+    .coeff_epsilon = 1.0f / (1U << 6),
+    .num_phases = 16,
+    .num_luma_coeffs = 8,
+    .num_chroma_coeffs = 4,
+};
+
 static VAStatus
 pp_nv12_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_context *pp_context,
                        const struct i965_surface *src_surface,
@@ -2372,6 +2389,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_context
                        void *filter_param,
                        int nlas)
 {
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
     struct pp_avs_context *pp_avs_context = (struct pp_avs_context 
*)&pp_context->pp_avs_context;
     struct pp_inline_parameter *pp_inline_parameter = 
pp_context->pp_inline_parameter;
     struct pp_static_parameter *pp_static_parameter = 
pp_context->pp_static_parameter;
@@ -2382,6 +2400,8 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_context
     int in_w, in_h, in_wpitch, in_hpitch;
     int out_w, out_h, out_wpitch, out_hpitch;
     int i;
+    AVSState avs;
+    float sx, sy;
 
     /* surface */
     obj_surface = (struct object_surface *)src_surface->base;
@@ -2433,41 +2453,68 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_context
     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
 
-    for (i = 0; i < 17; i++) {
-        /* for Y channel, currently ignore */
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
-        /* for U/V channel, 0.25 */
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+    avs_init_state(&avs, IS_GEN6(i965->intel.device_info) ? &gen6_avs_config :
+        &gen5_avs_config);
+
+    sx = (float)dst_rect->width / src_rect->width;
+    sy = (float)dst_rect->height / src_rect->height;
+    avs_update_coefficients(&avs, sx, sy, 0);
+
+    assert(avs.config->num_phases == 16);
+    for (i = 0; i <= 16; i++) {
+        const AVSCoeffs * const coeffs = &avs.coeffs[i];
+
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
+            intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
+            intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
+            intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
+            intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
+            intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
+            intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
+            intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
+            intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
+            intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
+            intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
+            intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
+            intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
+            intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
+            intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
+            intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
+            intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
+            intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
+            intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
+            intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
+            intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
+            intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
+            intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
+            intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
+            intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
     }
 
     /* Adaptive filter for all channels (DW4.15) */
@@ -2742,6 +2789,8 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_con
     int index, i;
     int width[3], height[3], pitch[3], offset[3];
     int src_width, src_height;
+    AVSState avs;
+    float sx, sy;
 
     /* source surface */
     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
@@ -2762,44 +2811,67 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct 
i965_post_processing_con
     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
 
-    for (i = 0; i < 17; i++) {
-       float coff;
-       coff = i;
-       coff = coff / 16;
-        /* for Y channel, currently ignore */
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
-        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 
intel_format_convert(1 - coff, 1, 6,0);
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 
intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
-        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 
intel_format_convert(1 - coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 
intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
-        /* for U/V channel, 0.25 */
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
-        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 
intel_format_convert(1 - coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 
intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
-        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 
intel_format_convert(1 - coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 
intel_format_convert(coff, 1, 6, 0);
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
-        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
+    avs_init_state(&avs, &gen6_avs_config);
+
+    sx = (float)dst_rect->width / src_rect->width;
+    sy = (float)dst_rect->height / src_rect->height;
+    avs_update_coefficients(&avs, sx, sy, 0);
+
+    assert(avs.config->num_phases == 16);
+    for (i = 0; i <= 16; i++) {
+        const AVSCoeffs * const coeffs = &avs.coeffs[i];
+
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
+            intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 =
+            intel_format_convert(coeffs->y_k_h[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 =
+            intel_format_convert(coeffs->y_k_h[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 =
+            intel_format_convert(coeffs->y_k_h[3], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 =
+            intel_format_convert(coeffs->y_k_h[4], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 =
+            intel_format_convert(coeffs->y_k_h[5], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 =
+            intel_format_convert(coeffs->y_k_h[6], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 =
+            intel_format_convert(coeffs->y_k_h[7], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 =
+            intel_format_convert(coeffs->uv_k_h[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 =
+            intel_format_convert(coeffs->uv_k_h[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 =
+            intel_format_convert(coeffs->uv_k_h[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 =
+            intel_format_convert(coeffs->uv_k_h[3], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 =
+            intel_format_convert(coeffs->y_k_v[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 =
+            intel_format_convert(coeffs->y_k_v[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 =
+            intel_format_convert(coeffs->y_k_v[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 =
+            intel_format_convert(coeffs->y_k_v[3], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 =
+            intel_format_convert(coeffs->y_k_v[4], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 =
+            intel_format_convert(coeffs->y_k_v[5], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 =
+            intel_format_convert(coeffs->y_k_v[6], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 =
+            intel_format_convert(coeffs->y_k_v[7], 1, 6, 1);
+
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 =
+            intel_format_convert(coeffs->uv_k_v[0], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 =
+            intel_format_convert(coeffs->uv_k_v[1], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 =
+            intel_format_convert(coeffs->uv_k_v[2], 1, 6, 1);
+        sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 =
+            intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
     }
 
     sampler_8x8_state->dw136.default_sharpness_level = 0;
diff --git a/src/i965_vpp_avs.c b/src/i965_vpp_avs.c
new file mode 100644
index 0000000..12f1dfe
--- /dev/null
+++ b/src/i965_vpp_avs.c
@@ -0,0 +1,110 @@
+/*
+ * i965_vpp_avs.c - Adaptive Video Scaler (AVS) block
+ *
+ * Copyright (C) 2014 Intel Corporation
+ *   Author: Gwenole Beauchesne <gwenole.beauche...@intel.com>
+ *
+ * The source code contained or described herein and all documents related to
+ * the source code ("Material") are owned by Intel Corporation or its suppliers
+ * or licensors. Title to the Material remains with Intel Corporation or its
+ * suppliers and licensors. The Material contains trade secrets and proprietary
+ * and confidential information of Intel or its suppliers and licensors. The
+ * Material is protected by worldwide copyright and trade secret laws and
+ * treaty provisions. No part of the Material may be used, copied, reproduced,
+ * modified, published, uploaded, posted, transmitted, distributed, or
+ * disclosed in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other intellectual
+ * property right is granted to or conferred upon you by disclosure or delivery
+ * of the Materials, either expressly, by implication, inducement, estoppel or
+ * otherwise. Any license under such intellectual property rights must be
+ * express and approved by Intel in writing.
+ */
+
+#include "sysdeps.h"
+#include <math.h>
+#include <va/va.h>
+#include "i965_vpp_avs.h"
+
+typedef void (*AVSGenCoeffsFunc)(float *coeffs, int num_coeffs, int phase,
+    int num_phases, float f);
+
+/* Sets num_coeffs floats to 0.0f (plain memset when IEC 60559 floats are advertised) */
+static void
+avs_init_coeffs(float *coeffs, int num_coeffs)
+{
+#if defined(__STDC_IEC_559__) && (__STDC_IEC_559__ > 0)
+    memset(coeffs, 0, num_coeffs * sizeof(*coeffs));
+#else
+    int i;
+
+    for (i = 0; i < num_coeffs; i++)
+        coeffs[i] = 0.0f;
+#endif
+}
+
+/* Linear interpolation kernel: returns 1 - |x| for |x| < 1, and 0 otherwise */
+static float
+avs_kernel_linear(float x)
+{
+    const float abs_x = fabsf(x);
+
+    return abs_x < 1.0f ? 1 - abs_x : 0.0f;
+}
+
+/* Bilinear taps for one phase: only the two center taps are written; f is unused */
+static void
+avs_gen_coeffs_linear(float *coeffs, int num_coeffs, int phase, int num_phases,
+    float f)
+{
+    const int c = num_coeffs/2 - 1;
+    const float p = (float)phase / (num_phases*2);
+
+    avs_init_coeffs(coeffs, num_coeffs);
+    coeffs[c] = avs_kernel_linear(p);
+    coeffs[c + 1] = avs_kernel_linear(p - 1);
+}
+
+/* Runs gen_coeffs for phases 0..num_phases inclusive: sx for the _h tables, sy for _v */
+static void
+avs_gen_coeffs(AVSState *avs, float sx, float sy, AVSGenCoeffsFunc gen_coeffs)
+{
+    const AVSConfig * const config = avs->config;
+    int i;
+
+    for (i = 0; i <= config->num_phases; i++) {
+        AVSCoeffs * const coeffs = &avs->coeffs[i];
+
+        gen_coeffs(coeffs->y_k_h, config->num_luma_coeffs,
+            i, config->num_phases, sx);
+        gen_coeffs(coeffs->uv_k_h, config->num_chroma_coeffs,
+            i, config->num_phases, sx);
+        gen_coeffs(coeffs->y_k_v, config->num_luma_coeffs,
+            i, config->num_phases, sy);
+        gen_coeffs(coeffs->uv_k_v, config->num_chroma_coeffs,
+            i, config->num_phases, sy);
+    }
+}
+
+/* Stores the config pointer in the AVS state; the coefficient tables are not touched */
+void
+avs_init_state(AVSState *avs, const AVSConfig *config)
+{
+    avs->config = config;
+}
+
+/* Regenerates coefficients for sx/sy; every flags value selects bilinear; returns true */
+bool
+avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags)
+{
+    AVSGenCoeffsFunc gen_coeffs;
+
+    flags &= VA_FILTER_SCALING_MASK;
+    switch (flags) {
+    default:
+        gen_coeffs = avs_gen_coeffs_linear;
+        break;
+    }
+    avs_gen_coeffs(avs, sx, sy, gen_coeffs);
+    return true;
+}
diff --git a/src/i965_vpp_avs.h b/src/i965_vpp_avs.h
new file mode 100644
index 0000000..0938c4d
--- /dev/null
+++ b/src/i965_vpp_avs.h
@@ -0,0 +1,85 @@
+/*
+ * i965_vpp_avs.h - Adaptive Video Scaler (AVS) block
+ *
+ * Copyright (C) 2014 Intel Corporation
+ *   Author: Gwenole Beauchesne <gwenole.beauche...@intel.com>
+ *
+ * The source code contained or described herein and all documents related to
+ * the source code ("Material") are owned by Intel Corporation or its suppliers
+ * or licensors. Title to the Material remains with Intel Corporation or its
+ * suppliers and licensors. The Material contains trade secrets and proprietary
+ * and confidential information of Intel or its suppliers and licensors. The
+ * Material is protected by worldwide copyright and trade secret laws and
+ * treaty provisions. No part of the Material may be used, copied, reproduced,
+ * modified, published, uploaded, posted, transmitted, distributed, or
+ * disclosed in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other intellectual
+ * property right is granted to or conferred upon you by disclosure or delivery
+ * of the Materials, either expressly, by implication, inducement, estoppel or
+ * otherwise. Any license under such intellectual property rights must be
+ * express and approved by Intel in writing.
+ */
+
+#ifndef I965_VPP_AVS_H
+#define I965_VPP_AVS_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+/** Maximum number of phases for the sharp filter */
+#define AVS_MAX_PHASES 16
+
+/** Maximum number of coefficients for luma samples */
+#define AVS_MAX_LUMA_COEFFS 8
+
+/** Maximum number of coefficients for chroma samples */
+#define AVS_MAX_CHROMA_COEFFS 4
+
+typedef struct avs_coeffs               AVSCoeffs;
+typedef struct avs_config               AVSConfig;
+typedef struct avs_state                AVSState;
+
+/** AVS coefficients for one phase */
+struct avs_coeffs {
+    /** Coefficients for luma samples on the X-axis (horizontal) */
+    float y_k_h[AVS_MAX_LUMA_COEFFS];
+    /** Coefficients for luma samples on the Y-axis (vertical) */
+    float y_k_v[AVS_MAX_LUMA_COEFFS];
+    /** Coefficients for chroma samples on the X-axis (horizontal) */
+    float uv_k_h[AVS_MAX_CHROMA_COEFFS];
+    /** Coefficients for chroma samples on the Y-axis (vertical) */
+    float uv_k_v[AVS_MAX_CHROMA_COEFFS];
+};
+
+/** Static configuration (per-generation) */
+struct avs_config {
+    /** Number of bits used for the fractional part of a coefficient */
+    int coeff_frac_bits;
+    /** The smallest float that could be represented as a coefficient */
+    float coeff_epsilon;
+    /** Number of phases for the sharp filter */
+    int num_phases;
+    /** Number of coefficients for luma samples */
+    int num_luma_coeffs;
+    /** Number of coefficients for chroma samples */
+    int num_chroma_coeffs;
+};
+
+/** AVS block state */
+struct avs_state {
+    /** Per-generation configuration parameters */
+    const AVSConfig *config;
+    /** Polyphase scaler coefficients, one entry per phase 0..AVS_MAX_PHASES */
+    AVSCoeffs coeffs[AVS_MAX_PHASES + 1];
+};
+
+/** Initializes AVS state with the supplied configuration */
+void
+avs_init_state(AVSState *avs, const AVSConfig *config);
+
+/** Updates AVS coefficients for the supplied factors and quality level */
+bool
+avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags);
+
+#endif /* I965_VPP_AVS_H */
-- 
1.9.1

_______________________________________________
Libva mailing list
Libva@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libva

[Libva] [PATCH intel-driver v2 3/8] vpp: factor out calculation of AVS coefficients.

Reply via email to