[x265] [PATCH] encoder: Add support for Temporal Layering of the encoded bitstream

aarthi Mon, 02 Feb 2015 06:15:15 -0800

# HG changeset patch
# User Aarthi Thirumalai
# Date 1422885719 -19800
#      Mon Feb 02 19:31:59 2015 +0530
# Node ID e241b255b992fdf5c83c1744240ff5a6beaa188b
# Parent  1760823cdd46572b5db364cb93495bbff6908e17
encoder: Add support for Temporal Layering of the encoded bitstream.


Use --temporal-layers to enable Temporal Sub Layers while encoding. This signals
the NAL units of coded slices with their temporalId. Output bitstreams can be
extracted either at the base temporal layer (layer 0), with roughly half the
frame rate, or at the higher temporal layer (layer 1), which decodes all the
frames in the sequence.

diff -r 1760823cdd46 -r e241b255b992 doc/reST/cli.rst
--- a/doc/reST/cli.rst  Mon Feb 02 16:27:07 2015 +0530
+++ b/doc/reST/cli.rst  Mon Feb 02 19:31:59 2015 +0530
@@ -1048,15 +1048,6 @@
        target bitrate in CBR mode. Bitrate adherence is prioritised
        over quality. Rate tolerance is reduced to 50%. Default disabled.
        
-       This option is for use-cases which require the final average bitrate 
-       to be within very strict limits of the target - preventing overshoots 
-       completely, and achieve bitrates within 5% of target bitrate, 
-       especially in short segment encodes. Typically, the encoder stays 
-       conservative, waiting until there is enough feedback in terms of 
-       encoded frames to control QP. strict-cbr allows the encoder to be 
-       more aggressive in hitting the target bitrate even for short segment 
-       videos. Experimental.
-       
 .. option:: --cbqpoffs <integer>
 
        Offset of Cb chroma QP from the luma QP selected by rate control.
@@ -1097,6 +1088,12 @@
        The maximum single adjustment in QP allowed to rate control. Default
        4
 
+.. option:: --ratetol <float>
+
+       The degree of rate fluctuation that x265 tolerates. Rate tolerance
+       is used along with overflow (difference between actual and target
+       bitrate), to adjust qp. Default is 1.0
+
 .. option:: --qblur <float>
 
        Temporally blur quants. Default 0.5
@@ -1374,6 +1371,12 @@
        1. MD5
        2. CRC
        3. Checksum
+       
+.. option:: --temporal-layers,--no-temporal-layers
+
+       Enable Temporal Sub Layers in the bitstream and signal the temporal layer ids
+       in the VPS, SPS and coded slice NAL unit headers. As of now,
+       maxTemporalSubLayers that can be enabled = 2 when this option is turned on.
 
 Debugging options
 =================
diff -r 1760823cdd46 -r e241b255b992 source/common/param.cpp
--- a/source/common/param.cpp   Mon Feb 02 16:27:07 2015 +0530
+++ b/source/common/param.cpp   Mon Feb 02 19:31:59 2015 +0530
@@ -181,6 +181,7 @@
     param->bIntraInBFrames = 0;
     param->bLossless = 0;
     param->bCULossless = 0;
+    param->bEnableTemporalSubLayers = 1;
 
     /* Rate control options */
     param->rc.vbvMaxBitrate = 0;
@@ -806,6 +807,10 @@
     OPT("scaling-list") p->scalingLists = strdup(value);
     OPT("lambda-file") p->rc.lambdaFileName = strdup(value);
     OPT("analysis-file") p->analysisFileName = strdup(value);
+    OPT("temporal-layers")
+    {
+        p->bEnableTemporalSubLayers = atobool(value);
+    }
     else
         return X265_PARAM_BAD_NAME;
 #undef OPT
diff -r 1760823cdd46 -r e241b255b992 source/common/slice.h
--- a/source/common/slice.h     Mon Feb 02 16:27:07 2015 +0530
+++ b/source/common/slice.h     Mon Feb 02 19:31:59 2015 +0530
@@ -149,6 +149,7 @@
 
 struct VPS
 {
+    uint32_t         maxTempSubLayers;
     uint32_t         numReorderPics;
     uint32_t         maxDecPicBuffering;
     HRDInfo          hrdParameters;
@@ -228,6 +229,7 @@
     bool     bUseAMP; // use param
     uint32_t maxAMPDepth;
 
+    uint32_t maxTempSubLayers; // max number of Temporal Sub layers
     uint32_t maxDecPicBuffering; // these are dups of VPS values
     int      numReorderPics;
     int      maxLatencyIncrease;
diff -r 1760823cdd46 -r e241b255b992 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp        Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/encoder.cpp        Mon Feb 02 19:31:59 2015 +0530
@@ -1331,6 +1331,7 @@
     sps->bUseAMP = m_param->bEnableAMP;
     sps->maxAMPDepth = m_param->bEnableAMP ? g_maxCUDepth : 0;
 
+    sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
     sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
     sps->numReorderPics = m_vps.numReorderPics;
     sps->maxLatencyIncrease = m_param->bframes;
@@ -1540,6 +1541,16 @@
         p->bDistributeMotionEstimation = p->bDistributeModeAnalysis = 0;
     }
 
+    if (p->bEnableTemporalSubLayers)
+    {
+        if (p->bFrameAdaptive)
+            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed 
GOP structure, requires --b-adapt 0\n");
+        p->bFrameAdaptive = 0;
+        if (p->scenecutThreshold)
+            x265_log(p, X265_LOG_WARNING, "Scalable Video Coding needs fixed 
GOP structure, requires --scenecut 0\n");
+        p->scenecutThreshold = 0;
+    }
+
     m_bframeDelay = p->bframes ? (p->bBPyramid ? 2 : 1) : 0;
 
     p->bFrameBias = X265_MIN(X265_MAX(-90, p->bFrameBias), 100);
diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.cpp
--- a/source/encoder/entropy.cpp        Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/entropy.cpp        Mon Feb 02 19:31:59 2015 +0530
@@ -51,17 +51,20 @@
     WRITE_CODE(0,       4, "vps_video_parameter_set_id");
     WRITE_CODE(3,       2, "vps_reserved_three_2bits");
     WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
-    WRITE_CODE(0,       3, "vps_max_sub_layers_minus1");
-    WRITE_FLAG(1,          "vps_temporal_id_nesting_flag");
+    WRITE_CODE(vps.maxTempSubLayers - 1,       3, "vps_max_sub_layers_minus1");
+    WRITE_FLAG(vps.maxTempSubLayers == 1 ? 1 : 0, 
"vps_temporal_id_nesting_flag");
     WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
 
-    codeProfileTier(vps.ptl);
+    codeProfileTier(vps.ptl ,vps.maxTempSubLayers - 1);
 
     WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
-    WRITE_UVLC(vps.maxDecPicBuffering - 1, 
"vps_max_dec_pic_buffering_minus1[i]");
-    WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
+    for(uint32_t i = 0; i <= vps.maxTempSubLayers - 1; i++)
+    {
+        WRITE_UVLC(vps.maxDecPicBuffering - 1, 
"vps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
+        WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
+    }
 
-    WRITE_UVLC(0,    "vps_max_latency_increase_plus1[i]");
     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
     WRITE_UVLC(0,    "vps_max_op_sets_minus1");
     WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info 
in SPS-VUI */
@@ -71,10 +74,10 @@
 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const 
ProfileTierLevel& ptl)
 {
     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
-    WRITE_CODE(0, 3, "sps_max_sub_layers_minus1");
-    WRITE_FLAG(1,    "sps_temporal_id_nesting_flag");
+    WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
+    WRITE_FLAG(sps.maxTempSubLayers == 1 ? 1 : 0, 
"sps_temporal_id_nesting_flag");
 
-    codeProfileTier(ptl);
+    codeProfileTier(ptl, sps.maxTempSubLayers - 1);
 
     WRITE_UVLC(0, "sps_seq_parameter_set_id");
     WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
@@ -101,9 +104,12 @@
     WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
 
-    WRITE_UVLC(sps.maxDecPicBuffering - 1, 
"sps_max_dec_pic_buffering_minus1[i]");
-    WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
-    WRITE_UVLC(sps.maxLatencyIncrease + 1, 
"sps_max_latency_increase_plus1[i]");
+    for(uint32_t i = 0; i <= sps.maxTempSubLayers - 1; i++)
+    {
+        WRITE_UVLC(sps.maxDecPicBuffering - 1, 
"sps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
+        WRITE_UVLC(sps.maxLatencyIncrease + 1, 
"sps_max_latency_increase_plus1[i]");
+    }
 
     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    
"log2_min_coding_block_size_minus3");
     WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, 
"log2_diff_max_min_coding_block_size");
@@ -184,7 +190,7 @@
     WRITE_FLAG(0, "pps_extension_flag");
 }
 
-void Entropy::codeProfileTier(const ProfileTierLevel& ptl)
+void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int 
maxTempSubLayerMinus1)
 {
     WRITE_CODE(0, 2,                "XXX_profile_space[]");
     WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
@@ -222,6 +228,13 @@
     }
 
     WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
+    if (maxTempSubLayerMinus1 > 0)
+    {
+      WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
+      WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
+      for (int i = maxTempSubLayerMinus1; i < 8 ; i++)
+          WRITE_CODE(0, 2, "reserved_zero_2bits");
+    }
 }
 
 void Entropy::codeVUI(const VUI& vui)
@@ -331,24 +344,27 @@
 
 void Entropy::codeHrdParameters(const HRDInfo& hrd)
 {
-    WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
-    WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
-    WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
+    for(int i = 0; i <= 1; i++)
+    {
+        WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
+        WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
+        WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
 
-    WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
-    WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
+        WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
+        WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
 
-    WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, 
"initial_cpb_removal_delay_length_minus1");
-    WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, 
"au_cpb_removal_delay_length_minus1");
-    WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, 
"dpb_output_delay_length_minus1");
+        WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, 
"initial_cpb_removal_delay_length_minus1");
+        WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, 
"au_cpb_removal_delay_length_minus1");
+        WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, 
"dpb_output_delay_length_minus1");
 
-    WRITE_FLAG(1, "fixed_pic_rate_general_flag");
-    WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
-    WRITE_UVLC(0, "cpb_cnt_minus1");
+        WRITE_FLAG(1, "fixed_pic_rate_general_flag");
+        WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
+        WRITE_UVLC(0, "cpb_cnt_minus1");
 
-    WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
-    WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
-    WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+        WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
+        WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
+        WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
+    }
 }
 
 void Entropy::codeAUD(const Slice& slice)
diff -r 1760823cdd46 -r e241b255b992 source/encoder/entropy.h
--- a/source/encoder/entropy.h  Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/entropy.h  Mon Feb 02 19:31:59 2015 +0530
@@ -230,7 +230,7 @@
     void writeEpExGolomb(uint32_t symbol, uint32_t count);
     void writeCoefRemainExGolomb(uint32_t symbol, const uint32_t absGoRice);
 
-    void codeProfileTier(const ProfileTierLevel& ptl);
+    void codeProfileTier(const ProfileTierLevel& ptl, int 
maxTempSubLayerMinus1);
     void codeScalingList(const ScalingList&);
     void codeScalingList(const ScalingList& scalingList, uint32_t sizeId, 
uint32_t listId);
 
diff -r 1760823cdd46 -r e241b255b992 source/encoder/level.cpp
--- a/source/encoder/level.cpp  Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/level.cpp  Mon Feb 02 19:31:59 2015 +0530
@@ -60,6 +60,7 @@
 /* determine minimum decoder level required to decode the described video */
 void determineLevel(const x265_param &param, VPS& vps)
 {
+    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
     if (param.bLossless)
         vps.ptl.profileIdc = Profile::NONE;
     else if (param.internalCsp == X265_CSP_I420)
diff -r 1760823cdd46 -r e241b255b992 source/encoder/nal.cpp
--- a/source/encoder/nal.cpp    Mon Feb 02 16:27:07 2015 +0530
+++ b/source/encoder/nal.cpp    Mon Feb 02 19:31:59 2015 +0530
@@ -107,6 +107,9 @@
      * nuh_reserved_zero_6bits  6-bits
      * nuh_temporal_id_plus1    3-bits */
     out[bytes++] = (uint8_t)nalUnitType << 1;
+    if (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N)
+        out[bytes++] = 2;
+    else
     out[bytes++] = 1;
 
     /* 7.4.1 ...
diff -r 1760823cdd46 -r e241b255b992 source/x265.h
--- a/source/x265.h     Mon Feb 02 16:27:07 2015 +0530
+++ b/source/x265.h     Mon Feb 02 19:31:59 2015 +0530
@@ -789,6 +789,12 @@
      * CU. */
     int       bCULossless;
 
+    /* Enable Temporal Sub Layers while encoding, signals NAL units of coded slices
+     * with their temporalId. Output bitstreams can be extracted either at the base temporal layer
+     * (layer 0) with roughly half the frame rate or at a higher temporal layer (layer 1)
+     * that decodes all the frames in the sequence. */
+    int       bEnableTemporalSubLayers;
+
     /*== Rate Control ==*/
 
     struct
diff -r 1760823cdd46 -r e241b255b992 source/x265cli.h
--- a/source/x265cli.h  Mon Feb 02 16:27:07 2015 +0530
+++ b/source/x265cli.h  Mon Feb 02 19:31:59 2015 +0530
@@ -193,6 +193,7 @@
     { "analysis-mode",  required_argument, NULL, 0 },
     { "analysis-file",  required_argument, NULL, 0 },
     { "strict-cbr",           no_argument, NULL, 0 },
+    { "temporal-layers",        no_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

[x265] [PATCH] encoder: Add support for Temporal Layering of the encoded bitstream

Reply via email to