From 74cae80fbbfb7c098ed58f76c84acf60a384404e Mon Sep 17 00:00:00 2001
From: Keshav E <keshav@multicorewareinc.com>
Date: Fri, 16 Dec 2022 19:03:10 +0530
Subject: [PATCH] Add VPS and SPS sub layer information to the bitstream

---
 source/common/slice.h           | 12 ++++-----
 source/encoder/dpb.cpp          |  2 +-
 source/encoder/encoder.cpp      | 12 ++++++---
 source/encoder/entropy.cpp      | 19 +++++++------
 source/encoder/frameencoder.cpp |  6 ++---
 source/encoder/level.cpp        | 47 +++++++++++++++++++++++++++------
 source/encoder/nal.cpp          |  4 +--
 source/encoder/nal.h            |  2 +-
 source/encoder/ratecontrol.cpp  |  2 +-
 source/encoder/sei.cpp          |  2 +-
 10 files changed, 73 insertions(+), 35 deletions(-)

diff --git a/source/common/slice.h b/source/common/slice.h
index 9b7cb0bed..f6718ee95 100644
--- a/source/common/slice.h
+++ b/source/common/slice.h
@@ -156,9 +156,9 @@ struct VPS
     HRDInfo          hrdParameters;
     ProfileTierLevel ptl;
     uint32_t         maxTempSubLayers;
-    uint32_t         numReorderPics;
-    uint32_t         maxDecPicBuffering;
-    uint32_t         maxLatencyIncrease;
+    uint32_t         numReorderPics[MAX_T_LAYERS];
+    uint32_t         maxDecPicBuffering[MAX_T_LAYERS];
+    uint32_t         maxLatencyIncrease[MAX_T_LAYERS];
 };
 
 struct Window
@@ -235,9 +235,9 @@ struct SPS
     uint32_t maxAMPDepth;
 
     uint32_t maxTempSubLayers;   // max number of Temporal Sub layers
-    uint32_t maxDecPicBuffering; // these are dups of VPS values
-    uint32_t maxLatencyIncrease;
-    int      numReorderPics;
+    uint32_t maxDecPicBuffering[MAX_T_LAYERS]; // these are dups of VPS values
+    uint32_t maxLatencyIncrease[MAX_T_LAYERS];
+    int      numReorderPics[MAX_T_LAYERS];
 
     RPS      spsrps[MAX_NUM_SHORT_TERM_RPS];
     int      spsrpsNum;
diff --git a/source/encoder/dpb.cpp b/source/encoder/dpb.cpp
index 79274f5dd..bfe6f2290 100644
--- a/source/encoder/dpb.cpp
+++ b/source/encoder/dpb.cpp
@@ -179,7 +179,7 @@ void DPB::prepareEncode(Frame *newFrame)
     // Do decoding refresh marking if any
     decodingRefreshMarking(pocCurr, slice->m_nalUnitType);
 
-    computeRPS(pocCurr, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering);
+    computeRPS(pocCurr, slice->isIRAP(), &slice->m_rps, slice->m_sps->maxDecPicBuffering[newFrame->m_tempLayer]);
 
     // Mark pictures in m_piclist as unreferenced if they are not included in RPS
     applyReferencePictureSet(&slice->m_rps, pocCurr);
diff --git a/source/encoder/encoder.cpp b/source/encoder/encoder.cpp
index b2a8ab838..91c0ae24f 100644
--- a/source/encoder/encoder.cpp
+++ b/source/encoder/encoder.cpp
@@ -3349,10 +3349,14 @@ void Encoder::initSPS(SPS *sps)
     sps->bUseAMP = m_param->bEnableAMP;
     sps->maxAMPDepth = m_param->bEnableAMP ? m_param->maxCUDepth : 0;
 
-    sps->maxTempSubLayers = m_param->bEnableTemporalSubLayers ? 2 : 1;
-    sps->maxDecPicBuffering = m_vps.maxDecPicBuffering;
-    sps->numReorderPics = m_vps.numReorderPics;
-    sps->maxLatencyIncrease = m_vps.maxLatencyIncrease = m_param->bframes;
+    sps->maxTempSubLayers = m_vps.maxTempSubLayers; // same count as the VPS (user-selected number of temporal sub-layers)
+
+    for(uint8_t i = 0; i < sps->maxTempSubLayers; i++)
+    {
+        sps->maxDecPicBuffering[i] = m_vps.maxDecPicBuffering[i];
+        sps->numReorderPics[i] = m_vps.numReorderPics[i];
+        sps->maxLatencyIncrease[i] = m_vps.maxLatencyIncrease[i] = m_param->bframes; // chained assignment: also overwrites the VPS value
+    }
 
     sps->bUseStrongIntraSmoothing = m_param->bEnableStrongIntraSmoothing;
     sps->bTemporalMVPEnabled = m_param->bEnableTemporalMvp;
diff --git a/source/encoder/entropy.cpp b/source/encoder/entropy.cpp
index 201056220..18de2789e 100644
--- a/source/encoder/entropy.cpp
+++ b/source/encoder/entropy.cpp
@@ -245,9 +245,9 @@ void Entropy::codeVPS(const VPS& vps)
 
     for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
     {
-        WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
-        WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
-        WRITE_UVLC(vps.maxLatencyIncrease + 1, "vps_max_latency_increase_plus1[i]");
+        WRITE_UVLC(vps.maxDecPicBuffering[i] - 1, "vps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(vps.numReorderPics[i],         "vps_num_reorder_pics[i]");
+        WRITE_UVLC(vps.maxLatencyIncrease[i] + 1, "vps_max_latency_increase_plus1[i]");
     }
 
     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
@@ -291,9 +291,9 @@ void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const Prof
 
     for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
     {
-        WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
-        WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
-        WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
+        WRITE_UVLC(sps.maxDecPicBuffering[i] - 1, "sps_max_dec_pic_buffering_minus1[i]");
+        WRITE_UVLC(sps.numReorderPics[i],         "sps_num_reorder_pics[i]");
+        WRITE_UVLC(sps.maxLatencyIncrease[i] + 1, "sps_max_latency_increase_plus1[i]");
     }
 
     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
@@ -418,8 +418,11 @@ void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers)
 
     if (maxTempSubLayers > 1)
     {
-         WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
-         WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
+        for(int i = 0; i < maxTempSubLayers - 1; i++)
+        {
+            WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
+            WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
+        }
          for (int i = maxTempSubLayers - 1; i < 8 ; i++)
              WRITE_CODE(0, 2, "reserved_zero_2bits");
     }
diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp
index 1ce0d393c..659b87c89 100644
--- a/source/encoder/frameencoder.cpp
+++ b/source/encoder/frameencoder.cpp
@@ -777,7 +777,7 @@ void FrameEncoder::compressFrame()
             // wait after removal of the access unit with the most recent
             // buffering period SEI message
             sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
-            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics + poc - m_rce.encodeOrder;
+            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics[m_frame->m_tempLayer] + poc - m_rce.encodeOrder;
         }
 
         sei->writeSEImessages(m_bs, *slice->m_sps, NAL_UNIT_PREFIX_SEI, m_nalList, m_param->bSingleSeiNal);
@@ -1098,7 +1098,7 @@ void FrameEncoder::compressFrame()
             
             m_bs.writeByteAlignment();
 
-            m_nalList.serialize(slice->m_nalUnitType, m_bs);
+            m_nalList.serialize(slice->m_nalUnitType, m_bs, (!!m_param->bEnableTemporalSubLayers ? m_frame->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
         }
     }
     else
@@ -1119,7 +1119,7 @@ void FrameEncoder::compressFrame()
             m_entropyCoder.codeSliceHeaderWPPEntryPoints(m_substreamSizes, (slice->m_sps->numCuInHeight - 1), maxStreamSize);
         m_bs.writeByteAlignment();
 
-        m_nalList.serialize(slice->m_nalUnitType, m_bs);
+        m_nalList.serialize(slice->m_nalUnitType, m_bs, (!!m_param->bEnableTemporalSubLayers ? m_frame->m_tempLayer + 1 : (1 + (slice->m_nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N))));
     }
 
     if (m_param->decodedPictureHashSEI)
diff --git a/source/encoder/level.cpp b/source/encoder/level.cpp
index 8934a26b3..b224a5d3f 100644
--- a/source/encoder/level.cpp
+++ b/source/encoder/level.cpp
@@ -72,7 +72,7 @@ void determineLevel(const x265_param &param, VPS& vps)
      * for intra-only profiles (vps.ptl.intraConstraintFlag) */
     vps.ptl.lowerBitRateConstraintFlag = true;
 
-    vps.maxTempSubLayers = param.bEnableTemporalSubLayers ? 2 : 1;
+    vps.maxTempSubLayers = !!param.bEnableTemporalSubLayers ? param.bEnableTemporalSubLayers : 1;
     
     if (param.internalCsp == X265_CSP_I420 && param.internalBitDepth <= 10)
     {
@@ -167,7 +167,7 @@ void determineLevel(const x265_param &param, VPS& vps)
 
         /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than
          * or equal to MaxDpbSize */
-        if (vps.maxDecPicBuffering > maxDpbSize)
+        if (vps.maxDecPicBuffering[vps.maxTempSubLayers - 1] > maxDpbSize)
             continue;
 
         /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
@@ -182,8 +182,8 @@ void determineLevel(const x265_param &param, VPS& vps)
         }
 
         /* The value of NumPocTotalCurr shall be less than or equal to 8 */
-        int numPocTotalCurr = param.maxNumReferences + vps.numReorderPics;
-        if (numPocTotalCurr > 8)
+        int numPocTotalCurr = param.maxNumReferences + vps.numReorderPics[vps.maxTempSubLayers - 1];
+        if (numPocTotalCurr > 10) // NOTE(review): the comment above still states a limit of 8 - confirm which bound is intended
         {
             x265_log(&param, X265_LOG_WARNING, "level %s detected, but NumPocTotalCurr (total references) is non-compliant\n", levels[i].name);
             vps.ptl.profileIdc = Profile::NONE;
@@ -289,9 +289,40 @@ void determineLevel(const x265_param &param, VPS& vps)
  * circumstances it will be quite noisy */
 bool enforceLevel(x265_param& param, VPS& vps)
 {
-    vps.numReorderPics = (param.bBPyramid && param.bframes > 1) ? 2 : !!param.bframes;
-    vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 2, (uint32_t)param.maxNumReferences) + 1);
+    vps.maxTempSubLayers = !!param.bEnableTemporalSubLayers ? param.bEnableTemporalSubLayers : 1;
+    for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
+    {
+        vps.numReorderPics[i] = (i == 0) ? ((param.bBPyramid && param.bframes > 1) ? 2 : !!param.bframes) : i;
+        vps.maxDecPicBuffering[i] = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics[i] + 2, (uint32_t)param.maxNumReferences) + 1);
+    }
 
+    if (!!param.bEnableTemporalSubLayers)
+    {
+        /* Enforce the ordering constraints between sub-layers on the signalled layers only:
+         * entries at index >= vps.maxTempSubLayers are not set by the loop above and are not
+         * written by codeVPS/codeSPS, so they must not be read or propagated here */
+        const uint32_t lastLayer = vps.maxTempSubLayers - 1;
+
+        for (uint32_t i = 0; i < lastLayer; i++)
+        {
+            // a lower layer can not have higher value of numReorderPics than a higher layer
+            if (vps.numReorderPics[i + 1] < vps.numReorderPics[i])
+                vps.numReorderPics[i + 1] = vps.numReorderPics[i];
+
+            // the value of numReorderPics[i] shall be in the range of 0 to maxDecPicBuffering[i] - 1, inclusive
+            if (vps.numReorderPics[i] > vps.maxDecPicBuffering[i] - 1)
+                vps.maxDecPicBuffering[i] = vps.numReorderPics[i] + 1;
+
+            // a lower layer can not have higher value of maxDecPicBuffering than a higher layer
+            if (vps.maxDecPicBuffering[i + 1] < vps.maxDecPicBuffering[i])
+                vps.maxDecPicBuffering[i + 1] = vps.maxDecPicBuffering[i];
+        }
+
+        // the loop stops one short of the highest signalled layer, so apply the
+        // numReorderPics <= maxDecPicBuffering - 1 constraint to that layer here
+        if (vps.numReorderPics[lastLayer] > vps.maxDecPicBuffering[lastLayer] - 1)
+            vps.maxDecPicBuffering[lastLayer] = vps.numReorderPics[lastLayer] + 1;
+    }
     /* no level specified by user, just auto-detect from the configuration */
     if (param.levelIdc <= 0)
         return true;
@@ -391,10 +422,10 @@ bool enforceLevel(x265_param& param, VPS& vps)
     }
 
     int savedRefCount = param.maxNumReferences;
-    while (vps.maxDecPicBuffering > maxDpbSize && param.maxNumReferences > 1)
+    while (vps.maxDecPicBuffering[vps.maxTempSubLayers - 1] > maxDpbSize && param.maxNumReferences > 1)
     {
         param.maxNumReferences--;
-        vps.maxDecPicBuffering = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics + 1, (uint32_t)param.maxNumReferences) + 1);
+        vps.maxDecPicBuffering[vps.maxTempSubLayers - 1] = X265_MIN(MAX_NUM_REF, X265_MAX(vps.numReorderPics[vps.maxTempSubLayers - 1] + 1, (uint32_t)param.maxNumReferences) + 1);
     }
     if (param.maxNumReferences != savedRefCount)
         x265_log(&param, X265_LOG_WARNING, "Lowering max references to %d to meet level requirement\n", param.maxNumReferences);
diff --git a/source/encoder/nal.cpp b/source/encoder/nal.cpp
index 45c0db4d7..3e41c5111 100644
--- a/source/encoder/nal.cpp
+++ b/source/encoder/nal.cpp
@@ -57,7 +57,7 @@ void NALList::takeContents(NALList& other)
     other.m_buffer = X265_MALLOC(uint8_t, m_allocSize);
 }
 
-void NALList::serialize(NalUnitType nalUnitType, const Bitstream& bs)
+void NALList::serialize(NalUnitType nalUnitType, const Bitstream& bs, uint8_t temporalID)
 {
     static const char startCodePrefix[] = { 0, 0, 0, 1 };
 
@@ -114,7 +114,7 @@ void NALList::serialize(NalUnitType nalUnitType, const Bitstream& bs)
      * nuh_reserved_zero_6bits  6-bits
      * nuh_temporal_id_plus1    3-bits */
     out[bytes++] = (uint8_t)nalUnitType << 1;
-    out[bytes++] = 1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N);
+    out[bytes++] = temporalID; // second header byte: caller supplies the nuh_temporal_id_plus1 value (temporal id + 1)
 
     /* 7.4.1 ...
      * Within the NAL unit, the following three-byte sequences shall not occur at
diff --git a/source/encoder/nal.h b/source/encoder/nal.h
index 66f7954a7..2443413ce 100644
--- a/source/encoder/nal.h
+++ b/source/encoder/nal.h
@@ -56,7 +56,7 @@ public:
 
     void takeContents(NALList& other);
 
-    void serialize(NalUnitType nalUnitType, const Bitstream& bs);
+    void serialize(NalUnitType nalUnitType, const Bitstream& bs, uint8_t temporalID = 1);
 
     uint32_t serializeSubstreams(uint32_t* streamSizeBytes, uint32_t streamCount, const Bitstream* streams);
 };
diff --git a/source/encoder/ratecontrol.cpp b/source/encoder/ratecontrol.cpp
index b645676c8..090d0a35f 100644
--- a/source/encoder/ratecontrol.cpp
+++ b/source/encoder/ratecontrol.cpp
@@ -905,7 +905,7 @@ void RateControl::initHRD(SPS& sps)
 
     TimingInfo *time = &sps.vuiParameters.timingInfo;
     int maxCpbOutputDelay = (int)(X265_MIN(m_param->keyframeMax * MAX_DURATION * time->timeScale / time->numUnitsInTick, INT_MAX));
-    int maxDpbOutputDelay = (int)(sps.maxDecPicBuffering * MAX_DURATION * time->timeScale / time->numUnitsInTick);
+    int maxDpbOutputDelay = (int)(sps.maxDecPicBuffering[sps.maxTempSubLayers - 1] * MAX_DURATION * time->timeScale / time->numUnitsInTick);
     int maxDelay = (int)(90000.0 * cpbSizeUnscale / bitRateUnscale + 0.5);
 
     hrd->initialCpbRemovalDelayLength = 2 + x265_clip3(4, 22, 32 - calcLength(maxDelay));
diff --git a/source/encoder/sei.cpp b/source/encoder/sei.cpp
index fade1c747..3002d11ad 100644
--- a/source/encoder/sei.cpp
+++ b/source/encoder/sei.cpp
@@ -68,7 +68,7 @@ void SEI::writeSEImessages(Bitstream& bs, const SPS& sps, NalUnitType nalUnitTyp
     {
         if (nalUnitType != NAL_UNIT_UNSPECIFIED)
             bs.writeByteAlignment();
-        list.serialize(nalUnitType, bs);
+        list.serialize(nalUnitType, bs, (1 + (nalUnitType == NAL_UNIT_CODED_SLICE_TSA_N)));
     }
 }
 
-- 
2.28.0.windows.1

