LCOV - code coverage report
Current view: top level - libavcodec - hevc_filter.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 540 547 98.7 %
Date: 2017-12-18 20:14:19 Functions: 16 16 100.0 %

          Line data    Source code
       1             : /*
       2             :  * HEVC video decoder
       3             :  *
       4             :  * Copyright (C) 2012 - 2013 Guillaume Martres
       5             :  * Copyright (C) 2013 Seppo Tomperi
       6             :  * Copyright (C) 2013 Wassim Hamidouche
       7             :  *
       8             :  * This file is part of FFmpeg.
       9             :  *
      10             :  * FFmpeg is free software; you can redistribute it and/or
      11             :  * modify it under the terms of the GNU Lesser General Public
      12             :  * License as published by the Free Software Foundation; either
      13             :  * version 2.1 of the License, or (at your option) any later version.
      14             :  *
      15             :  * FFmpeg is distributed in the hope that it will be useful,
      16             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      18             :  * Lesser General Public License for more details.
      19             :  *
      20             :  * You should have received a copy of the GNU Lesser General Public
      21             :  * License along with FFmpeg; if not, write to the Free Software
      22             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      23             :  */
      24             : 
      25             : #include "libavutil/common.h"
      26             : #include "libavutil/internal.h"
      27             : 
      28             : #include "cabac_functions.h"
      29             : #include "hevcdec.h"
      30             : 
      31             : #include "bit_depth_template.c"
      32             : 
      33             : #define LUMA 0
      34             : #define CB 1
      35             : #define CR 2
      36             : 
      37             : static const uint8_t tctable[54] = {
      38             :     0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 1, // QP  0...18
      39             :     1, 1, 1, 1, 1, 1, 1,  1,  2,  2,  2,  2,  3,  3,  3,  3, 4, 4, 4, // QP 19...37
      40             :     5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24           // QP 38...53
      41             : };
      42             : 
      43             : static const uint8_t betatable[52] = {
      44             :      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  8, // QP 0...18
      45             :      9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, // QP 19...37
      46             :     38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64                      // QP 38...51
      47             : };
      48             : 
      49    20528260 : static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
      50             : {
      51             :     static const int qp_c[] = {
      52             :         29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
      53             :     };
      54             :     int qp, qp_i, offset, idxt;
      55             : 
      56             :     // slice qp offset is not used for deblocking
      57    20528260 :     if (c_idx == 1)
      58    10264130 :         offset = s->ps.pps->cb_qp_offset;
      59             :     else
      60    10264130 :         offset = s->ps.pps->cr_qp_offset;
      61             : 
      62    20528260 :     qp_i = av_clip(qp_y + offset, 0, 57);
      63    20528260 :     if (s->ps.sps->chroma_format_idc == 1) {
      64    17683324 :         if (qp_i < 30)
      65     2541198 :             qp = qp_i;
      66    15142126 :         else if (qp_i > 43)
      67      113927 :             qp = qp_i - 6;
      68             :         else
      69    15028199 :             qp = qp_c[qp_i - 30];
      70             :     } else {
      71     2844936 :         qp = av_clip(qp_i, 0, 51);
      72             :     }
      73             : 
      74    20528260 :     idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53);
      75    20528260 :     return tctable[idxt];
      76             : }
      77             : 
      78     1159487 : static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
      79             : {
      80     1159487 :     HEVCLocalContext *lc     = s->HEVClc;
      81     1159487 :     int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
      82     2318974 :     int MinCuQpDeltaSizeMask = (1 << (s->ps.sps->log2_ctb_size -
      83     1159487 :                                       s->ps.pps->diff_cu_qp_delta_depth)) - 1;
      84     1159487 :     int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask);
      85     1159487 :     int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask);
      86     1159487 :     int min_cb_width         = s->ps.sps->min_cb_width;
      87     1159487 :     int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
      88     1159487 :     int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
      89     1965884 :     int availableA           = (xBase   & ctb_size_mask) &&
      90      806397 :                                (xQgBase & ctb_size_mask);
      91     1948601 :     int availableB           = (yBase   & ctb_size_mask) &&
      92      789114 :                                (yQgBase & ctb_size_mask);
      93             :     int qPy_pred, qPy_a, qPy_b;
      94             : 
      95             :     // qPy_pred
      96     1159487 :     if (lc->first_qp_group || (!xQgBase && !yQgBase)) {
      97       71464 :         lc->first_qp_group = !lc->tu.is_cu_qp_delta_coded;
      98       71464 :         qPy_pred = s->sh.slice_qp;
      99             :     } else {
     100     1088023 :         qPy_pred = lc->qPy_pred;
     101             :     }
     102             : 
     103             :     // qPy_a
     104     1159487 :     if (availableA == 0)
     105      486511 :         qPy_a = qPy_pred;
     106             :     else
     107      672976 :         qPy_a = s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width];
     108             : 
     109             :     // qPy_b
     110     1159487 :     if (availableB == 0)
     111      489861 :         qPy_b = qPy_pred;
     112             :     else
     113      669626 :         qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];
     114             : 
     115             :     av_assert2(qPy_a >= -s->ps.sps->qp_bd_offset && qPy_a < 52);
     116             :     av_assert2(qPy_b >= -s->ps.sps->qp_bd_offset && qPy_b < 52);
     117             : 
     118     1159487 :     return (qPy_a + qPy_b + 1) >> 1;
     119             : }
     120             : 
     121     1159487 : void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
     122             : {
     123     1159487 :     int qp_y = get_qPy_pred(s, xBase, yBase, log2_cb_size);
     124             : 
     125     1159487 :     if (s->HEVClc->tu.cu_qp_delta != 0) {
     126      325265 :         int off = s->ps.sps->qp_bd_offset;
     127      975795 :         s->HEVClc->qp_y = FFUMOD(qp_y + s->HEVClc->tu.cu_qp_delta + 52 + 2 * off,
     128      650530 :                                  52 + off) - off;
     129             :     } else
     130      834222 :         s->HEVClc->qp_y = qp_y;
     131     1159487 : }
     132             : 
     133   109894686 : static int get_qPy(HEVCContext *s, int xC, int yC)
     134             : {
     135   109894686 :     int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
     136   109894686 :     int x                 = xC >> log2_min_cb_size;
     137   109894686 :     int y                 = yC >> log2_min_cb_size;
     138   109894686 :     return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
     139             : }
     140             : 
     141      365896 : static void copy_CTB(uint8_t *dst, const uint8_t *src, int width, int height,
     142             :                      ptrdiff_t stride_dst, ptrdiff_t stride_src)
     143             : {
     144             : int i, j;
     145             : 
     146      365896 :     if (((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15) {
     147     2373109 :         for (i = 0; i < height; i++) {
     148    20821408 :             for (j = 0; j < width; j+=8)
     149    18502024 :                 AV_COPY64U(dst+j, src+j);
     150     2319384 :             dst += stride_dst;
     151     2319384 :             src += stride_src;
     152             :         }
     153             :     } else {
     154    16761003 :         for (i = 0; i < height; i++) {
     155   100259728 :             for (j = 0; j < width; j+=16)
     156    83810896 :                 AV_COPY128(dst+j, src+j);
     157    16448832 :             dst += stride_dst;
     158    16448832 :             src += stride_src;
     159             :         }
     160             :     }
     161      365896 : }
     162             : 
     163     1286298 : static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
     164             : {
     165     1286298 :     if (pixel_shift)
     166      213836 :         *(uint16_t *)dst = *(uint16_t *)src;
     167             :     else
     168     1072462 :         *dst = *src;
     169     1286298 : }
     170             : 
     171     1242642 : static void copy_vert(uint8_t *dst, const uint8_t *src,
     172             :                       int pixel_shift, int height,
     173             :                       ptrdiff_t stride_dst, ptrdiff_t stride_src)
     174             : {
     175             :     int i;
     176     1242642 :     if (pixel_shift == 0) {
     177    51146993 :         for (i = 0; i < height; i++) {
     178    50078032 :             *dst = *src;
     179    50078032 :             dst += stride_dst;
     180    50078032 :             src += stride_src;
     181             :         }
     182             :     } else {
     183    11153345 :         for (i = 0; i < height; i++) {
     184    10979664 :             *(uint16_t *)dst = *(uint16_t *)src;
     185    10979664 :             dst += stride_dst;
     186    10979664 :             src += stride_src;
     187             :         }
     188             :     }
     189     1242642 : }
     190             : 
     191      473400 : static void copy_CTB_to_hv(HEVCContext *s, const uint8_t *src,
     192             :                            ptrdiff_t stride_src, int x, int y, int width, int height,
     193             :                            int c_idx, int x_ctb, int y_ctb)
     194             : {
     195      473400 :     int sh = s->ps.sps->pixel_shift;
     196      473400 :     int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
     197      473400 :     int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
     198             : 
     199             :     /* copy horizontal edges */
     200      473400 :     memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),
     201      473400 :         src, width << sh);
     202      946800 :     memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),
     203      946800 :         src + stride_src * (height - 1), width << sh);
     204             : 
     205             :     /* copy vertical edges */
     206      473400 :     copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);
     207             : 
     208      473400 :     copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);
     209      473400 : }
     210             : 
     211      365896 : static void restore_tqb_pixels(HEVCContext *s,
     212             :                                uint8_t *src1, const uint8_t *dst1,
     213             :                                ptrdiff_t stride_src, ptrdiff_t stride_dst,
     214             :                                int x0, int y0, int width, int height, int c_idx)
     215             : {
     216      727503 :     if ( s->ps.pps->transquant_bypass_enable_flag ||
     217      361711 :             (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
     218             :         int x, y;
     219        4393 :         int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
     220        4393 :         int hshift       = s->ps.sps->hshift[c_idx];
     221        4393 :         int vshift       = s->ps.sps->vshift[c_idx];
     222        4393 :         int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size);
     223        4393 :         int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size);
     224        4393 :         int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size);
     225        4393 :         int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size);
     226        4393 :         int len          = (min_pu_size >> hshift) << s->ps.sps->pixel_shift;
     227       53865 :         for (y = y_min; y < y_max; y++) {
     228      682176 :             for (x = x_min; x < x_max; x++) {
     229      632704 :                 if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
     230             :                     int n;
     231         264 :                     uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
     232         264 :                     const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << s->ps.sps->pixel_shift);
     233         968 :                     for (n = 0; n < (min_pu_size >> vshift); n++) {
     234         704 :                         memcpy(src, dst, len);
     235         704 :                         src += stride_src;
     236         704 :                         dst += stride_dst;
     237             :                     }
     238             :                 }
     239             :             }
     240             :         }
     241             :     }
     242      365896 : }
     243             : 
     244             : #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
     245             : 
     246     1158053 : static void sao_filter_CTB(HEVCContext *s, int x, int y)
     247             : {
     248             :     static const uint8_t sao_tab[8] = { 0, 1, 2, 2, 3, 3, 4, 4 };
     249     1158053 :     HEVCLocalContext *lc = s->HEVClc;
     250             :     int c_idx;
     251             :     int edges[4];  // 0 left 1 top 2 right 3 bottom
     252     1158053 :     int x_ctb                = x >> s->ps.sps->log2_ctb_size;
     253     1158053 :     int y_ctb                = y >> s->ps.sps->log2_ctb_size;
     254     1158053 :     int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
     255     1158053 :     int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
     256     1158053 :     SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
     257             :     // flags indicating unfilterable edges
     258     1158053 :     uint8_t vert_edge[]      = { 0, 0 };
     259     1158053 :     uint8_t horiz_edge[]     = { 0, 0 };
     260     1158053 :     uint8_t diag_edge[]      = { 0, 0, 0, 0 };
     261     1158053 :     uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
     262     1326413 :     uint8_t no_tile_filter   = s->ps.pps->tiles_enabled_flag &&
     263      168360 :                                !s->ps.pps->loop_filter_across_tiles_enabled_flag;
     264     1158053 :     uint8_t restore          = no_tile_filter || !lfase;
     265     1158053 :     uint8_t left_tile_edge   = 0;
     266     1158053 :     uint8_t right_tile_edge  = 0;
     267     1158053 :     uint8_t up_tile_edge     = 0;
     268     1158053 :     uint8_t bottom_tile_edge = 0;
     269             : 
     270     1158053 :     edges[0]   = x_ctb == 0;
     271     1158053 :     edges[1]   = y_ctb == 0;
     272     1158053 :     edges[2]   = x_ctb == s->ps.sps->ctb_width  - 1;
     273     1158053 :     edges[3]   = y_ctb == s->ps.sps->ctb_height - 1;
     274             : 
     275     1158053 :     if (restore) {
     276       48208 :         if (!edges[0]) {
     277       45236 :             left_tile_edge  = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
     278       45236 :             vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
     279             :         }
     280       48208 :         if (!edges[2]) {
     281       45236 :             right_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
     282       45236 :             vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
     283             :         }
     284       48208 :         if (!edges[1]) {
     285       43074 :             up_tile_edge     = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]];
     286       43074 :             horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
     287             :         }
     288       48208 :         if (!edges[3]) {
     289       43074 :             bottom_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->ps.sps->ctb_width]];
     290       43074 :             horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
     291             :         }
     292       48208 :         if (!edges[0] && !edges[1]) {
     293       40668 :             diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
     294             :         }
     295       48208 :         if (!edges[1] && !edges[2]) {
     296       40668 :             diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
     297             :         }
     298       48208 :         if (!edges[2] && !edges[3]) {
     299       40668 :             diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
     300             :         }
     301       48208 :         if (!edges[0] && !edges[3]) {
     302       40668 :             diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
     303             :         }
     304             :     }
     305             : 
     306     4632212 :     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
     307     3474159 :         int x0       = x >> s->ps.sps->hshift[c_idx];
     308     3474159 :         int y0       = y >> s->ps.sps->vshift[c_idx];
     309     3474159 :         ptrdiff_t stride_src = s->frame->linesize[c_idx];
     310     3474159 :         int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->hshift[c_idx];
     311     3474159 :         int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> s->ps.sps->vshift[c_idx];
     312     3474159 :         int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> s->ps.sps->hshift[c_idx]) - x0);
     313     3474159 :         int height   = FFMIN(ctb_size_v, (s->ps.sps->height >> s->ps.sps->vshift[c_idx]) - y0);
     314     3474159 :         int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
     315     3474159 :         uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->ps.sps->pixel_shift)];
     316             :         ptrdiff_t stride_dst;
     317             :         uint8_t *dst;
     318             : 
     319     3474159 :         switch (sao->type_idx[c_idx]) {
     320      108273 :         case SAO_BAND:
     321      108273 :             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
     322             :                            x_ctb, y_ctb);
     323      215785 :             if (s->ps.pps->transquant_bypass_enable_flag ||
     324      107520 :                 (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
     325         769 :             dst = lc->edge_emu_buffer;
     326         769 :             stride_dst = 2*MAX_PB_SIZE;
     327         769 :             copy_CTB(dst, src, width << s->ps.sps->pixel_shift, height, stride_dst, stride_src);
     328        2307 :             s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
     329        1538 :                                             sao->offset_val[c_idx], sao->band_position[c_idx],
     330             :                                             width, height);
     331         769 :             restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
     332             :                                x, y, width, height, c_idx);
     333             :             } else {
     334      322512 :             s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src,
     335      215008 :                                             sao->offset_val[c_idx], sao->band_position[c_idx],
     336             :                                             width, height);
     337             :             }
     338      108273 :             sao->type_idx[c_idx] = SAO_APPLIED;
     339      108273 :             break;
     340      365127 :         case SAO_EDGE:
     341             :         {
     342      365127 :             int w = s->ps.sps->width >> s->ps.sps->hshift[c_idx];
     343      365127 :             int h = s->ps.sps->height >> s->ps.sps->vshift[c_idx];
     344      365127 :             int left_edge = edges[0];
     345      365127 :             int top_edge = edges[1];
     346      365127 :             int right_edge = edges[2];
     347      365127 :             int bottom_edge = edges[3];
     348      365127 :             int sh = s->ps.sps->pixel_shift;
     349             :             int left_pixels, right_pixels;
     350             : 
     351      365127 :             stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
     352      365127 :             dst = lc->edge_emu_buffer + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE;
     353             : 
     354      365127 :             if (!top_edge) {
     355      339733 :                 int left = 1 - left_edge;
     356      339733 :                 int right = 1 - right_edge;
     357             :                 const uint8_t *src1[2];
     358             :                 uint8_t *dst1;
     359             :                 int src_idx, pos;
     360             : 
     361      339733 :                 dst1 = dst - stride_dst - (left << sh);
     362      339733 :                 src1[0] = src - stride_src - (left << sh);
     363      339733 :                 src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
     364      339733 :                 pos = 0;
     365      339733 :                 if (left) {
     366      323578 :                     src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
     367             :                                SAO_APPLIED);
     368      323578 :                     copy_pixel(dst1, src1[src_idx], sh);
     369      323578 :                     pos += (1 << sh);
     370             :                 }
     371      339733 :                 src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
     372             :                            SAO_APPLIED);
     373      339733 :                 memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
     374      339733 :                 if (right) {
     375      323044 :                     pos += width << sh;
     376      323044 :                     src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
     377             :                                SAO_APPLIED);
     378      323044 :                     copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
     379             :                 }
     380             :             }
     381      365127 :             if (!bottom_edge) {
     382      335863 :                 int left = 1 - left_edge;
     383      335863 :                 int right = 1 - right_edge;
     384             :                 const uint8_t *src1[2];
     385             :                 uint8_t *dst1;
     386             :                 int src_idx, pos;
     387             : 
     388      335863 :                 dst1 = dst + height * stride_dst - (left << sh);
     389      335863 :                 src1[0] = src + height * stride_src - (left << sh);
     390      335863 :                 src1[1] = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
     391      335863 :                 pos = 0;
     392      335863 :                 if (left) {
     393      320273 :                     src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
     394             :                                SAO_APPLIED);
     395      320273 :                     copy_pixel(dst1, src1[src_idx], sh);
     396      320273 :                     pos += (1 << sh);
     397             :                 }
     398      335863 :                 src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
     399             :                            SAO_APPLIED);
     400      335863 :                 memcpy(dst1 + pos, src1[src_idx] + pos, width << sh);
     401      335863 :                 if (right) {
     402      319403 :                     pos += width << sh;
     403      319403 :                     src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
     404             :                                SAO_APPLIED);
     405      319403 :                     copy_pixel(dst1 + pos, src1[src_idx] + pos, sh);
     406             :                 }
     407             :             }
     408      365127 :             left_pixels = 0;
     409      365127 :             if (!left_edge) {
     410      347001 :                 if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
     411      591684 :                     copy_vert(dst - (1 << sh),
     412      295842 :                               s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
     413      295842 :                               sh, height, stride_dst, 1 << sh);
     414             :                 } else {
     415       51159 :                     left_pixels = 1;
     416             :                 }
     417             :             }
     418      365127 :             right_pixels = 0;
     419      365127 :             if (!right_edge) {
     420      346340 :                 if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
     421           0 :                     copy_vert(dst + (width << sh),
     422           0 :                               s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
     423           0 :                               sh, height, stride_dst, 1 << sh);
     424             :                 } else {
     425      346340 :                     right_pixels = 1;
     426             :                 }
     427             :             }
     428             : 
     429      730254 :             copy_CTB(dst - (left_pixels << sh),
     430      365127 :                      src - (left_pixels << sh),
     431      365127 :                      (width + left_pixels + right_pixels) << sh,
     432             :                      height, stride_dst, stride_src);
     433             : 
     434      365127 :             copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
     435             :                            x_ctb, y_ctb);
     436      365127 :             s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx],
     437             :                                             sao->eo_class[c_idx], width, height);
     438      365127 :             s->hevcdsp.sao_edge_restore[restore](src, dst,
     439             :                                                 stride_src, stride_dst,
     440             :                                                 sao,
     441             :                                                 edges, width,
     442             :                                                 height, c_idx,
     443             :                                                 vert_edge,
     444             :                                                 horiz_edge,
     445             :                                                 diag_edge);
     446      365127 :             restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
     447             :                                x, y, width, height, c_idx);
     448      365127 :             sao->type_idx[c_idx] = SAO_APPLIED;
     449      365127 :             break;
     450             :         }
     451             :         }
     452             :     }
     453     1158053 : }
     454             : 
     455     2057768 : static int get_pcm(HEVCContext *s, int x, int y)
     456             : {
     457     2057768 :     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
     458             :     int x_pu, y_pu;
     459             : 
     460     2057768 :     if (x < 0 || y < 0)
     461           0 :         return 2;
     462             : 
     463     2057768 :     x_pu = x >> log2_min_pu_size;
     464     2057768 :     y_pu = y >> log2_min_pu_size;
     465             : 
     466     2057768 :     if (x_pu >= s->ps.sps->min_pu_width || y_pu >= s->ps.sps->min_pu_height)
     467        1408 :         return 2;
     468     2056360 :     return s->is_pcm[y_pu * s->ps.sps->min_pu_width + x_pu];
     469             : }
     470             : 
     471             : #define TC_CALC(qp, bs)                                                 \
     472             :     tctable[av_clip((qp) + DEFAULT_INTRA_TC_OFFSET * ((bs) - 1) +       \
     473             :                     (tc_offset & -2),                                   \
     474             :                     0, MAX_QP + DEFAULT_INTRA_TC_OFFSET)]
     475             : 
     476     1358723 : static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     477             : {
     478             :     uint8_t *src;
     479             :     int x, y;
     480             :     int chroma, beta;
     481             :     int32_t c_tc[2], tc[2];
     482     1358723 :     uint8_t no_p[2] = { 0 };
     483     1358723 :     uint8_t no_q[2] = { 0 };
     484             : 
     485     1358723 :     int log2_ctb_size = s->ps.sps->log2_ctb_size;
     486             :     int x_end, x_end2, y_end;
     487     1358723 :     int ctb_size        = 1 << log2_ctb_size;
     488     2717446 :     int ctb             = (x0 >> log2_ctb_size) +
     489     1358723 :                           (y0 >> log2_ctb_size) * s->ps.sps->ctb_width;
     490     1358723 :     int cur_tc_offset   = s->deblock[ctb].tc_offset;
     491     1358723 :     int cur_beta_offset = s->deblock[ctb].beta_offset;
     492             :     int left_tc_offset, left_beta_offset;
     493             :     int tc_offset, beta_offset;
     494     2804827 :     int pcmf = (s->ps.sps->pcm_enabled_flag &&
     495     2717390 :                 s->ps.sps->pcm.loop_filter_disable_flag) ||
     496     1358667 :                s->ps.pps->transquant_bypass_enable_flag;
     497             : 
     498     1358723 :     if (x0) {
     499     1286870 :         left_tc_offset   = s->deblock[ctb - 1].tc_offset;
     500     1286870 :         left_beta_offset = s->deblock[ctb - 1].beta_offset;
     501             :     } else {
     502       71853 :         left_tc_offset   = 0;
     503       71853 :         left_beta_offset = 0;
     504             :     }
     505             : 
     506     1358723 :     x_end = x0 + ctb_size;
     507     1358723 :     if (x_end > s->ps.sps->width)
     508       25274 :         x_end = s->ps.sps->width;
     509     1358723 :     y_end = y0 + ctb_size;
     510     1358723 :     if (y_end > s->ps.sps->height)
     511       94290 :         y_end = s->ps.sps->height;
     512             : 
     513     1358723 :     tc_offset   = cur_tc_offset;
     514     1358723 :     beta_offset = cur_beta_offset;
     515             : 
     516     1358723 :     x_end2 = x_end;
     517     1358723 :     if (x_end2 != s->ps.sps->width)
     518     1286870 :         x_end2 -= 8;
     519    11165937 :     for (y = y0; y < y_end; y += 8) {
     520             :         // vertical filtering luma
     521    84533297 :         for (x = x0 ? x0 : 8; x < x_end; x += 8) {
     522    74726083 :             const int bs0 = s->vertical_bs[(x +  y      * s->bs_width) >> 2];
     523    74726083 :             const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
     524    74726083 :             if (bs0 || bs1) {
     525    16914736 :                 const int qp = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1;
     526             : 
     527    16914736 :                 beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
     528             : 
     529    16914736 :                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
     530    16914736 :                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
     531    16914736 :                 src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
     532    16914736 :                 if (pcmf) {
     533      186079 :                     no_p[0] = get_pcm(s, x - 1, y);
     534      186079 :                     no_p[1] = get_pcm(s, x - 1, y + 4);
     535      186079 :                     no_q[0] = get_pcm(s, x, y);
     536      186079 :                     no_q[1] = get_pcm(s, x, y + 4);
     537      372158 :                     s->hevcdsp.hevc_v_loop_filter_luma_c(src,
     538      186079 :                                                          s->frame->linesize[LUMA],
     539             :                                                          beta, tc, no_p, no_q);
     540             :                 } else
     541    33457314 :                     s->hevcdsp.hevc_v_loop_filter_luma(src,
     542    16728657 :                                                        s->frame->linesize[LUMA],
     543             :                                                        beta, tc, no_p, no_q);
     544             :             }
     545             :         }
     546             : 
     547     9807214 :         if(!y)
     548      116772 :              continue;
     549             : 
     550             :         // horizontal filtering luma
     551    84081395 :         for (x = x0 ? x0 - 8 : 0; x < x_end2; x += 8) {
     552    74390953 :             const int bs0 = s->horizontal_bs[( x      + y * s->bs_width) >> 2];
     553    74390953 :             const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
     554    74390953 :             if (bs0 || bs1) {
     555    17244077 :                 const int qp = (get_qPy(s, x, y - 1)     + get_qPy(s, x, y)     + 1) >> 1;
     556             : 
     557    17244077 :                 tc_offset   = x >= x0 ? cur_tc_offset : left_tc_offset;
     558    17244077 :                 beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
     559             : 
     560    17244077 :                 beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
     561    17244077 :                 tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
     562    17244077 :                 tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
     563    17244077 :                 src     = &s->frame->data[LUMA][y * s->frame->linesize[LUMA] + (x << s->ps.sps->pixel_shift)];
     564    17244077 :                 if (pcmf) {
     565      187803 :                     no_p[0] = get_pcm(s, x, y - 1);
     566      187803 :                     no_p[1] = get_pcm(s, x + 4, y - 1);
     567      187803 :                     no_q[0] = get_pcm(s, x, y);
     568      187803 :                     no_q[1] = get_pcm(s, x + 4, y);
     569      375606 :                     s->hevcdsp.hevc_h_loop_filter_luma_c(src,
     570      187803 :                                                          s->frame->linesize[LUMA],
     571             :                                                          beta, tc, no_p, no_q);
     572             :                 } else
     573    34112548 :                     s->hevcdsp.hevc_h_loop_filter_luma(src,
     574    17056274 :                                                        s->frame->linesize[LUMA],
     575             :                                                        beta, tc, no_p, no_q);
     576             :             }
     577             :         }
     578             :     }
     579             : 
     580     1358723 :     if (s->ps.sps->chroma_format_idc) {
     581     4076169 :         for (chroma = 1; chroma <= 2; chroma++) {
     582     2717446 :             int h = 1 << s->ps.sps->hshift[chroma];
     583     2717446 :             int v = 1 << s->ps.sps->vshift[chroma];
     584             : 
     585             :             // vertical filtering chroma
     586    12931068 :             for (y = y0; y < y_end; y += (8 * v)) {
     587    49818298 :                 for (x = x0 ? x0 : 8 * h; x < x_end; x += (8 * h)) {
     588    39604676 :                     const int bs0 = s->vertical_bs[(x +  y            * s->bs_width) >> 2];
     589    39604676 :                     const int bs1 = s->vertical_bs[(x + (y + (4 * v)) * s->bs_width) >> 2];
     590             : 
     591    39604676 :                     if ((bs0 == 2) || (bs1 == 2)) {
     592     5380090 :                         const int qp0 = (get_qPy(s, x - 1, y)           + get_qPy(s, x, y)           + 1) >> 1;
     593     5380090 :                         const int qp1 = (get_qPy(s, x - 1, y + (4 * v)) + get_qPy(s, x, y + (4 * v)) + 1) >> 1;
     594             : 
     595     5380090 :                         c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
     596     5380090 :                         c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
     597     5380090 :                         src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[chroma]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[chroma]) << s->ps.sps->pixel_shift)];
     598     5380090 :                         if (pcmf) {
     599       70838 :                             no_p[0] = get_pcm(s, x - 1, y);
     600       70838 :                             no_p[1] = get_pcm(s, x - 1, y + (4 * v));
     601       70838 :                             no_q[0] = get_pcm(s, x, y);
     602       70838 :                             no_q[1] = get_pcm(s, x, y + (4 * v));
     603      141676 :                             s->hevcdsp.hevc_v_loop_filter_chroma_c(src,
     604       70838 :                                                                    s->frame->linesize[chroma],
     605             :                                                                    c_tc, no_p, no_q);
     606             :                         } else
     607    10618504 :                             s->hevcdsp.hevc_v_loop_filter_chroma(src,
     608     5309252 :                                                                  s->frame->linesize[chroma],
     609             :                                                                  c_tc, no_p, no_q);
     610             :                     }
     611             :                 }
     612             : 
     613    10213622 :                 if(!y)
     614      233544 :                     continue;
     615             : 
     616             :                 // horizontal filtering chroma
     617     9980078 :                 tc_offset = x0 ? left_tc_offset : cur_tc_offset;
     618     9980078 :                 x_end2 = x_end;
     619     9980078 :                 if (x_end != s->ps.sps->width)
     620     9465302 :                     x_end2 = x_end - 8 * h;
     621    49255704 :                 for (x = x0 ? x0 - 8 * h : 0; x < x_end2; x += (8 * h)) {
     622    39275626 :                     const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
     623    39275626 :                     const int bs1 = s->horizontal_bs[((x + 4 * h) + y * s->bs_width) >> 2];
     624    39275626 :                     if ((bs0 == 2) || (bs1 == 2)) {
     625     5148680 :                         const int qp0 = bs0 == 2 ? (get_qPy(s, x,           y - 1) + get_qPy(s, x,           y) + 1) >> 1 : 0;
     626     5148680 :                         const int qp1 = bs1 == 2 ? (get_qPy(s, x + (4 * h), y - 1) + get_qPy(s, x + (4 * h), y) + 1) >> 1 : 0;
     627             : 
     628     5148680 :                         c_tc[0]   = bs0 == 2 ? chroma_tc(s, qp0, chroma, tc_offset)     : 0;
     629     5148680 :                         c_tc[1]   = bs1 == 2 ? chroma_tc(s, qp1, chroma, cur_tc_offset) : 0;
     630     5148680 :                         src       = &s->frame->data[chroma][(y >> s->ps.sps->vshift[1]) * s->frame->linesize[chroma] + ((x >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
     631     5148680 :                         if (pcmf) {
     632       69722 :                             no_p[0] = get_pcm(s, x,           y - 1);
     633       69722 :                             no_p[1] = get_pcm(s, x + (4 * h), y - 1);
     634       69722 :                             no_q[0] = get_pcm(s, x,           y);
     635       69722 :                             no_q[1] = get_pcm(s, x + (4 * h), y);
     636      139444 :                             s->hevcdsp.hevc_h_loop_filter_chroma_c(src,
     637       69722 :                                                                    s->frame->linesize[chroma],
     638             :                                                                    c_tc, no_p, no_q);
     639             :                         } else
     640    10157916 :                             s->hevcdsp.hevc_h_loop_filter_chroma(src,
     641     5078958 :                                                                  s->frame->linesize[chroma],
     642             :                                                                  c_tc, no_p, no_q);
     643             :                     }
     644             :                 }
     645             :             }
     646             :         }
     647             :     }
     648     1358723 : }
     649             : 
     650   223407120 : static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh,
     651             :                              RefPicList *neigh_refPicList)
     652             : {
     653   223407120 :     if (curr->pred_flag == PF_BI &&  neigh->pred_flag == PF_BI) {
     654             :         // same L0 and L1
     655   279350525 :         if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]]  &&
     656   157152085 :             s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] &&
     657    18325892 :             neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) {
     658    36014182 :             if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
     659    36027514 :                  FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) &&
     660      733362 :                 (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
     661      164769 :                  FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4))
     662      410243 :                 return 1;
     663             :             else
     664    17777196 :                 return 0;
     665   242975647 :         } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
     666   120638754 :                    neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
     667   235664764 :             if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
     668   230731762 :                 FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4)
     669     5082715 :                 return 1;
     670             :             else
     671   114979686 :                 return 0;
     672     2978990 :         } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
     673      704498 :                    neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
     674      239945 :             if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
     675      168018 :                 FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)
     676       74847 :                 return 1;
     677             :             else
     678       77090 :                 return 0;
     679             :         } else {
     680     2122555 :             return 1;
     681             :         }
     682    82882788 :     } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV
     683             :         Mv A, B;
     684             :         int ref_A, ref_B;
     685             : 
     686    74245429 :         if (curr->pred_flag & 1) {
     687    62846996 :             A     = curr->mv[0];
     688    62846996 :             ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]];
     689             :         } else {
     690    11398433 :             A     = curr->mv[1];
     691    11398433 :             ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]];
     692             :         }
     693             : 
     694    74245429 :         if (neigh->pred_flag & 1) {
     695    62861935 :             B     = neigh->mv[0];
     696    62861935 :             ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]];
     697             :         } else {
     698    11383494 :             B     = neigh->mv[1];
     699    11383494 :             ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]];
     700             :         }
     701             : 
     702    74245429 :         if (ref_A == ref_B) {
     703    70570021 :             if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4)
     704     2432371 :                 return 1;
     705             :             else
     706    68137650 :                 return 0;
     707             :         } else
     708     3675408 :             return 1;
     709             :     }
     710             : 
     711     8637359 :     return 1;
     712             : }
     713             : 
     714    19281673 : void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
     715             :                                            int log2_trafo_size)
     716             : {
     717    19281673 :     HEVCLocalContext *lc = s->HEVClc;
     718    19281673 :     MvField *tab_mvf     = s->ref->tab_mvf;
     719    19281673 :     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
     720    19281673 :     int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
     721    19281673 :     int min_pu_width     = s->ps.sps->min_pu_width;
     722    19281673 :     int min_tu_width     = s->ps.sps->min_tb_width;
     723    57845019 :     int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width +
     724    38563346 :                            (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
     725             :     int boundary_upper, boundary_left;
     726             :     int i, j, bs;
     727             : 
     728    19281673 :     boundary_upper = y0 > 0 && !(y0 & 7);
     729    34209999 :     if (boundary_upper &&
     730    16730698 :         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
     731     2296713 :           lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
     732    15361567 :           (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
     733    15296937 :          (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
     734      543964 :           lc->boundary_flags & BOUNDARY_UPPER_TILE &&
     735      114253 :           (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
     736       80989 :         boundary_upper = 0;
     737             : 
     738    19281673 :     if (boundary_upper) {
     739    29694674 :         RefPicList *rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ?
     740    27611114 :                               ff_hevc_get_ref_list(s, s->ref, x0, y0 - 1) :
     741    12763777 :                               s->ref->refPicList;
     742    14847337 :         int yp_pu = (y0 - 1) >> log2_min_pu_size;
     743    14847337 :         int yq_pu =  y0      >> log2_min_pu_size;
     744    14847337 :         int yp_tu = (y0 - 1) >> log2_min_tu_size;
     745    14847337 :         int yq_tu =  y0      >> log2_min_tu_size;
     746             : 
     747    61223987 :             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
     748    46376650 :                 int x_pu = (x0 + i) >> log2_min_pu_size;
     749    46376650 :                 int x_tu = (x0 + i) >> log2_min_tu_size;
     750    46376650 :                 MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
     751    46376650 :                 MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
     752    46376650 :                 uint8_t top_cbf_luma  = s->cbf_luma[yp_tu * min_tu_width + x_tu];
     753    46376650 :                 uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu];
     754             : 
     755    46376650 :                 if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA)
     756    13311290 :                     bs = 2;
     757    33065360 :                 else if (curr_cbf_luma || top_cbf_luma)
     758     9362679 :                     bs = 1;
     759             :                 else
     760    23702681 :                     bs = boundary_strength(s, curr, top, rpl_top);
     761    46376650 :                 s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs;
     762             :             }
     763             :     }
     764             : 
     765             :     // bs for vertical TU boundaries
     766    19281673 :     boundary_left = x0 > 0 && !(x0 & 7);
     767    34307112 :     if (boundary_left &&
     768    16847177 :         ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
     769     1890190 :           lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
     770    15085240 :           (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
     771    15458231 :          (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
     772      501343 :           lc->boundary_flags & BOUNDARY_LEFT_TILE &&
     773       59900 :           (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
     774       22259 :         boundary_left = 0;
     775             : 
     776    19281673 :     if (boundary_left) {
     777    30006360 :         RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ?
     778    29762115 :                                ff_hevc_get_ref_list(s, s->ref, x0 - 1, y0) :
     779    14758935 :                                s->ref->refPicList;
     780    15003180 :         int xp_pu = (x0 - 1) >> log2_min_pu_size;
     781    15003180 :         int xq_pu =  x0      >> log2_min_pu_size;
     782    15003180 :         int xp_tu = (x0 - 1) >> log2_min_tu_size;
     783    15003180 :         int xq_tu =  x0      >> log2_min_tu_size;
     784             : 
     785    62323820 :             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
     786    47320640 :                 int y_pu      = (y0 + i) >> log2_min_pu_size;
     787    47320640 :                 int y_tu      = (y0 + i) >> log2_min_tu_size;
     788    47320640 :                 MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
     789    47320640 :                 MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
     790    47320640 :                 uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
     791    47320640 :                 uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
     792             : 
     793    47320640 :                 if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA)
     794    13518982 :                     bs = 2;
     795    33801658 :                 else if (curr_cbf_luma || left_cbf_luma)
     796     9462043 :                     bs = 1;
     797             :                 else
     798    24339615 :                     bs = boundary_strength(s, curr, left, rpl_left);
     799    47320640 :                 s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs;
     800             :             }
     801             :     }
     802             : 
     803    19281673 :     if (log2_trafo_size > log2_min_pu_size && !is_intra) {
     804     7187040 :         RefPicList *rpl = s->ref->refPicList;
     805             : 
     806             :         // bs for TU internal horizontal PU boundaries
     807    16488350 :         for (j = 8; j < (1 << log2_trafo_size); j += 8) {
     808     9301310 :             int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
     809     9301310 :             int yq_pu = (y0 + j)     >> log2_min_pu_size;
     810             : 
     811    96983722 :             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
     812    87682412 :                 int x_pu = (x0 + i) >> log2_min_pu_size;
     813    87682412 :                 MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
     814    87682412 :                 MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
     815             : 
     816    87682412 :                 bs = boundary_strength(s, curr, top, rpl);
     817    87682412 :                 s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
     818             :             }
     819             :         }
     820             : 
     821             :         // bs for TU internal vertical PU boundaries
     822    40163740 :         for (j = 0; j < (1 << log2_trafo_size); j += 4) {
     823    32976700 :             int y_pu = (y0 + j) >> log2_min_pu_size;
     824             : 
     825   120659112 :             for (i = 8; i < (1 << log2_trafo_size); i += 8) {
     826    87682412 :                 int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
     827    87682412 :                 int xq_pu = (x0 + i)     >> log2_min_pu_size;
     828    87682412 :                 MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
     829    87682412 :                 MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
     830             : 
     831    87682412 :                 bs = boundary_strength(s, curr, left, rpl);
     832    87682412 :                 s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
     833             :             }
     834             :         }
     835             :     }
     836    19281673 : }
     837             : 
     838             : #undef LUMA
     839             : #undef CB
     840             : #undef CR
     841             : 
     842     1360763 : void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
     843             : {
     844     1360763 :     int x_end = x >= s->ps.sps->width  - ctb_size;
     845     1360763 :     int skip = 0;
     846     2721526 :     if (s->avctx->skip_loop_filter >= AVDISCARD_ALL ||
     847     2722546 :         (s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && !IS_IDR(s)) ||
     848     1359743 :         (s->avctx->skip_loop_filter >= AVDISCARD_NONINTRA &&
     849     1359743 :          s->sh.slice_type != HEVC_SLICE_I) ||
     850     1359743 :         (s->avctx->skip_loop_filter >= AVDISCARD_BIDIR &&
     851     1359743 :          s->sh.slice_type == HEVC_SLICE_B) ||
     852     1359743 :         (s->avctx->skip_loop_filter >= AVDISCARD_NONREF &&
     853        1020 :         ff_hevc_nal_is_nonref(s->nal_unit_type)))
     854        2040 :         skip = 1;
     855             : 
     856     1360763 :     if (!skip)
     857     1358723 :         deblocking_filter_CTB(s, x, y);
     858     2518816 :     if (s->ps.sps->sao_enabled && !skip) {
     859     1158053 :         int y_end = y >= s->ps.sps->height - ctb_size;
     860     1158053 :         if (y && x)
     861     1002063 :             sao_filter_CTB(s, x - ctb_size, y - ctb_size);
     862     1158053 :         if (x && y_end)
     863       92978 :             sao_filter_CTB(s, x - ctb_size, y);
     864     1158053 :         if (y && x_end) {
     865       54375 :             sao_filter_CTB(s, x, y - ctb_size);
     866       54375 :             if (s->threads_type & FF_THREAD_FRAME )
     867           0 :                 ff_thread_report_progress(&s->ref->tf, y, 0);
     868             :         }
     869     1158053 :         if (x_end && y_end) {
     870        8637 :             sao_filter_CTB(s, x , y);
     871        8637 :             if (s->threads_type & FF_THREAD_FRAME )
     872           0 :                 ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0);
     873             :         }
     874      202710 :     } else if (s->threads_type & FF_THREAD_FRAME && x_end)
     875           0 :         ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0);
     876     1360763 : }
     877             : 
     878     1360763 : void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size)
     879             : {
     880     1360763 :     int x_end = x_ctb >= s->ps.sps->width  - ctb_size;
     881     1360763 :     int y_end = y_ctb >= s->ps.sps->height - ctb_size;
     882     1360763 :     if (y_ctb && x_ctb)
     883     1181389 :         ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
     884     1360763 :     if (y_ctb && x_end)
     885       62482 :         ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size);
     886     1360763 :     if (x_ctb && y_end)
     887      107453 :         ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size);
     888     1360763 : }

Generated by: LCOV version 1.13