LCOV - code coverage report
Current view: top level - libavcodec - hevcpred_template.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 313 319 98.1 %
Date: 2017-12-11 04:34:20 Functions: 48 64 75.0 %

          Line data    Source code
       1             : /*
       2             :  * HEVC video decoder
       3             :  *
       4             :  * Copyright (C) 2012 - 2013 Guillaume Martres
       5             :  *
       6             :  * This file is part of FFmpeg.
       7             :  *
       8             :  * FFmpeg is free software; you can redistribute it and/or
       9             :  * modify it under the terms of the GNU Lesser General Public
      10             :  * License as published by the Free Software Foundation; either
      11             :  * version 2.1 of the License, or (at your option) any later version.
      12             :  *
      13             :  * FFmpeg is distributed in the hope that it will be useful,
      14             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      16             :  * Lesser General Public License for more details.
      17             :  *
      18             :  * You should have received a copy of the GNU Lesser General Public
      19             :  * License along with FFmpeg; if not, write to the Free Software
      20             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      21             :  */
      22             : 
      23             : #include "libavutil/pixdesc.h"
      24             : 
      25             : #include "bit_depth_template.c"
      26             : #include "hevcpred.h"
      27             : 
      28             : #define POS(x, y) src[(x) + stride * (y)]
      29             : 
      30    19889395 : static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
      31             :                                               int log2_size, int c_idx)
      32             : {
      33             : #define PU(x) \
      34             :     ((x) >> s->ps.sps->log2_min_pu_size)
      35             : #define MVF(x, y) \
      36             :     (s->ref->tab_mvf[(x) + (y) * min_pu_width])
      37             : #define MVF_PU(x, y) \
      38             :     MVF(PU(x0 + ((x) * (1 << hshift))), PU(y0 + ((y) * (1 << vshift))))
      39             : #define IS_INTRA(x, y) \
      40             :     (MVF_PU(x, y).pred_flag == PF_INTRA)
      41             : #define MIN_TB_ADDR_ZS(x, y) \
      42             :     s->ps.pps->min_tb_addr_zs[(y) * (s->ps.sps->tb_mask+2) + (x)]
      43             : #define EXTEND(ptr, val, len)         \
      44             : do {                                  \
      45             :     pixel4 pix = PIXEL_SPLAT_X4(val); \
      46             :     for (i = 0; i < (len); i += 4)    \
      47             :         AV_WN4P(ptr + i, pix);        \
      48             : } while (0)
      49             : 
      50             : #define EXTEND_RIGHT_CIP(ptr, start, length)                                   \
      51             :         for (i = start; i < (start) + (length); i += 4)                        \
      52             :             if (!IS_INTRA(i, -1))                                              \
      53             :                 AV_WN4P(&ptr[i], a);                                           \
      54             :             else                                                               \
      55             :                 a = PIXEL_SPLAT_X4(ptr[i+3])
      56             : #define EXTEND_LEFT_CIP(ptr, start, length) \
      57             :         for (i = start; i > (start) - (length); i--) \
      58             :             if (!IS_INTRA(i - 1, -1)) \
      59             :                 ptr[i - 1] = ptr[i]
      60             : #define EXTEND_UP_CIP(ptr, start, length)                                      \
      61             :         for (i = (start); i > (start) - (length); i -= 4)                      \
      62             :             if (!IS_INTRA(-1, i - 3))                                          \
      63             :                 AV_WN4P(&ptr[i - 3], a);                                       \
      64             :             else                                                               \
      65             :                 a = PIXEL_SPLAT_X4(ptr[i - 3])
      66             : #define EXTEND_DOWN_CIP(ptr, start, length)                                    \
      67             :         for (i = start; i < (start) + (length); i += 4)                        \
      68             :             if (!IS_INTRA(-1, i))                                              \
      69             :                 AV_WN4P(&ptr[i], a);                                           \
      70             :             else                                                               \
      71             :                 a = PIXEL_SPLAT_X4(ptr[i + 3])
      72             : 
      73    19889395 :     HEVCLocalContext *lc = s->HEVClc;
      74             :     int i;
      75    19889395 :     int hshift = s->ps.sps->hshift[c_idx];
      76    19889395 :     int vshift = s->ps.sps->vshift[c_idx];
      77    19889395 :     int size = (1 << log2_size);
      78    19889395 :     int size_in_luma_h = size << hshift;
      79    19889395 :     int size_in_tbs_h  = size_in_luma_h >> s->ps.sps->log2_min_tb_size;
      80    19889395 :     int size_in_luma_v = size << vshift;
      81    19889395 :     int size_in_tbs_v  = size_in_luma_v >> s->ps.sps->log2_min_tb_size;
      82    19889395 :     int x = x0 >> hshift;
      83    19889395 :     int y = y0 >> vshift;
      84    19889395 :     int x_tb = (x0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
      85    19889395 :     int y_tb = (y0 >> s->ps.sps->log2_min_tb_size) & s->ps.sps->tb_mask;
      86             : 
      87    19889395 :     int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
      88             : 
      89    19889395 :     ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
      90    19889395 :     pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;
      91             : 
      92    19889395 :     int min_pu_width = s->ps.sps->min_pu_width;
      93             : 
      94    29238938 :     enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
      95     9349543 :                               lc->tu.intra_pred_mode;
      96             :     pixel4 a;
      97             :     pixel  left_array[2 * MAX_TB_SIZE + 1];
      98             :     pixel  filtered_left_array[2 * MAX_TB_SIZE + 1];
      99             :     pixel  top_array[2 * MAX_TB_SIZE + 1];
     100             :     pixel  filtered_top_array[2 * MAX_TB_SIZE + 1];
     101             : 
     102    19889395 :     pixel  *left          = left_array + 1;
     103    19889395 :     pixel  *top           = top_array  + 1;
     104    19889395 :     pixel  *filtered_left = filtered_left_array + 1;
     105    19889395 :     pixel  *filtered_top  = filtered_top_array  + 1;
     106    19889395 :     int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->ps.sps->tb_mask);
     107    19889395 :     int cand_left        = lc->na.cand_left;
     108    19889395 :     int cand_up_left     = lc->na.cand_up_left;
     109    19889395 :     int cand_up          = lc->na.cand_up;
     110    19889395 :     int cand_up_right    = lc->na.cand_up_right    && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->ps.sps->tb_mask, y_tb - 1);
     111             : 
     112    39778790 :     int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->ps.sps->height) -
     113    19889395 :                            (y0 + size_in_luma_v)) >> vshift;
     114    39778790 :     int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) -
     115    19889395 :                            (x0 + size_in_luma_h)) >> hshift;
     116             : 
     117    19889395 :     if (s->ps.pps->constrained_intra_pred_flag == 1) {
     118      135525 :         int size_in_luma_pu_v = PU(size_in_luma_v);
     119      135525 :         int size_in_luma_pu_h = PU(size_in_luma_h);
     120      135525 :         int on_pu_edge_x    = !av_mod_uintp2(x0, s->ps.sps->log2_min_pu_size);
     121      135525 :         int on_pu_edge_y    = !av_mod_uintp2(y0, s->ps.sps->log2_min_pu_size);
     122      135525 :         if (!size_in_luma_pu_h)
     123           0 :             size_in_luma_pu_h++;
     124      135525 :         if (cand_bottom_left == 1 && on_pu_edge_x) {
     125       42080 :             int x_left_pu   = PU(x0 - 1);
     126       42080 :             int y_bottom_pu = PU(y0 + size_in_luma_v);
     127       42080 :             int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_bottom_pu);
     128       42080 :             cand_bottom_left = 0;
     129       89902 :             for (i = 0; i < max; i += 2)
     130       47822 :                 cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
     131             :         }
     132      135525 :         if (cand_left == 1 && on_pu_edge_x) {
     133      133107 :             int x_left_pu   = PU(x0 - 1);
     134      133107 :             int y_left_pu   = PU(y0);
     135      133107 :             int max = FFMIN(size_in_luma_pu_v, s->ps.sps->min_pu_height - y_left_pu);
     136      133107 :             cand_left = 0;
     137      287436 :             for (i = 0; i < max; i += 2)
     138      154329 :                 cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
     139             :         }
     140      135525 :         if (cand_up_left == 1) {
     141      129084 :             int x_left_pu   = PU(x0 - 1);
     142      129084 :             int y_top_pu    = PU(y0 - 1);
     143      129084 :             cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
     144             :         }
     145      135525 :         if (cand_up == 1 && on_pu_edge_y) {
     146      131403 :             int x_top_pu    = PU(x0);
     147      131403 :             int y_top_pu    = PU(y0 - 1);
     148      131403 :             int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_top_pu);
     149      131403 :             cand_up = 0;
     150      282852 :             for (i = 0; i < max; i += 2)
     151      151449 :                 cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
     152             :         }
     153      135525 :         if (cand_up_right == 1 && on_pu_edge_y) {
     154       87193 :             int y_top_pu    = PU(y0 - 1);
     155       87193 :             int x_right_pu  = PU(x0 + size_in_luma_h);
     156       87193 :             int max = FFMIN(size_in_luma_pu_h, s->ps.sps->min_pu_width - x_right_pu);
     157       87193 :             cand_up_right = 0;
     158      187988 :             for (i = 0; i < max; i += 2)
     159      100795 :                 cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
     160             :         }
     161      135525 :         memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel));
     162      135525 :         memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel));
     163      135525 :         top[-1] = 128;
     164             :     }
     165    19889395 :     if (cand_up_left) {
     166    19297347 :         left[-1] = POS(-1, -1);
     167    19297347 :         top[-1]  = left[-1];
     168             :     }
     169    19889395 :     if (cand_up)
     170    19522292 :         memcpy(top, src - stride, size * sizeof(pixel));
     171    19889395 :     if (cand_up_right) {
     172    12739818 :         memcpy(top + size, src - stride + size, size * sizeof(pixel));
     173    12739818 :         EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
     174             :                size - top_right_size);
     175             :     }
     176    19889395 :     if (cand_left)
     177   137203867 :         for (i = 0; i < size; i++)
     178   117551288 :             left[i] = POS(-1, i);
     179    19889395 :     if (cand_bottom_left) {
     180    43985418 :         for (i = size; i < size + bottom_left_size; i++)
     181    37452900 :             left[i] = POS(-1, i);
     182     6532518 :         EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
     183             :                size - bottom_left_size);
     184             :     }
     185             : 
     186    19889395 :     if (s->ps.pps->constrained_intra_pred_flag == 1) {
     187      135525 :         if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
     188      267702 :             int size_max_x = x0 + ((2 * size) << hshift) < s->ps.sps->width ?
     189      133851 :                                     2 * size : (s->ps.sps->width - x0) >> hshift;
     190      267702 :             int size_max_y = y0 + ((2 * size) << vshift) < s->ps.sps->height ?
     191      133851 :                                     2 * size : (s->ps.sps->height - y0) >> vshift;
     192      133851 :             int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
     193      133851 :             if (!cand_up_right) {
     194      104394 :                 size_max_x = x0 + ((size) << hshift) < s->ps.sps->width ?
     195       52197 :                                                     size : (s->ps.sps->width - x0) >> hshift;
     196             :             }
     197      133851 :             if (!cand_bottom_left) {
     198      189324 :                 size_max_y = y0 + (( size) << vshift) < s->ps.sps->height ?
     199       94662 :                                                      size : (s->ps.sps->height - y0) >> vshift;
     200             :             }
     201      133851 :             if (cand_bottom_left || cand_left || cand_up_left) {
     202      264764 :                 while (j > -1 && !IS_INTRA(-1, j))
     203        8640 :                     j--;
     204      256124 :                 if (!IS_INTRA(-1, j)) {
     205           0 :                     j = 0;
     206           0 :                     while (j < size_max_x && !IS_INTRA(j, -1))
     207           0 :                         j++;
     208           0 :                     EXTEND_LEFT_CIP(top, j, j + 1);
     209           0 :                     left[-1] = top[-1];
     210             :                 }
     211             :             } else {
     212        5789 :                 j = 0;
     213       15218 :                 while (j < size_max_x && !IS_INTRA(j, -1))
     214        3640 :                     j++;
     215        5789 :                 if (j > 0)
     216         409 :                     if (x0 > 0) {
     217         380 :                         EXTEND_LEFT_CIP(top, j, j + 1);
     218             :                     } else {
     219          29 :                         EXTEND_LEFT_CIP(top, j, j);
     220          29 :                         top[-1] = top[0];
     221             :                     }
     222        5789 :                 left[-1] = top[-1];
     223             :             }
     224      133851 :             left[-1] = top[-1];
     225      133851 :             if (cand_bottom_left || cand_left) {
     226      126898 :                 a = PIXEL_SPLAT_X4(left[-1]);
     227      126898 :                 EXTEND_DOWN_CIP(left, 0, size_max_y);
     228             :             }
     229      133851 :             if (!cand_left)
     230        7301 :                 EXTEND(left, left[-1], size);
     231      133851 :             if (!cand_bottom_left)
     232       94662 :                 EXTEND(left + size, left[size - 1], size);
     233      133851 :             if (x0 != 0 && y0 != 0) {
     234      130185 :                 a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
     235      130185 :                 EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
     236      260370 :                 if (!IS_INTRA(-1, - 1))
     237        7426 :                     left[-1] = left[0];
     238        3666 :             } else if (x0 == 0) {
     239        1552 :                 EXTEND(left, 0, size_max_y);
     240             :             } else {
     241        2114 :                 a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
     242        2114 :                 EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
     243             :             }
     244      133851 :             top[-1] = left[-1];
     245      133851 :             if (y0 != 0) {
     246      131737 :                 a = PIXEL_SPLAT_X4(left[-1]);
     247      131737 :                 EXTEND_RIGHT_CIP(top, 0, size_max_x);
     248             :             }
     249             :         }
     250             :     }
     251             :     // Infer the unavailable samples
     252    19889395 :     if (!cand_bottom_left) {
     253    13356877 :         if (cand_left) {
     254    13120409 :             EXTEND(left + size, left[size - 1], size);
     255      236468 :         } else if (cand_up_left) {
     256        1164 :             EXTEND(left, left[-1], 2 * size);
     257        1164 :             cand_left = 1;
     258      235304 :         } else if (cand_up) {
     259      224947 :             left[-1] = top[0];
     260      224947 :             EXTEND(left, left[-1], 2 * size);
     261      224947 :             cand_up_left = 1;
     262      224947 :             cand_left    = 1;
     263       10357 :         } else if (cand_up_right) {
     264         340 :             EXTEND(top, top[size], size);
     265         340 :             left[-1] = top[size];
     266         340 :             EXTEND(left, left[-1], 2 * size);
     267         340 :             cand_up      = 1;
     268         340 :             cand_up_left = 1;
     269         340 :             cand_left    = 1;
     270             :         } else { // No samples available
     271       10017 :             left[-1] = (1 << (BIT_DEPTH - 1));
     272       10017 :             EXTEND(top,  left[-1], 2 * size);
     273       10017 :             EXTEND(left, left[-1], 2 * size);
     274             :         }
     275             :     }
     276             : 
     277    19889395 :     if (!cand_left)
     278       10365 :         EXTEND(left, left[size], size);
     279    19889395 :     if (!cand_up_left) {
     280      366761 :         left[-1] = left[0];
     281             :     }
     282    19889395 :     if (!cand_up)
     283      366763 :         EXTEND(top, left[-1], size);
     284    19889395 :     if (!cand_up_right)
     285     7149577 :         EXTEND(top + size, top[size - 1], size);
     286             : 
     287    19889395 :     top[-1] = left[-1];
     288             : 
     289             :     // Filtering process
     290    19889395 :     if (!s->ps.sps->intra_smoothing_disabled_flag && (c_idx == 0  || s->ps.sps->chroma_format_idc == 3)) {
     291     9562278 :         if (mode != INTRA_DC && size != 4){
     292     3039639 :             int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
     293     3039639 :             int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
     294             :                                           FFABS((int)(mode - 10U)));
     295     3039639 :             if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
     296     1224391 :                 int threshold = 1 << (BIT_DEPTH - 5);
     297     1224391 :                 if (s->ps.sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
     298      139757 :                     log2_size == 5 &&
     299      193224 :                     FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
     300       53467 :                     FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
     301             :                     // We can't just overwrite values in top because it could be
     302             :                     // a pointer into src
     303       28886 :                     filtered_top[-1] = top[-1];
     304       28886 :                     filtered_top[63] = top[63];
     305     1848704 :                     for (i = 0; i < 63; i++)
     306     5459454 :                         filtered_top[i] = ((64 - (i + 1)) * top[-1] +
     307     3639636 :                                            (i + 1)  * top[63] + 32) >> 6;
     308     1848704 :                     for (i = 0; i < 63; i++)
     309     5459454 :                         left[i] = ((64 - (i + 1)) * left[-1] +
     310     3639636 :                                    (i + 1)  * left[63] + 32) >> 6;
     311       28886 :                     top = filtered_top;
     312             :                 } else {
     313     1195505 :                     filtered_left[2 * size - 1] = left[2 * size - 1];
     314     1195505 :                     filtered_top[2 * size - 1]  = top[2 * size - 1];
     315    33748832 :                     for (i = 2 * size - 2; i >= 0; i--)
     316    97659981 :                         filtered_left[i] = (left[i + 1] + 2 * left[i] +
     317    65106654 :                                             left[i - 1] + 2) >> 2;
     318     2391010 :                     filtered_top[-1]  =
     319     2391010 :                     filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
     320    33748832 :                     for (i = 2 * size - 2; i >= 0; i--)
     321    97659981 :                         filtered_top[i] = (top[i + 1] + 2 * top[i] +
     322    65106654 :                                            top[i - 1] + 2) >> 2;
     323     1195505 :                     left = filtered_left;
     324     1195505 :                     top  = filtered_top;
     325             :                 }
     326             :             }
     327             :         }
     328             :     }
     329             : 
     330    19889395 :     switch (mode) {
     331     4109971 :     case INTRA_PLANAR:
     332     4109971 :         s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
     333             :                                           (uint8_t *)left, stride);
     334     4109971 :         break;
     335     2427411 :     case INTRA_DC:
     336     2427411 :         s->hpc.pred_dc((uint8_t *)src, (uint8_t *)top,
     337             :                        (uint8_t *)left, stride, log2_size, c_idx);
     338     2427411 :         break;
     339    13352013 :     default:
     340    13352013 :         s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top,
     341             :                                            (uint8_t *)left, stride, c_idx,
     342             :                                            mode);
     343    13352013 :         break;
     344             :     }
     345    19889395 : }
     346             : 
     347             : #define INTRA_PRED(size)                                                            \
     348             : static void FUNC(intra_pred_ ## size)(HEVCContext *s, int x0, int y0, int c_idx)    \
     349             : {                                                                                   \
     350             :     FUNC(intra_pred)(s, x0, y0, size, c_idx);                                       \
     351             : }
     352             : 
     353    14166370 : INTRA_PRED(2)
     354     4097750 : INTRA_PRED(3)
     355     1357110 : INTRA_PRED(4)
     356      268165 : INTRA_PRED(5)
     357             : 
     358             : #undef INTRA_PRED
     359             : 
     360     4109971 : static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
     361             :                                   const uint8_t *_left, ptrdiff_t stride,
     362             :                                   int trafo_size)
     363             : {
     364             :     int x, y;
     365     4109971 :     pixel *src        = (pixel *)_src;
     366     4109971 :     const pixel *top  = (const pixel *)_top;
     367     4109971 :     const pixel *left = (const pixel *)_left;
     368     4109971 :     int size = 1 << trafo_size;
     369    30297771 :     for (y = 0; y < size; y++)
     370   292440024 :         for (x = 0; x < size; x++)
     371   798756672 :             POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size]  +
     372   532504448 :                          (size - 1 - y) * top[x]  + (y + 1) * left[size] + size) >> (trafo_size + 1);
     373     4109971 : }
     374             : 
     375             : #define PRED_PLANAR(size)\
     376             : static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top,        \
     377             :                                        const uint8_t *left, ptrdiff_t stride)   \
     378             : {                                                                               \
     379             :     FUNC(pred_planar)(src, top, left, stride, size + 2);                        \
     380             : }
     381             : 
     382     2809760 : PRED_PLANAR(0)
     383      882623 : PRED_PLANAR(1)
     384      342190 : PRED_PLANAR(2)
     385       75398 : PRED_PLANAR(3)
     386             : 
     387             : #undef PRED_PLANAR
     388             : 
     389     2427411 : static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
     390             :                           const uint8_t *_left,
     391             :                           ptrdiff_t stride, int log2_size, int c_idx)
     392             : {
     393             :     int i, j, x, y;
     394     2427411 :     int size          = (1 << log2_size);
     395     2427411 :     pixel *src        = (pixel *)_src;
     396     2427411 :     const pixel *top  = (const pixel *)_top;
     397     2427411 :     const pixel *left = (const pixel *)_left;
     398     2427411 :     int dc            = size;
     399             :     pixel4 a;
     400    19159659 :     for (i = 0; i < size; i++)
     401    16732248 :         dc += left[i] + top[i];
     402             : 
     403     2427411 :     dc >>= log2_size + 1;
     404             : 
     405     2427411 :     a = PIXEL_SPLAT_X4(dc);
     406             : 
     407    19159659 :     for (i = 0; i < size; i++)
     408    63737928 :         for (j = 0; j < size; j+=4)
     409    47005680 :             AV_WN4P(&POS(j, i), a);
     410             : 
     411     2427411 :     if (c_idx == 0 && size < 32) {
     412      870231 :         POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
     413     5738272 :         for (x = 1; x < size; x++)
     414     4868041 :             POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;
     415     5738272 :         for (y = 1; y < size; y++)
     416     4868041 :             POS(0, y) = (left[y] + 3 * dc + 2) >> 2;
     417             :     }
     418     2427411 : }
     419             : 
     420    13352013 : static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
     421             :                                                 const uint8_t *_top,
     422             :                                                 const uint8_t *_left,
     423             :                                                 ptrdiff_t stride, int c_idx,
     424             :                                                 int mode, int size)
     425             : {
     426             :     int x, y;
     427    13352013 :     pixel *src        = (pixel *)_src;
     428    13352013 :     const pixel *top  = (const pixel *)_top;
     429    13352013 :     const pixel *left = (const pixel *)_left;
     430             : 
     431             :     static const int intra_pred_angle[] = {
     432             :          32,  26,  21,  17, 13,  9,  5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
     433             :         -26, -21, -17, -13, -9, -5, -2, 0, 2,  5,  9, 13,  17,  21,  26,  32
     434             :     };
     435             :     static const int inv_angle[] = {
     436             :         -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
     437             :         -630, -910, -1638, -4096
     438             :     };
     439             : 
     440    13352013 :     int angle = intra_pred_angle[mode - 2];
     441             :     pixel ref_array[3 * MAX_TB_SIZE + 4];
     442    13352013 :     pixel *ref_tmp = ref_array + size;
     443             :     const pixel *ref;
     444    13352013 :     int last = (size * angle) >> 5;
     445             : 
     446    13352013 :     if (mode >= 18) {
     447     7269177 :         ref = top - 1;
     448     7269177 :         if (angle < 0 && last < -1) {
     449     6196558 :             for (x = 0; x <= size; x += 4)
     450     4412037 :                 AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
     451     8298165 :             for (x = last; x <= -1; x++)
     452     6513644 :                 ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
     453     1784521 :             ref = ref_tmp;
     454             :         }
     455             : 
     456    48712457 :         for (y = 0; y < size; y++) {
     457    41443280 :             int idx  = ((y + 1) * angle) >> 5;
     458    41443280 :             int fact = ((y + 1) * angle) & 31;
     459    41443280 :             if (fact) {
     460    75744550 :                 for (x = 0; x < size; x += 4) {
     461   152580084 :                     POS(x    , y) = ((32 - fact) * ref[x + idx + 1] +
     462   101720056 :                                            fact  * ref[x + idx + 2] + 16) >> 5;
     463   152580084 :                     POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
     464   101720056 :                                            fact  * ref[x + 1 + idx + 2] + 16) >> 5;
     465   152580084 :                     POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
     466   101720056 :                                            fact  * ref[x + 2 + idx + 2] + 16) >> 5;
     467   152580084 :                     POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
     468   101720056 :                                            fact  * ref[x + 3 + idx + 2] + 16) >> 5;
     469             :                 }
     470             :             } else {
     471    52523746 :                 for (x = 0; x < size; x += 4)
     472    35964988 :                     AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
     473             :             }
     474             :         }
     475     7269177 :         if (mode == 26 && c_idx == 0 && size < 32) {
     476     5705134 :             for (y = 0; y < size; y++)
     477     4886660 :                 POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
     478             :         }
     479             :     } else {
     480     6082836 :         ref = left - 1;
     481     6082836 :         if (angle < 0 && last < -1) {
     482     4141708 :             for (x = 0; x <= size; x += 4)
     483     2961932 :                 AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
     484     5188589 :             for (x = last; x <= -1; x++)
     485     4008813 :                 ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
     486     1179776 :             ref = ref_tmp;
     487             :         }
     488             : 
     489    41462028 :         for (x = 0; x < size; x++) {
     490    35379192 :             int idx  = ((x + 1) * angle) >> 5;
     491    35379192 :             int fact = ((x + 1) * angle) & 31;
     492    35379192 :             if (fact) {
     493   245736908 :                 for (y = 0; y < size; y++) {
     494   660923472 :                     POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
     495   440615648 :                                        fact  * ref[y + idx + 2] + 16) >> 5;
     496             :                 }
     497             :             } else {
     498    99006316 :                 for (y = 0; y < size; y++)
     499    89056208 :                     POS(x, y) = ref[y + idx + 1];
     500             :             }
     501             :         }
     502     6082836 :         if (mode == 10 && c_idx == 0 && size < 32) {
     503     1039199 :             for (x = 0; x < size; x += 4) {
     504      618222 :                 POS(x,     0) = av_clip_pixel(left[0] + ((top[x    ] - top[-1]) >> 1));
     505      618222 :                 POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
     506      618222 :                 POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
     507      618222 :                 POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
     508             :             }
     509             :         }
     510             :     }
     511    13352013 : }
     512             : 
     513     9809314 : static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
     514             :                                  const uint8_t *left,
     515             :                                  ptrdiff_t stride, int c_idx, int mode)
     516             : {
     517     9809314 :     FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 2);
     518     9809314 : }
     519             : 
     520     2653576 : static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
     521             :                                  const uint8_t *left,
     522             :                                  ptrdiff_t stride, int c_idx, int mode)
     523             : {
     524     2653576 :     FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 3);
     525     2653576 : }
     526             : 
     527      755958 : static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
     528             :                                  const uint8_t *left,
     529             :                                  ptrdiff_t stride, int c_idx, int mode)
     530             : {
     531      755958 :     FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 4);
     532      755958 : }
     533             : 
     534      133165 : static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
     535             :                                  const uint8_t *left,
     536             :                                  ptrdiff_t stride, int c_idx, int mode)
     537             : {
     538      133165 :     FUNC(pred_angular)(src, top, left, stride, c_idx, mode, 1 << 5);
     539      133165 : }
     540             : 
     541             : #undef EXTEND_LEFT_CIP
     542             : #undef EXTEND_RIGHT_CIP
     543             : #undef EXTEND_UP_CIP
     544             : #undef EXTEND_DOWN_CIP
     545             : #undef IS_INTRA
     546             : #undef MVF_PU
     547             : #undef MVF
     548             : #undef PU
     549             : #undef EXTEND
     550             : #undef MIN_TB_ADDR_ZS
     551             : #undef POS

Generated by: LCOV version 1.13