FFmpeg coverage

Directory:	../../../ffmpeg/
File:	src/libavfilter/vf_fsppdsp.c
Date:	2026-05-03 08:24:11

	Exec	Total	Coverage
Lines:	112	184	60.9%
Functions:	4	6	66.7%
Branches:	30	34	88.2%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
       * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
    
       * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
    
       * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
    
       *
    
       * This file is part of FFmpeg.
    
       *
    
       * FFmpeg is free software; you can redistribute it and/or modify
    
       * it under the terms of the GNU General Public License as published by
    
       * the Free Software Foundation; either version 2 of the License, or
    
       * (at your option) any later version.
    
       *
    
       * FFmpeg is distributed in the hope that it will be useful,
    
       * but WITHOUT ANY WARRANTY; without even the implied warranty of
    
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    
       * GNU General Public License for more details.
    
       *
    
       * You should have received a copy of the GNU General Public License along
    
       * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
    
       * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    
       */
    
      #include <stdint.h>
    
      #include "vf_fsppdsp.h"
    
      #include "libavutil/common.h"
    
      #include "libavutil/mathematics.h"
    
      #include "libavutil/mem_internal.h"
    
      #define DCTSIZE 8
    
      #define FIX(x,s)  (int)((x) * (1 << s) + 0.5)
    
      #define MULTIPLY16H(x,k)   (((x) * (k)) >> 16)
    
      #define THRESHOLD(r,x,t)                         \
    
          if (((unsigned)((x) + t)) >= t * 2) r = (x); \
    
          else r = 0;
    
      #define DESCALE(x,n)  (((x) + (1 << ((n) - 1))) >> n)
    
      typedef int32_t int_simd16_t;
    
      enum {
    
          FIX_0_382683433   = FIX(0.382683433, 14),
    
          FIX_0_541196100   = FIX(0.541196100, 14),
    
          FIX_0_707106781   = FIX(M_SQRT1_2  , 14),
    
          FIX_1_306562965   = FIX(1.306562965, 14),
    
          FIX_1_414213562_A = FIX(M_SQRT2    , 14),
    
          FIX_1_847759065   = FIX(1.847759065, 13),
    
          FIX_2_613125930   = FIX(-2.613125930, 13),
    
          FIX_1_414213562   = FIX(M_SQRT2    , 13),
    
          FIX_1_082392200   = FIX(1.082392200, 13),
    
      };
    
      DECLARE_ALIGNED(8, const uint8_t, ff_fspp_dither)[8][8] = {
    
          {  0,  48,  12,  60,   3,  51,  15,  63, },
    
          { 32,  16,  44,  28,  35,  19,  47,  31, },
    
          {  8,  56,   4,  52,  11,  59,   7,  55, },
    
          { 40,  24,  36,  20,  43,  27,  39,  23, },
    
          {  2,  50,  14,  62,   1,  49,  13,  61, },
    
          { 34,  18,  46,  30,  33,  17,  45,  29, },
    
          { 10,  58,   6,  54,   9,  57,   5,  53, },
    
          { 42,  26,  38,  22,  41,  25,  37,  21, },
    
      };
    
      //This func reads from 1 slice, 1 and clears 0 & 1
    
      3
      void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
    
                            ptrdiff_t dst_stride, ptrdiff_t src_stride,
    
                            ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
    
      {
    
      #define STORE(pos)                                                             \
    
          temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale);        \
    
          src[x + pos] = src[x + pos - 8 * src_stride] = 0;                          \
    
          temp = av_clip_uint8(temp);                                                \
    
          dst[x + pos] = temp;
    
        2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 3 times.

      18
          for (int y = 0; y < height; y++) {
    
      15
              const uint8_t *d = ff_fspp_dither[y];
    
        2/2✓ Branch 0 taken 310 times.
✓ Branch 1 taken 15 times.

      325
              for (int x = 0; x < width; x += 8) {
    
                  int temp;
    
      310
                  STORE(0);
    
      310
                  STORE(1);
    
      310
                  STORE(2);
    
      310
                  STORE(3);
    
      310
                  STORE(4);
    
      310
                  STORE(5);
    
      310
                  STORE(6);
    
      310
                  STORE(7);
    
              }
    
      15
              src += src_stride;
    
      15
              dst += dst_stride;
    
          }
    
      3
      }
    
      //This func reads from 2 slices, 0 & 2  and clears 2-nd
    
      3
      void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
    
                             ptrdiff_t dst_stride, ptrdiff_t src_stride,
    
                             ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
    
      {
    
      #define STORE2(pos)                                                                                       \
    
          temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale);  \
    
          src[x + pos + 16 * src_stride] = 0;                                                                   \
    
          temp = av_clip_uint8(temp);                                                                           \
    
          dst[x + pos] = temp;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.

      9
          for (int y = 0; y < height; y++) {
    
      6
              const uint8_t *d = ff_fspp_dither[y];
    
        2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 6 times.

      162
              for (int x = 0; x < width; x += 8) {
    
                  int temp;
    
      156
                  STORE2(0);
    
      156
                  STORE2(1);
    
      156
                  STORE2(2);
    
      156
                  STORE2(3);
    
      156
                  STORE2(4);
    
      156
                  STORE2(5);
    
      156
                  STORE2(6);
    
      156
                  STORE2(7);
    
              }
    
      6
              src += src_stride;
    
      6
              dst += dst_stride;
    
          }
    
      3
      }
    
      3
      void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
    
      {
    
        2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 3 times.

      195
          for (int a = 0; a < 64; a++)
    
      192
              thr_adr[a] = q * thr_adr_noq[a];
    
      3
      }
    
      3
      void ff_column_fidct_c(const int16_t *restrict thr_adr, const int16_t *restrict data,
    
                             int16_t *restrict output, int cnt)
    
      {
    
          int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
          int_simd16_t tmp10, tmp11, tmp12, tmp13;
    
          int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
    
          int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
    
          int16_t *wsptr;
    
      3
          wsptr = output;
    
        2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 3 times.

      99
          for (; cnt > 0; cnt -= 2) { //start positions
    
      96
              const int16_t *threshold = thr_adr;//threshold_mtx
    
        2/2✓ Branch 0 taken 768 times.
✓ Branch 1 taken 96 times.

      864
              for (int ctr = DCTSIZE; ctr > 0; ctr--) {
    
                  // Process columns from input, add to output.
    
      768
                  tmp0 = data[DCTSIZE * 0] + data[DCTSIZE * 7];
    
      768
                  tmp7 = data[DCTSIZE * 0] - data[DCTSIZE * 7];
    
      768
                  tmp1 = data[DCTSIZE * 1] + data[DCTSIZE * 6];
    
      768
                  tmp6 = data[DCTSIZE * 1] - data[DCTSIZE * 6];
    
      768
                  tmp2 = data[DCTSIZE * 2] + data[DCTSIZE * 5];
    
      768
                  tmp5 = data[DCTSIZE * 2] - data[DCTSIZE * 5];
    
      768
                  tmp3 = data[DCTSIZE * 3] + data[DCTSIZE * 4];
    
      768
                  tmp4 = data[DCTSIZE * 3] - data[DCTSIZE * 4];
    
                  // Even part of FDCT
    
      768
                  tmp10 = tmp0 + tmp3;
    
      768
                  tmp13 = tmp0 - tmp3;
    
      768
                  tmp11 = tmp1 + tmp2;
    
      768
                  tmp12 = tmp1 - tmp2;
    
      768
                  d0 = tmp10 + tmp11;
    
      768
                  d4 = tmp10 - tmp11;
    
      768
                  z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
    
      768
                  d2 = tmp13 + z1;
    
      768
                  d6 = tmp13 - z1;
    
                  // Even part of IDCT
    
        2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 704 times.

      768
                  THRESHOLD(tmp0, d0, threshold[0 * 8]);
    
        2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 727 times.

      768
                  THRESHOLD(tmp1, d2, threshold[2 * 8]);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 754 times.

      768
                  THRESHOLD(tmp2, d4, threshold[4 * 8]);
    
        2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 712 times.

      768
                  THRESHOLD(tmp3, d6, threshold[6 * 8]);
    
      768
                  tmp0 += 2;
    
      768
                  tmp10 = (tmp0 + tmp2) >> 2;
    
      768
                  tmp11 = (tmp0 - tmp2) >> 2;
    
      768
                  tmp13 = (tmp1 + tmp3) >>2; //+2 !  (psnr decides)
    
      768
                  tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
    
      768
                  tmp0 = tmp10 + tmp13; //->temps
    
      768
                  tmp3 = tmp10 - tmp13; //->temps
    
      768
                  tmp1 = tmp11 + tmp12; //->temps
    
      768
                  tmp2 = tmp11 - tmp12; //->temps
    
                  // Odd part of FDCT
    
      768
                  tmp10 = tmp4 + tmp5;
    
      768
                  tmp11 = tmp5 + tmp6;
    
      768
                  tmp12 = tmp6 + tmp7;
    
      768
                  z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
    
      768
                  z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
    
      768
                  z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
    
      768
                  z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
    
      768
                  z11 = tmp7 + z3;
    
      768
                  z13 = tmp7 - z3;
    
      768
                  d5 = z13 + z2;
    
      768
                  d3 = z13 - z2;
    
      768
                  d1 = z11 + z4;
    
      768
                  d7 = z11 - z4;
    
                  // Odd part of IDCT
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 744 times.

      768
                  THRESHOLD(tmp4, d1, threshold[1 * 8]);
    
        2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 740 times.

      768
                  THRESHOLD(tmp5, d3, threshold[3 * 8]);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 744 times.

      768
                  THRESHOLD(tmp6, d5, threshold[5 * 8]);
    
        2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 757 times.

      768
                  THRESHOLD(tmp7, d7, threshold[7 * 8]);
    
                  //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
    
      768
                  z13 = tmp6 + tmp5;
    
      768
                  z10 = (tmp6 - tmp5) * 2;
    
      768
                  z11 = tmp4 + tmp7;
    
      768
                  z12 = (tmp4 - tmp7) * 2;
    
      768
                  tmp7  = (z11 + z13) >> 2; //+2 !
    
      768
                  tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562 << 1);
    
      768
                  z5    = MULTIPLY16H(z10 + z12, FIX_1_847759065);
    
      768
                  tmp10 = MULTIPLY16H(z12,       FIX_1_082392200) - z5;
    
      768
                  tmp12 = MULTIPLY16H(z10,       FIX_2_613125930) + z5; // - !!
    
      768
                  tmp6 = tmp12 - tmp7;
    
      768
                  tmp5 = tmp11 - tmp6;
    
      768
                  tmp4 = tmp10 + tmp5;
    
      768
                  wsptr[DCTSIZE * 0] +=  (tmp0 + tmp7);
    
      768
                  wsptr[DCTSIZE * 1] +=  (tmp1 + tmp6);
    
      768
                  wsptr[DCTSIZE * 2] +=  (tmp2 + tmp5);
    
      768
                  wsptr[DCTSIZE * 3] +=  (tmp3 - tmp4);
    
      768
                  wsptr[DCTSIZE * 4] +=  (tmp3 + tmp4);
    
      768
                  wsptr[DCTSIZE * 5] +=  (tmp2 - tmp5);
    
      768
                  wsptr[DCTSIZE * 6]  =  (tmp1 - tmp6);
    
      768
                  wsptr[DCTSIZE * 7]  =  (tmp0 - tmp7);
    
                  //
    
      768
                  data++; //next column
    
      768
                  wsptr++;
    
      768
                  threshold++;
    
              }
    
      96
              data  += 8; //skip each second start pos
    
      96
              wsptr   += 8;
    
          }
    
      3
      }
    
      ✗
      void ff_row_idct_c(const int16_t *restrict wsptr, int16_t *restrict output_adr,
    
                         ptrdiff_t output_stride, int cnt)
    
      {
    
          int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
          int_simd16_t tmp10, tmp11, tmp12, tmp13;
    
          int_simd16_t z5, z10, z11, z12, z13;
    
          int16_t *outptr;
    
      ✗
          cnt *= 4;
    
      ✗
          outptr = output_adr;
    
      ✗
          for (; cnt > 0; cnt--) {
    
              // Even part
    
              //Simd version reads 4x4 block and transposes it
    
      ✗
              tmp10 = wsptr[2] +  wsptr[3];
    
      ✗
              tmp11 = wsptr[2] -  wsptr[3];
    
      ✗
              tmp13 = wsptr[0] +  wsptr[1];
    
      ✗
              tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) * 4) - tmp13;//this shift order to avoid overflow
    
      ✗
              tmp0 = tmp10 + tmp13; //->temps
    
      ✗
              tmp3 = tmp10 - tmp13; //->temps
    
      ✗
              tmp1 = tmp11 + tmp12;
    
      ✗
              tmp2 = tmp11 - tmp12;
    
              // Odd part
    
              //Also transpose, with previous:
    
              // ---- ----      ||||
    
              // ---- ---- idct ||||
    
              // ---- ---- ---> ||||
    
              // ---- ----      ||||
    
      ✗
              z13 = wsptr[4] + wsptr[5];
    
      ✗
              z10 = wsptr[4] - wsptr[5];
    
      ✗
              z11 = wsptr[6] + wsptr[7];
    
      ✗
              z12 = wsptr[6] - wsptr[7];
    
      ✗
              tmp7 = z11 + z13;
    
      ✗
              tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
    
      ✗
              z5 =    MULTIPLY16H(z10 + z12, FIX_1_847759065);
    
      ✗
              tmp10 = MULTIPLY16H(z12,       FIX_1_082392200) - z5;
    
      ✗
              tmp12 = MULTIPLY16H(z10,       FIX_2_613125930) + z5; // - FIX_
    
      ✗
              tmp6 = tmp12 * 8 - tmp7;
    
      ✗
              tmp5 = tmp11 * 8 - tmp6;
    
      ✗
              tmp4 = tmp10 * 8 + tmp5;
    
              // Final output stage: descale and write column
    
      ✗
              outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
    
      ✗
              outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
    
      ✗
              outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
    
      ✗
              outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
    
      ✗
              outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
    
      ✗
              outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
    
      ✗
              outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
    
      ✗
              outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
    
      ✗
              outptr++;
    
      ✗
              wsptr += DCTSIZE;       // advance pointer to next row
    
          }
    
      ✗
      }
    
      ✗
      void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
    
                         ptrdiff_t line_size, int cnt)
    
      {
    
          int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
          int_simd16_t tmp10, tmp11, tmp12, tmp13;
    
          int_simd16_t z1, z2, z3, z4, z5, z11, z13;
    
          int16_t *dataptr;
    
      ✗
          cnt *= 4;
    
          // Pass 1: process rows.
    
      ✗
          dataptr = data;
    
      ✗
          for (; cnt > 0; cnt--) {
    
      ✗
              tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
    
      ✗
              tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
    
      ✗
              tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
    
      ✗
              tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
    
      ✗
              tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
    
      ✗
              tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
    
      ✗
              tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
    
      ✗
              tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
    
              // Even part
    
      ✗
              tmp10 = tmp0 + tmp3;
    
      ✗
              tmp13 = tmp0 - tmp3;
    
      ✗
              tmp11 = tmp1 + tmp2;
    
      ✗
              tmp12 = tmp1 - tmp2;
    
              //Even columns are written first, this leads to different order of columns
    
              //in column_fidct(), but they are processed independently, so all ok.
    
              //Later in the row_idct() columns are read in the same order.
    
      ✗
              dataptr[2] = tmp10 + tmp11;
    
      ✗
              dataptr[3] = tmp10 - tmp11;
    
      ✗
              z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
    
      ✗
              dataptr[0] = tmp13 + z1;
    
      ✗
              dataptr[1] = tmp13 - z1;
    
              // Odd part
    
      ✗
              tmp10 = tmp4 + tmp5;
    
      ✗
              tmp11 = tmp5 + tmp6;
    
      ✗
              tmp12 = tmp6 + tmp7;
    
      ✗
              z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
    
      ✗
              z2 = MULTIPLY16H(tmp10,         FIX_0_541196100 << 2) + z5;
    
      ✗
              z4 = MULTIPLY16H(tmp12,         FIX_1_306562965 << 2) + z5;
    
      ✗
              z3 = MULTIPLY16H(tmp11,         FIX_0_707106781 << 2);
    
      ✗
              z11 = tmp7 + z3;
    
      ✗
              z13 = tmp7 - z3;
    
      ✗
              dataptr[4] = z13 + z2;
    
      ✗
              dataptr[5] = z13 - z2;
    
      ✗
              dataptr[6] = z11 + z4;
    
      ✗
              dataptr[7] = z11 - z4;
    
      ✗
              pixels++;               // advance pointer to next column
    
      ✗
              dataptr += DCTSIZE;
    
          }
    
      ✗
      }

Line	Branch	Exec	Source
1			/*
2			* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3			* Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4			* Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5			*
6			* This file is part of FFmpeg.
7			*
8			* FFmpeg is free software; you can redistribute it and/or modify
9			* it under the terms of the GNU General Public License as published by
10			* the Free Software Foundation; either version 2 of the License, or
11			* (at your option) any later version.
12			*
13			* FFmpeg is distributed in the hope that it will be useful,
14			* but WITHOUT ANY WARRANTY; without even the implied warranty of
15			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16			* GNU General Public License for more details.
17			*
18			* You should have received a copy of the GNU General Public License along
19			* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20			* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21			*/
22
23			#include <stdint.h>
24
25			#include "vf_fsppdsp.h"
26
27			#include "libavutil/common.h"
28			#include "libavutil/mathematics.h"
29			#include "libavutil/mem_internal.h"
30
31			#define DCTSIZE 8
32
33			#define FIX(x,s) (int)((x) * (1 << s) + 0.5)
34
35			#define MULTIPLY16H(x,k) (((x) * (k)) >> 16)
36			#define THRESHOLD(r,x,t) \
37			if (((unsigned)((x) + t)) >= t * 2) r = (x); \
38			else r = 0;
39			#define DESCALE(x,n) (((x) + (1 << ((n) - 1))) >> n)
40
41			typedef int32_t int_simd16_t;
42
43			enum {
44			FIX_0_382683433 = FIX(0.382683433, 14),
45			FIX_0_541196100 = FIX(0.541196100, 14),
46			FIX_0_707106781 = FIX(M_SQRT1_2 , 14),
47			FIX_1_306562965 = FIX(1.306562965, 14),
48			FIX_1_414213562_A = FIX(M_SQRT2 , 14),
49			FIX_1_847759065 = FIX(1.847759065, 13),
50			FIX_2_613125930 = FIX(-2.613125930, 13),
51			FIX_1_414213562 = FIX(M_SQRT2 , 13),
52			FIX_1_082392200 = FIX(1.082392200, 13),
53			};
54
55			DECLARE_ALIGNED(8, const uint8_t, ff_fspp_dither)[8][8] = {
56			{ 0, 48, 12, 60, 3, 51, 15, 63, },
57			{ 32, 16, 44, 28, 35, 19, 47, 31, },
58			{ 8, 56, 4, 52, 11, 59, 7, 55, },
59			{ 40, 24, 36, 20, 43, 27, 39, 23, },
60			{ 2, 50, 14, 62, 1, 49, 13, 61, },
61			{ 34, 18, 46, 30, 33, 17, 45, 29, },
62			{ 10, 58, 6, 54, 9, 57, 5, 53, },
63			{ 42, 26, 38, 22, 41, 25, 37, 21, },
64			};
65
66			//This func reads from 1 slice, 1 and clears 0 & 1
67		3	void ff_store_slice_c(uint8_t *restrict dst, int16_t *restrict src,
68			ptrdiff_t dst_stride, ptrdiff_t src_stride,
69			ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
70			{
71			#define STORE(pos) \
72			temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
73			src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
74			temp = av_clip_uint8(temp); \
75			dst[x + pos] = temp;
76
77	2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 3 times.	18	for (int y = 0; y < height; y++) {
78		15	const uint8_t *d = ff_fspp_dither[y];
79	2/2 ✓ Branch 0 taken 310 times. ✓ Branch 1 taken 15 times.	325	for (int x = 0; x < width; x += 8) {
80			int temp;
81		310	STORE(0);
82		310	STORE(1);
83		310	STORE(2);
84		310	STORE(3);
85		310	STORE(4);
86		310	STORE(5);
87		310	STORE(6);
88		310	STORE(7);
89			}
90		15	src += src_stride;
91		15	dst += dst_stride;
92			}
93		3	}
94
95			//This func reads from 2 slices, 0 & 2 and clears 2-nd
96		3	void ff_store_slice2_c(uint8_t *restrict dst, int16_t *restrict src,
97			ptrdiff_t dst_stride, ptrdiff_t src_stride,
98			ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
99			{
100			#define STORE2(pos) \
101			temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
102			src[x + pos + 16 * src_stride] = 0; \
103			temp = av_clip_uint8(temp); \
104			dst[x + pos] = temp;
105
106	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 3 times.	9	for (int y = 0; y < height; y++) {
107		6	const uint8_t *d = ff_fspp_dither[y];
108	2/2 ✓ Branch 0 taken 156 times. ✓ Branch 1 taken 6 times.	162	for (int x = 0; x < width; x += 8) {
109			int temp;
110		156	STORE2(0);
111		156	STORE2(1);
112		156	STORE2(2);
113		156	STORE2(3);
114		156	STORE2(4);
115		156	STORE2(5);
116		156	STORE2(6);
117		156	STORE2(7);
118			}
119		6	src += src_stride;
120		6	dst += dst_stride;
121			}
122		3	}
123
124		3	void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr, int q)
125			{
126	2/2 ✓ Branch 0 taken 192 times. ✓ Branch 1 taken 3 times.	195	for (int a = 0; a < 64; a++)
127		192	thr_adr[a] = q * thr_adr_noq[a];
128		3	}
129
130		3	void ff_column_fidct_c(const int16_t *restrict thr_adr, const int16_t *restrict data,
131			int16_t *restrict output, int cnt)
132			{
133			int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
134			int_simd16_t tmp10, tmp11, tmp12, tmp13;
135			int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
136			int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
137
138			int16_t *wsptr;
139
140		3	wsptr = output;
141
142	2/2 ✓ Branch 0 taken 96 times. ✓ Branch 1 taken 3 times.	99	for (; cnt > 0; cnt -= 2) { //start positions
143		96	const int16_t *threshold = thr_adr;//threshold_mtx
144	2/2 ✓ Branch 0 taken 768 times. ✓ Branch 1 taken 96 times.	864	for (int ctr = DCTSIZE; ctr > 0; ctr--) {
145			// Process columns from input, add to output.
146		768	tmp0 = data[DCTSIZE * 0] + data[DCTSIZE * 7];
147		768	tmp7 = data[DCTSIZE * 0] - data[DCTSIZE * 7];
148
149		768	tmp1 = data[DCTSIZE * 1] + data[DCTSIZE * 6];
150		768	tmp6 = data[DCTSIZE * 1] - data[DCTSIZE * 6];
151
152		768	tmp2 = data[DCTSIZE * 2] + data[DCTSIZE * 5];
153		768	tmp5 = data[DCTSIZE * 2] - data[DCTSIZE * 5];
154
155		768	tmp3 = data[DCTSIZE * 3] + data[DCTSIZE * 4];
156		768	tmp4 = data[DCTSIZE * 3] - data[DCTSIZE * 4];
157
158			// Even part of FDCT
159
160		768	tmp10 = tmp0 + tmp3;
161		768	tmp13 = tmp0 - tmp3;
162		768	tmp11 = tmp1 + tmp2;
163		768	tmp12 = tmp1 - tmp2;
164
165		768	d0 = tmp10 + tmp11;
166		768	d4 = tmp10 - tmp11;
167
168		768	z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
169		768	d2 = tmp13 + z1;
170		768	d6 = tmp13 - z1;
171
172			// Even part of IDCT
173
174	2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 704 times.	768	THRESHOLD(tmp0, d0, threshold[0 * 8]);
175	2/2 ✓ Branch 0 taken 41 times. ✓ Branch 1 taken 727 times.	768	THRESHOLD(tmp1, d2, threshold[2 * 8]);
176	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 754 times.	768	THRESHOLD(tmp2, d4, threshold[4 * 8]);
177	2/2 ✓ Branch 0 taken 56 times. ✓ Branch 1 taken 712 times.	768	THRESHOLD(tmp3, d6, threshold[6 * 8]);
178		768	tmp0 += 2;
179		768	tmp10 = (tmp0 + tmp2) >> 2;
180		768	tmp11 = (tmp0 - tmp2) >> 2;
181
182		768	tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
183		768	tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
184
185		768	tmp0 = tmp10 + tmp13; //->temps
186		768	tmp3 = tmp10 - tmp13; //->temps
187		768	tmp1 = tmp11 + tmp12; //->temps
188		768	tmp2 = tmp11 - tmp12; //->temps
189
190			// Odd part of FDCT
191
192		768	tmp10 = tmp4 + tmp5;
193		768	tmp11 = tmp5 + tmp6;
194		768	tmp12 = tmp6 + tmp7;
195
196		768	z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
197		768	z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
198		768	z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
199		768	z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
200
201		768	z11 = tmp7 + z3;
202		768	z13 = tmp7 - z3;
203
204		768	d5 = z13 + z2;
205		768	d3 = z13 - z2;
206		768	d1 = z11 + z4;
207		768	d7 = z11 - z4;
208
209			// Odd part of IDCT
210
211	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 744 times.	768	THRESHOLD(tmp4, d1, threshold[1 * 8]);
212	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 740 times.	768	THRESHOLD(tmp5, d3, threshold[3 * 8]);
213	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 744 times.	768	THRESHOLD(tmp6, d5, threshold[5 * 8]);
214	2/2 ✓ Branch 0 taken 11 times. ✓ Branch 1 taken 757 times.	768	THRESHOLD(tmp7, d7, threshold[7 * 8]);
215
216			//Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
217		768	z13 = tmp6 + tmp5;
218		768	z10 = (tmp6 - tmp5) * 2;
219		768	z11 = tmp4 + tmp7;
220		768	z12 = (tmp4 - tmp7) * 2;
221
222		768	tmp7 = (z11 + z13) >> 2; //+2 !
223		768	tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562 << 1);
224		768	z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
225		768	tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
226		768	tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
227
228		768	tmp6 = tmp12 - tmp7;
229		768	tmp5 = tmp11 - tmp6;
230		768	tmp4 = tmp10 + tmp5;
231
232		768	wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
233		768	wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
234		768	wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
235		768	wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
236		768	wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
237		768	wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
238		768	wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
239		768	wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
240			//
241		768	data++; //next column
242		768	wsptr++;
243		768	threshold++;
244			}
245		96	data += 8; //skip each second start pos
246		96	wsptr += 8;
247			}
248		3	}
249
250		✗	void ff_row_idct_c(const int16_t *restrict wsptr, int16_t *restrict output_adr,
251			ptrdiff_t output_stride, int cnt)
252			{
253			int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
254			int_simd16_t tmp10, tmp11, tmp12, tmp13;
255			int_simd16_t z5, z10, z11, z12, z13;
256			int16_t *outptr;
257
258		✗	cnt *= 4;
259		✗	outptr = output_adr;
260		✗	for (; cnt > 0; cnt--) {
261			// Even part
262			//Simd version reads 4x4 block and transposes it
263		✗	tmp10 = wsptr[2] + wsptr[3];
264		✗	tmp11 = wsptr[2] - wsptr[3];
265
266		✗	tmp13 = wsptr[0] + wsptr[1];
267		✗	tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) * 4) - tmp13;//this shift order to avoid overflow
268
269		✗	tmp0 = tmp10 + tmp13; //->temps
270		✗	tmp3 = tmp10 - tmp13; //->temps
271		✗	tmp1 = tmp11 + tmp12;
272		✗	tmp2 = tmp11 - tmp12;
273
274			// Odd part
275			//Also transpose, with previous:
276			// ---- ----      ||||
277			// ---- ---- idct ||||
278			// ---- ---- ---> ||||
279			// ---- ----      ||||
280		✗	z13 = wsptr[4] + wsptr[5];
281		✗	z10 = wsptr[4] - wsptr[5];
282		✗	z11 = wsptr[6] + wsptr[7];
283		✗	z12 = wsptr[6] - wsptr[7];
284
285		✗	tmp7 = z11 + z13;
286		✗	tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
287
288		✗	z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
289		✗	tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
290		✗	tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
291
292		✗	tmp6 = tmp12 * 8 - tmp7;
293		✗	tmp5 = tmp11 * 8 - tmp6;
294		✗	tmp4 = tmp10 * 8 + tmp5;
295
296			// Final output stage: descale and write column
297		✗	outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
298		✗	outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
299		✗	outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
300		✗	outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
301		✗	outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
302		✗	outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
303		✗	outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
304		✗	outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
305		✗	outptr++;
306
307		✗	wsptr += DCTSIZE; // advance pointer to next row
308			}
309		✗	}
310
311		✗	void ff_row_fdct_c(int16_t *restrict data, const uint8_t *restrict pixels,
312			ptrdiff_t line_size, int cnt)
313			{
314			int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
315			int_simd16_t tmp10, tmp11, tmp12, tmp13;
316			int_simd16_t z1, z2, z3, z4, z5, z11, z13;
317			int16_t *dataptr;
318
319		✗	cnt *= 4;
320			// Pass 1: process rows.
321
322		✗	dataptr = data;
323		✗	for (; cnt > 0; cnt--) {
324		✗	tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
325		✗	tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
326		✗	tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
327		✗	tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
328		✗	tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
329		✗	tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
330		✗	tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
331		✗	tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
332
333			// Even part
334
335		✗	tmp10 = tmp0 + tmp3;
336		✗	tmp13 = tmp0 - tmp3;
337		✗	tmp11 = tmp1 + tmp2;
338		✗	tmp12 = tmp1 - tmp2;
339			//Even columns are written first, this leads to different order of columns
340			//in column_fidct(), but they are processed independently, so all ok.
341			//Later in the row_idct() columns are read in the same order.
342		✗	dataptr[2] = tmp10 + tmp11;
343		✗	dataptr[3] = tmp10 - tmp11;
344
345		✗	z1 = MULTIPLY16H(tmp12 + tmp13, FIX_0_707106781 << 2);
346		✗	dataptr[0] = tmp13 + z1;
347		✗	dataptr[1] = tmp13 - z1;
348
349			// Odd part
350
351		✗	tmp10 = tmp4 + tmp5;
352		✗	tmp11 = tmp5 + tmp6;
353		✗	tmp12 = tmp6 + tmp7;
354
355		✗	z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433 << 2);
356		✗	z2 = MULTIPLY16H(tmp10, FIX_0_541196100 << 2) + z5;
357		✗	z4 = MULTIPLY16H(tmp12, FIX_1_306562965 << 2) + z5;
358		✗	z3 = MULTIPLY16H(tmp11, FIX_0_707106781 << 2);
359
360		✗	z11 = tmp7 + z3;
361		✗	z13 = tmp7 - z3;
362
363		✗	dataptr[4] = z13 + z2;
364		✗	dataptr[5] = z13 - z2;
365		✗	dataptr[6] = z11 + z4;
366		✗	dataptr[7] = z11 - z4;
367
368		✗	pixels++; // advance pointer to next column
369		✗	dataptr += DCTSIZE;
370			}
371		✗	}
372

Function (Line)	Call count	Block coverage
ff_column_fidct_c (line 130)	called 3 times, returned 3 times	100.0%
ff_mul_thrmat_c (line 124)	called 3 times, returned 3 times	100.0%
ff_row_fdct_c (line 311)	not called	0.0%
ff_row_idct_c (line 250)	not called	0.0%
ff_store_slice2_c (line 96)	called 3 times, returned 3 times	100.0%
ff_store_slice_c (line 67)	called 3 times, returned 3 times	100.0%