FFmpeg coverage

Directory:	../../../ffmpeg/
File:	src/libavcodec/jfdctfst.c
Date:	2025-10-30 05:42:18

	Exec	Total	Coverage
Lines:	71	103	68.9%
Functions:	2	3	66.7%
Branches:	4	6	66.7%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
       * This file is part of the Independent JPEG Group's software.
    
       *
    
       * The authors make NO WARRANTY or representation, either express or implied,
    
       * with respect to this software, its quality, accuracy, merchantability, or
    
       * fitness for a particular purpose.  This software is provided "AS IS", and
    
       * you, its user, assume the entire risk as to its quality and accuracy.
    
       *
    
       * This software is copyright (C) 1994-1996, Thomas G. Lane.
    
       * All Rights Reserved except as specified below.
    
       *
    
       * Permission is hereby granted to use, copy, modify, and distribute this
    
       * software (or portions thereof) for any purpose, without fee, subject to
    
       * these conditions:
    
       * (1) If any part of the source code for this software is distributed, then
    
       * this README file must be included, with this copyright and no-warranty
    
       * notice unaltered; and any additions, deletions, or changes to the original
    
       * files must be clearly indicated in accompanying documentation.
    
       * (2) If only executable code is distributed, then the accompanying
    
       * documentation must state that "this software is based in part on the work
    
       * of the Independent JPEG Group".
    
       * (3) Permission for use of this software is granted only if the user accepts
    
       * full responsibility for any undesirable consequences; the authors accept
    
       * NO LIABILITY for damages of any kind.
    
       *
    
       * These conditions apply to any software derived from or based on the IJG
    
       * code, not just to the unmodified library.  If you use our work, you ought
    
       * to acknowledge us.
    
       *
    
       * Permission is NOT granted for the use of any IJG author's name or company
    
       * name in advertising or publicity relating to this software or products
    
       * derived from it.  This software may be referred to only as "the Independent
    
       * JPEG Group's software".
    
       *
    
       * We specifically permit and encourage the use of this software as the basis
    
       * of commercial products, provided that all warranty or liability claims are
    
       * assumed by the product vendor.
    
       *
    
       * This file contains a fast, not so accurate integer implementation of the
    
       * forward DCT (Discrete Cosine Transform).
    
       *
    
       * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
    
       * on each column.  Direct algorithms are also available, but they are
    
       * much more complex and seem not to be any faster when reduced to code.
    
       *
    
       * This implementation is based on Arai, Agui, and Nakajima's algorithm for
    
       * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
    
       * Japanese, but the algorithm is described in the Pennebaker & Mitchell
    
       * JPEG textbook (see REFERENCES section in file README).  The following code
    
       * is based directly on figure 4-8 in P&M.
    
       * While an 8-point DCT cannot be done in less than 11 multiplies, it is
    
       * possible to arrange the computation so that many of the multiplies are
    
       * simple scalings of the final outputs.  These multiplies can then be
    
       * folded into the multiplications or divisions by the JPEG quantization
    
       * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
    
       * to be done in the DCT itself.
    
       * The primary disadvantage of this method is that with fixed-point math,
    
       * accuracy is lost due to imprecise representation of the scaled
    
       * quantization values.  The smaller the quantization table entry, the less
    
       * precise the scaled value, so this implementation does worse with high-
    
       * quality-setting files than with low-quality ones.
    
       */
    
      /**
    
       * @file
    
       * Independent JPEG Group's fast AAN dct.
    
       */
    
      #include <stdint.h>
    
      #include "libavutil/attributes.h"
    
      #include "fdctdsp.h"
    
      #define DCTSIZE 8
    
      #define GLOBAL(x) x
    
      #define RIGHT_SHIFT(x, n) ((x) >> (n))
    
      /*
    
       * This module is specialized to the case DCTSIZE = 8.
    
       */
    
      #if DCTSIZE != 8
    
        Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
    
      #endif
    
      /* Scaling decisions are generally the same as in the LL&M algorithm;
    
       * see jfdctint.c for more details.  However, we choose to descale
    
       * (right shift) multiplication products as soon as they are formed,
    
       * rather than carrying additional fractional bits into subsequent additions.
    
       * This compromises accuracy slightly, but it lets us save a few shifts.
    
       * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
    
       * everywhere except in the multiplications proper; this saves a good deal
    
       * of work on 16-bit-int machines.
    
       *
    
       * Again to save a few shifts, the intermediate results between pass 1 and
    
       * pass 2 are not upscaled, but are represented only to integral precision.
    
       *
    
       * A final compromise is to represent the multiplicative constants to only
    
       * 8 fractional bits, rather than 13.  This saves some shifting work on some
    
       * machines, and may also reduce the cost of multiplication (since there
    
       * are fewer one-bits in the constants).
    
       */
    
      #define CONST_BITS  8
    
      /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
    
       * causing a lot of useless floating-point operations at run time.
    
       * To get around this we use the following pre-calculated constants.
    
       * If you change CONST_BITS you may want to add appropriate values.
    
       * (With a reasonable C compiler, you can just rely on the FIX() macro...)
    
       */
    
      #if CONST_BITS == 8
    
      #define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
    
      #define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
    
      #define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
    
      #define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
    
      #else
    
      #define FIX_0_382683433  FIX(0.382683433)
    
      #define FIX_0_541196100  FIX(0.541196100)
    
      #define FIX_0_707106781  FIX(0.707106781)
    
      #define FIX_1_306562965  FIX(1.306562965)
    
      #endif
    
      /* We can gain a little more speed, with a further compromise in accuracy,
    
       * by omitting the addition in a descaling shift.  This yields an incorrectly
    
       * rounded result half the time...
    
       */
    
      #ifndef USE_ACCURATE_ROUNDING
    
      #undef DESCALE
    
      #define DESCALE(x,n)  RIGHT_SHIFT(x, n)
    
      #endif
    
      /* Multiply a int16_t variable by an int32_t constant, and immediately
    
       * descale to yield a int16_t result.
    
       */
    
      #define MULTIPLY(var,const)  ((int16_t) DESCALE((var) * (const), CONST_BITS))
    
      103200826
      static av_always_inline void row_fdct(int16_t * data){
    
        int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
        int tmp10, tmp11, tmp12, tmp13;
    
        int z1, z2, z3, z4, z5, z11, z13;
    
        int16_t *dataptr;
    
        int ctr;
    
        /* Pass 1: process rows. */
    
      103200826
        dataptr = data;
    
        2/2✓ Branch 0 taken 825606608 times.
✓ Branch 1 taken 103200826 times.

      928807434
        for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    
      825606608
          tmp0 = dataptr[0] + dataptr[7];
    
      825606608
          tmp7 = dataptr[0] - dataptr[7];
    
      825606608
          tmp1 = dataptr[1] + dataptr[6];
    
      825606608
          tmp6 = dataptr[1] - dataptr[6];
    
      825606608
          tmp2 = dataptr[2] + dataptr[5];
    
      825606608
          tmp5 = dataptr[2] - dataptr[5];
    
      825606608
          tmp3 = dataptr[3] + dataptr[4];
    
      825606608
          tmp4 = dataptr[3] - dataptr[4];
    
          /* Even part */
    
      825606608
          tmp10 = tmp0 + tmp3;        /* phase 2 */
    
      825606608
          tmp13 = tmp0 - tmp3;
    
      825606608
          tmp11 = tmp1 + tmp2;
    
      825606608
          tmp12 = tmp1 - tmp2;
    
      825606608
          dataptr[0] = tmp10 + tmp11; /* phase 3 */
    
      825606608
          dataptr[4] = tmp10 - tmp11;
    
      825606608
          z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    
      825606608
          dataptr[2] = tmp13 + z1;    /* phase 5 */
    
      825606608
          dataptr[6] = tmp13 - z1;
    
          /* Odd part */
    
      825606608
          tmp10 = tmp4 + tmp5;        /* phase 2 */
    
      825606608
          tmp11 = tmp5 + tmp6;
    
      825606608
          tmp12 = tmp6 + tmp7;
    
          /* The rotator is modified from fig 4-8 to avoid extra negations. */
    
      825606608
          z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    
      825606608
          z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
    
      825606608
          z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
    
      825606608
          z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
    
      825606608
          z11 = tmp7 + z3;            /* phase 5 */
    
      825606608
          z13 = tmp7 - z3;
    
      825606608
          dataptr[5] = z13 + z2;      /* phase 6 */
    
      825606608
          dataptr[3] = z13 - z2;
    
      825606608
          dataptr[1] = z11 + z4;
    
      825606608
          dataptr[7] = z11 - z4;
    
      825606608
          dataptr += DCTSIZE;         /* advance pointer to next row */
    
        }
    
      103200826
      }
    
      /*
    
       * Perform the forward DCT on one block of samples.
    
       */
    
      GLOBAL(void)
    
      103200826
      ff_fdct_ifast (int16_t * data)
    
      {
    
        int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
        int tmp10, tmp11, tmp12, tmp13;
    
        int z1, z2, z3, z4, z5, z11, z13;
    
        int16_t *dataptr;
    
        int ctr;
    
      103200826
        row_fdct(data);
    
        /* Pass 2: process columns. */
    
      103200826
        dataptr = data;
    
        2/2✓ Branch 0 taken 825606608 times.
✓ Branch 1 taken 103200826 times.

      928807434
        for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    
      825606608
          tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
    
      825606608
          tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
    
      825606608
          tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
    
      825606608
          tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
    
      825606608
          tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
    
      825606608
          tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
    
      825606608
          tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
    
      825606608
          tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
    
          /* Even part */
    
      825606608
          tmp10 = tmp0 + tmp3;        /* phase 2 */
    
      825606608
          tmp13 = tmp0 - tmp3;
    
      825606608
          tmp11 = tmp1 + tmp2;
    
      825606608
          tmp12 = tmp1 - tmp2;
    
      825606608
          dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
    
      825606608
          dataptr[DCTSIZE*4] = tmp10 - tmp11;
    
      825606608
          z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
    
      825606608
          dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
    
      825606608
          dataptr[DCTSIZE*6] = tmp13 - z1;
    
          /* Odd part */
    
      825606608
          tmp10 = tmp4 + tmp5;        /* phase 2 */
    
      825606608
          tmp11 = tmp5 + tmp6;
    
      825606608
          tmp12 = tmp6 + tmp7;
    
          /* The rotator is modified from fig 4-8 to avoid extra negations. */
    
      825606608
          z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
    
      825606608
          z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
    
      825606608
          z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
    
      825606608
          z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
    
      825606608
          z11 = tmp7 + z3;            /* phase 5 */
    
      825606608
          z13 = tmp7 - z3;
    
      825606608
          dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
    
      825606608
          dataptr[DCTSIZE*3] = z13 - z2;
    
      825606608
          dataptr[DCTSIZE*1] = z11 + z4;
    
      825606608
          dataptr[DCTSIZE*7] = z11 - z4;
    
      825606608
          dataptr++;                  /* advance pointer to next column */
    
        }
    
      103200826
      }
    
      /*
    
       * Perform the forward 2-4-8 DCT on one block of samples.
    
       */
    
      GLOBAL(void)
    
      ✗
      ff_fdct_ifast248 (int16_t * data)
    
      {
    
        int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    
        int tmp10, tmp11, tmp12, tmp13;
    
        int z1;
    
        int16_t *dataptr;
    
        int ctr;
    
      ✗
        row_fdct(data);
    
        /* Pass 2: process columns. */
    
      ✗
        dataptr = data;
    
      ✗
        for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
    
      ✗
          tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
    
      ✗
          tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
    
      ✗
          tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
    
      ✗
          tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
    
      ✗
          tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
    
      ✗
          tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
    
      ✗
          tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
    
      ✗
          tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
    
          /* Even part */
    
      ✗
          tmp10 = tmp0 + tmp3;
    
      ✗
          tmp11 = tmp1 + tmp2;
    
      ✗
          tmp12 = tmp1 - tmp2;
    
      ✗
          tmp13 = tmp0 - tmp3;
    
      ✗
          dataptr[DCTSIZE*0] = tmp10 + tmp11;
    
      ✗
          dataptr[DCTSIZE*4] = tmp10 - tmp11;
    
      ✗
          z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    
      ✗
          dataptr[DCTSIZE*2] = tmp13 + z1;
    
      ✗
          dataptr[DCTSIZE*6] = tmp13 - z1;
    
      ✗
          tmp10 = tmp4 + tmp7;
    
      ✗
          tmp11 = tmp5 + tmp6;
    
      ✗
          tmp12 = tmp5 - tmp6;
    
      ✗
          tmp13 = tmp4 - tmp7;
    
      ✗
          dataptr[DCTSIZE*1] = tmp10 + tmp11;
    
      ✗
          dataptr[DCTSIZE*5] = tmp10 - tmp11;
    
      ✗
          z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
    
      ✗
          dataptr[DCTSIZE*3] = tmp13 + z1;
    
      ✗
          dataptr[DCTSIZE*7] = tmp13 - z1;
    
      ✗
          dataptr++;                        /* advance pointer to next column */
    
        }
    
      ✗
      }
    
      #undef GLOBAL
    
      #undef CONST_BITS
    
      #undef DESCALE
    
      #undef FIX_0_541196100
    
      #undef FIX_1_306562965

Line	Branch	Exec	Source
1			/*
2			* This file is part of the Independent JPEG Group's software.
3			*
4			* The authors make NO WARRANTY or representation, either express or implied,
5			* with respect to this software, its quality, accuracy, merchantability, or
6			* fitness for a particular purpose. This software is provided "AS IS", and
7			* you, its user, assume the entire risk as to its quality and accuracy.
8			*
9			* This software is copyright (C) 1994-1996, Thomas G. Lane.
10			* All Rights Reserved except as specified below.
11			*
12			* Permission is hereby granted to use, copy, modify, and distribute this
13			* software (or portions thereof) for any purpose, without fee, subject to
14			* these conditions:
15			* (1) If any part of the source code for this software is distributed, then
16			* this README file must be included, with this copyright and no-warranty
17			* notice unaltered; and any additions, deletions, or changes to the original
18			* files must be clearly indicated in accompanying documentation.
19			* (2) If only executable code is distributed, then the accompanying
20			* documentation must state that "this software is based in part on the work
21			* of the Independent JPEG Group".
22			* (3) Permission for use of this software is granted only if the user accepts
23			* full responsibility for any undesirable consequences; the authors accept
24			* NO LIABILITY for damages of any kind.
25			*
26			* These conditions apply to any software derived from or based on the IJG
27			* code, not just to the unmodified library. If you use our work, you ought
28			* to acknowledge us.
29			*
30			* Permission is NOT granted for the use of any IJG author's name or company
31			* name in advertising or publicity relating to this software or products
32			* derived from it. This software may be referred to only as "the Independent
33			* JPEG Group's software".
34			*
35			* We specifically permit and encourage the use of this software as the basis
36			* of commercial products, provided that all warranty or liability claims are
37			* assumed by the product vendor.
38			*
39			* This file contains a fast, not so accurate integer implementation of the
40			* forward DCT (Discrete Cosine Transform).
41			*
42			* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
43			* on each column. Direct algorithms are also available, but they are
44			* much more complex and seem not to be any faster when reduced to code.
45			*
46			* This implementation is based on Arai, Agui, and Nakajima's algorithm for
47			* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
48			* Japanese, but the algorithm is described in the Pennebaker & Mitchell
49			* JPEG textbook (see REFERENCES section in file README). The following code
50			* is based directly on figure 4-8 in P&M.
51			* While an 8-point DCT cannot be done in less than 11 multiplies, it is
52			* possible to arrange the computation so that many of the multiplies are
53			* simple scalings of the final outputs. These multiplies can then be
54			* folded into the multiplications or divisions by the JPEG quantization
55			* table entries. The AA&N method leaves only 5 multiplies and 29 adds
56			* to be done in the DCT itself.
57			* The primary disadvantage of this method is that with fixed-point math,
58			* accuracy is lost due to imprecise representation of the scaled
59			* quantization values. The smaller the quantization table entry, the less
60			* precise the scaled value, so this implementation does worse with high-
61			* quality-setting files than with low-quality ones.
62			*/
63
64			/**
65			* @file
66			* Independent JPEG Group's fast AAN dct.
67			*/
68
69			#include <stdint.h>
70			#include "libavutil/attributes.h"
71			#include "fdctdsp.h"
72
73			#define DCTSIZE 8
74			#define GLOBAL(x) x
75			#define RIGHT_SHIFT(x, n) ((x) >> (n))
76
77			/*
78			* This module is specialized to the case DCTSIZE = 8.
79			*/
80
81			#if DCTSIZE != 8
82			Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
83			#endif
84
85
86			/* Scaling decisions are generally the same as in the LL&M algorithm;
87			* see jfdctint.c for more details. However, we choose to descale
88			* (right shift) multiplication products as soon as they are formed,
89			* rather than carrying additional fractional bits into subsequent additions.
90			* This compromises accuracy slightly, but it lets us save a few shifts.
91			* More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
92			* everywhere except in the multiplications proper; this saves a good deal
93			* of work on 16-bit-int machines.
94			*
95			* Again to save a few shifts, the intermediate results between pass 1 and
96			* pass 2 are not upscaled, but are represented only to integral precision.
97			*
98			* A final compromise is to represent the multiplicative constants to only
99			* 8 fractional bits, rather than 13. This saves some shifting work on some
100			* machines, and may also reduce the cost of multiplication (since there
101			* are fewer one-bits in the constants).
102			*/
103
104			#define CONST_BITS 8
105
106
107			/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
108			* causing a lot of useless floating-point operations at run time.
109			* To get around this we use the following pre-calculated constants.
110			* If you change CONST_BITS you may want to add appropriate values.
111			* (With a reasonable C compiler, you can just rely on the FIX() macro...)
112			*/
113
114			#if CONST_BITS == 8
115			#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
116			#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
117			#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
118			#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
119			#else
120			#define FIX_0_382683433 FIX(0.382683433)
121			#define FIX_0_541196100 FIX(0.541196100)
122			#define FIX_0_707106781 FIX(0.707106781)
123			#define FIX_1_306562965 FIX(1.306562965)
124			#endif
125
126
127			/* We can gain a little more speed, with a further compromise in accuracy,
128			* by omitting the addition in a descaling shift. This yields an incorrectly
129			* rounded result half the time...
130			*/
131
132			#ifndef USE_ACCURATE_ROUNDING
133			#undef DESCALE
134			#define DESCALE(x,n) RIGHT_SHIFT(x, n)
135			#endif
136
137
138			/* Multiply a int16_t variable by an int32_t constant, and immediately
139			* descale to yield a int16_t result.
140			*/
141
142			#define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS))
143
144		103200826	static av_always_inline void row_fdct(int16_t * data){
145			int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
146			int tmp10, tmp11, tmp12, tmp13;
147			int z1, z2, z3, z4, z5, z11, z13;
148			int16_t *dataptr;
149			int ctr;
150
151			/* Pass 1: process rows. */
152
153		103200826	dataptr = data;
154	2/2 ✓ Branch 0 taken 825606608 times. ✓ Branch 1 taken 103200826 times.	928807434	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
155		825606608	tmp0 = dataptr[0] + dataptr[7];
156		825606608	tmp7 = dataptr[0] - dataptr[7];
157		825606608	tmp1 = dataptr[1] + dataptr[6];
158		825606608	tmp6 = dataptr[1] - dataptr[6];
159		825606608	tmp2 = dataptr[2] + dataptr[5];
160		825606608	tmp5 = dataptr[2] - dataptr[5];
161		825606608	tmp3 = dataptr[3] + dataptr[4];
162		825606608	tmp4 = dataptr[3] - dataptr[4];
163
164			/* Even part */
165
166		825606608	tmp10 = tmp0 + tmp3; /* phase 2 */
167		825606608	tmp13 = tmp0 - tmp3;
168		825606608	tmp11 = tmp1 + tmp2;
169		825606608	tmp12 = tmp1 - tmp2;
170
171		825606608	dataptr[0] = tmp10 + tmp11; /* phase 3 */
172		825606608	dataptr[4] = tmp10 - tmp11;
173
174		825606608	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
175		825606608	dataptr[2] = tmp13 + z1; /* phase 5 */
176		825606608	dataptr[6] = tmp13 - z1;
177
178			/* Odd part */
179
180		825606608	tmp10 = tmp4 + tmp5; /* phase 2 */
181		825606608	tmp11 = tmp5 + tmp6;
182		825606608	tmp12 = tmp6 + tmp7;
183
184			/* The rotator is modified from fig 4-8 to avoid extra negations. */
185		825606608	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
186		825606608	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
187		825606608	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
188		825606608	z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
189
190		825606608	z11 = tmp7 + z3; /* phase 5 */
191		825606608	z13 = tmp7 - z3;
192
193		825606608	dataptr[5] = z13 + z2; /* phase 6 */
194		825606608	dataptr[3] = z13 - z2;
195		825606608	dataptr[1] = z11 + z4;
196		825606608	dataptr[7] = z11 - z4;
197
198		825606608	dataptr += DCTSIZE; /* advance pointer to next row */
199			}
200		103200826	}
201
202			/*
203			* Perform the forward DCT on one block of samples.
204			*/
205
206			GLOBAL(void)
207		103200826	ff_fdct_ifast (int16_t * data)
208			{
209			int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
210			int tmp10, tmp11, tmp12, tmp13;
211			int z1, z2, z3, z4, z5, z11, z13;
212			int16_t *dataptr;
213			int ctr;
214
215		103200826	row_fdct(data);
216
217			/* Pass 2: process columns. */
218
219		103200826	dataptr = data;
220	2/2 ✓ Branch 0 taken 825606608 times. ✓ Branch 1 taken 103200826 times.	928807434	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
221		825606608	tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
222		825606608	tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
223		825606608	tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
224		825606608	tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
225		825606608	tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
226		825606608	tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
227		825606608	tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
228		825606608	tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
229
230			/* Even part */
231
232		825606608	tmp10 = tmp0 + tmp3; /* phase 2 */
233		825606608	tmp13 = tmp0 - tmp3;
234		825606608	tmp11 = tmp1 + tmp2;
235		825606608	tmp12 = tmp1 - tmp2;
236
237		825606608	dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
238		825606608	dataptr[DCTSIZE*4] = tmp10 - tmp11;
239
240		825606608	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
241		825606608	dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
242		825606608	dataptr[DCTSIZE*6] = tmp13 - z1;
243
244			/* Odd part */
245
246		825606608	tmp10 = tmp4 + tmp5; /* phase 2 */
247		825606608	tmp11 = tmp5 + tmp6;
248		825606608	tmp12 = tmp6 + tmp7;
249
250			/* The rotator is modified from fig 4-8 to avoid extra negations. */
251		825606608	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
252		825606608	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
253		825606608	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
254		825606608	z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
255
256		825606608	z11 = tmp7 + z3; /* phase 5 */
257		825606608	z13 = tmp7 - z3;
258
259		825606608	dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
260		825606608	dataptr[DCTSIZE*3] = z13 - z2;
261		825606608	dataptr[DCTSIZE*1] = z11 + z4;
262		825606608	dataptr[DCTSIZE*7] = z11 - z4;
263
264		825606608	dataptr++; /* advance pointer to next column */
265			}
266		103200826	}
267
268			/*
269			* Perform the forward 2-4-8 DCT on one block of samples.
270			*/
271
272			GLOBAL(void)
273		✗	ff_fdct_ifast248 (int16_t * data)
274			{
275			int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
276			int tmp10, tmp11, tmp12, tmp13;
277			int z1;
278			int16_t *dataptr;
279			int ctr;
280
281		✗	row_fdct(data);
282
283			/* Pass 2: process columns. */
284
285		✗	dataptr = data;
286		✗	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
287		✗	tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
288		✗	tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
289		✗	tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
290		✗	tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
291		✗	tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
292		✗	tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
293		✗	tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
294		✗	tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
295
296			/* Even part */
297
298		✗	tmp10 = tmp0 + tmp3;
299		✗	tmp11 = tmp1 + tmp2;
300		✗	tmp12 = tmp1 - tmp2;
301		✗	tmp13 = tmp0 - tmp3;
302
303		✗	dataptr[DCTSIZE*0] = tmp10 + tmp11;
304		✗	dataptr[DCTSIZE*4] = tmp10 - tmp11;
305
306		✗	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
307		✗	dataptr[DCTSIZE*2] = tmp13 + z1;
308		✗	dataptr[DCTSIZE*6] = tmp13 - z1;
309
310		✗	tmp10 = tmp4 + tmp7;
311		✗	tmp11 = tmp5 + tmp6;
312		✗	tmp12 = tmp5 - tmp6;
313		✗	tmp13 = tmp4 - tmp7;
314
315		✗	dataptr[DCTSIZE*1] = tmp10 + tmp11;
316		✗	dataptr[DCTSIZE*5] = tmp10 - tmp11;
317
318		✗	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
319		✗	dataptr[DCTSIZE*3] = tmp13 + z1;
320		✗	dataptr[DCTSIZE*7] = tmp13 - z1;
321
322		✗	dataptr++; /* advance pointer to next column */
323			}
324		✗	}
325
326
327			#undef GLOBAL
328			#undef CONST_BITS
329			#undef DESCALE
330			#undef FIX_0_541196100
331			#undef FIX_1_306562965
332

Function (Line)	Call count	Block coverage
ff_fdct_ifast (line 207)	called 103200826 times, returned 103200826 times	100.0%
ff_fdct_ifast248 (line 273)	not called	0.0%
row_fdct (line 144)	called 103200826 times, returned 103200826 times	100.0%