Directory:  ../../../ffmpeg/ 

File:  src/libavcodec/jfdctfst.c 
Date:  20220121 12:56:39 
Exec  Total  Coverage  

Lines:  71  102  69.6% 
Branches:  4  6  66.7% 
Line  Branch  Exec  Source 

1  /*  
2  * This file is part of the Independent JPEG Group's software.  
3  *  
4  * The authors make NO WARRANTY or representation, either express or implied,  
5  * with respect to this software, its quality, accuracy, merchantability, or  
6  * fitness for a particular purpose. This software is provided "AS IS", and  
7  * you, its user, assume the entire risk as to its quality and accuracy.  
8  *  
9  * This software is copyright (C) 19941996, Thomas G. Lane.  
10  * All Rights Reserved except as specified below.  
11  *  
12  * Permission is hereby granted to use, copy, modify, and distribute this  
13  * software (or portions thereof) for any purpose, without fee, subject to  
14  * these conditions:  
15  * (1) If any part of the source code for this software is distributed, then  
16  * this README file must be included, with this copyright and nowarranty  
17  * notice unaltered; and any additions, deletions, or changes to the original  
18  * files must be clearly indicated in accompanying documentation.  
19  * (2) If only executable code is distributed, then the accompanying  
20  * documentation must state that "this software is based in part on the work  
21  * of the Independent JPEG Group".  
22  * (3) Permission for use of this software is granted only if the user accepts  
23  * full responsibility for any undesirable consequences; the authors accept  
24  * NO LIABILITY for damages of any kind.  
25  *  
26  * These conditions apply to any software derived from or based on the IJG  
27  * code, not just to the unmodified library. If you use our work, you ought  
28  * to acknowledge us.  
29  *  
30  * Permission is NOT granted for the use of any IJG author's name or company  
31  * name in advertising or publicity relating to this software or products  
32  * derived from it. This software may be referred to only as "the Independent  
33  * JPEG Group's software".  
34  *  
35  * We specifically permit and encourage the use of this software as the basis  
36  * of commercial products, provided that all warranty or liability claims are  
37  * assumed by the product vendor.  
38  *  
39  * This file contains a fast, not so accurate integer implementation of the  
40  * forward DCT (Discrete Cosine Transform).  
41  *  
42  * A 2D DCT can be done by 1D DCT on each row followed by 1D DCT  
43  * on each column. Direct algorithms are also available, but they are  
44  * much more complex and seem not to be any faster when reduced to code.  
45  *  
46  * This implementation is based on Arai, Agui, and Nakajima's algorithm for  
47  * scaled DCT. Their original paper (Trans. IEICE E71(11):1095) is in  
48  * Japanese, but the algorithm is described in the Pennebaker & Mitchell  
49  * JPEG textbook (see REFERENCES section in file README). The following code  
50  * is based directly on figure 48 in P&M.  
51  * While an 8point DCT cannot be done in less than 11 multiplies, it is  
52  * possible to arrange the computation so that many of the multiplies are  
53  * simple scalings of the final outputs. These multiplies can then be  
54  * folded into the multiplications or divisions by the JPEG quantization  
55  * table entries. The AA&N method leaves only 5 multiplies and 29 adds  
56  * to be done in the DCT itself.  
57  * The primary disadvantage of this method is that with fixedpoint math,  
58  * accuracy is lost due to imprecise representation of the scaled  
59  * quantization values. The smaller the quantization table entry, the less  
60  * precise the scaled value, so this implementation does worse with high  
61  * qualitysetting files than with lowquality ones.  
62  */  
63  
64  /**  
65  * @file  
66  * Independent JPEG Group's fast AAN dct.  
67  */  
68  
69  #include <stdlib.h>  
70  #include <stdio.h>  
71  #include "libavutil/common.h"  
72  #include "dct.h"  
73  
74  #define DCTSIZE 8  
75  #define GLOBAL(x) x  
76  #define RIGHT_SHIFT(x, n) ((x) >> (n))  
77  
78  /*  
79  * This module is specialized to the case DCTSIZE = 8.  
80  */  
81  
82  #if DCTSIZE != 8  
83  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */  
84  #endif  
85  
86  
87  /* Scaling decisions are generally the same as in the LL&M algorithm;  
88  * see jfdctint.c for more details. However, we choose to descale  
89  * (right shift) multiplication products as soon as they are formed,  
90  * rather than carrying additional fractional bits into subsequent additions.  
91  * This compromises accuracy slightly, but it lets us save a few shifts.  
92  * More importantly, 16bit arithmetic is then adequate (for 8bit samples)  
93  * everywhere except in the multiplications proper; this saves a good deal  
94  * of work on 16bitint machines.  
95  *  
96  * Again to save a few shifts, the intermediate results between pass 1 and  
97  * pass 2 are not upscaled, but are represented only to integral precision.  
98  *  
99  * A final compromise is to represent the multiplicative constants to only  
100  * 8 fractional bits, rather than 13. This saves some shifting work on some  
101  * machines, and may also reduce the cost of multiplication (since there  
102  * are fewer onebits in the constants).  
103  */  
104  
105  #define CONST_BITS 8  
106  
107  
108  /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus  
109  * causing a lot of useless floatingpoint operations at run time.  
110  * To get around this we use the following precalculated constants.  
111  * If you change CONST_BITS you may want to add appropriate values.  
112  * (With a reasonable C compiler, you can just rely on the FIX() macro...)  
113  */  
114  
115  #if CONST_BITS == 8  
116  #define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */  
117  #define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */  
118  #define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */  
119  #define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */  
120  #else  
121  #define FIX_0_382683433 FIX(0.382683433)  
122  #define FIX_0_541196100 FIX(0.541196100)  
123  #define FIX_0_707106781 FIX(0.707106781)  
124  #define FIX_1_306562965 FIX(1.306562965)  
125  #endif  
126  
127  
128  /* We can gain a little more speed, with a further compromise in accuracy,  
129  * by omitting the addition in a descaling shift. This yields an incorrectly  
130  * rounded result half the time...  
131  */  
132  
133  #ifndef USE_ACCURATE_ROUNDING  
134  #undef DESCALE  
135  #define DESCALE(x,n) RIGHT_SHIFT(x, n)  
136  #endif  
137  
138  
139  /* Multiply a int16_t variable by an int32_t constant, and immediately  
140  * descale to yield a int16_t result.  
141  */  
142  
143  #define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS))  
144  
145  99957480  static av_always_inline void row_fdct(int16_t * data){  
146  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;  
147  int tmp10, tmp11, tmp12, tmp13;  
148  int z1, z2, z3, z4, z5, z11, z13;  
149  int16_t *dataptr;  
150  int ctr;  
151  
152  /* Pass 1: process rows. */  
153  
154  99957480  dataptr = data;  
155 
2/2✓ Branch 0 taken 799659840 times.
✓ Branch 1 taken 99957480 times.

899617320  for (ctr = DCTSIZE1; ctr >= 0; ctr) { 
156  799659840  tmp0 = dataptr[0] + dataptr[7];  
157  799659840  tmp7 = dataptr[0]  dataptr[7];  
158  799659840  tmp1 = dataptr[1] + dataptr[6];  
159  799659840  tmp6 = dataptr[1]  dataptr[6];  
160  799659840  tmp2 = dataptr[2] + dataptr[5];  
161  799659840  tmp5 = dataptr[2]  dataptr[5];  
162  799659840  tmp3 = dataptr[3] + dataptr[4];  
163  799659840  tmp4 = dataptr[3]  dataptr[4];  
164  
165  /* Even part */  
166  
167  799659840  tmp10 = tmp0 + tmp3; /* phase 2 */  
168  799659840  tmp13 = tmp0  tmp3;  
169  799659840  tmp11 = tmp1 + tmp2;  
170  799659840  tmp12 = tmp1  tmp2;  
171  
172  799659840  dataptr[0] = tmp10 + tmp11; /* phase 3 */  
173  799659840  dataptr[4] = tmp10  tmp11;  
174  
175  799659840  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */  
176  799659840  dataptr[2] = tmp13 + z1; /* phase 5 */  
177  799659840  dataptr[6] = tmp13  z1;  
178  
179  /* Odd part */  
180  
181  799659840  tmp10 = tmp4 + tmp5; /* phase 2 */  
182  799659840  tmp11 = tmp5 + tmp6;  
183  799659840  tmp12 = tmp6 + tmp7;  
184  
185  /* The rotator is modified from fig 48 to avoid extra negations. */  
186  799659840  z5 = MULTIPLY(tmp10  tmp12, FIX_0_382683433); /* c6 */  
187  799659840  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2c6 */  
188  799659840  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */  
189  799659840  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */  
190  
191  799659840  z11 = tmp7 + z3; /* phase 5 */  
192  799659840  z13 = tmp7  z3;  
193  
194  799659840  dataptr[5] = z13 + z2; /* phase 6 */  
195  799659840  dataptr[3] = z13  z2;  
196  799659840  dataptr[1] = z11 + z4;  
197  799659840  dataptr[7] = z11  z4;  
198  
199  799659840  dataptr += DCTSIZE; /* advance pointer to next row */  
200  }  
201  99957480  }  
202  
203  /*  
204  * Perform the forward DCT on one block of samples.  
205  */  
206  
207  GLOBAL(void)  
208  99957480  ff_fdct_ifast (int16_t * data)  
209  {  
210  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;  
211  int tmp10, tmp11, tmp12, tmp13;  
212  int z1, z2, z3, z4, z5, z11, z13;  
213  int16_t *dataptr;  
214  int ctr;  
215  
216  99957480  row_fdct(data);  
217  
218  /* Pass 2: process columns. */  
219  
220  99957480  dataptr = data;  
221 
2/2✓ Branch 0 taken 799659840 times.
✓ Branch 1 taken 99957480 times.

899617320  for (ctr = DCTSIZE1; ctr >= 0; ctr) { 
222  799659840  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];  
223  799659840  tmp7 = dataptr[DCTSIZE*0]  dataptr[DCTSIZE*7];  
224  799659840  tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];  
225  799659840  tmp6 = dataptr[DCTSIZE*1]  dataptr[DCTSIZE*6];  
226  799659840  tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];  
227  799659840  tmp5 = dataptr[DCTSIZE*2]  dataptr[DCTSIZE*5];  
228  799659840  tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];  
229  799659840  tmp4 = dataptr[DCTSIZE*3]  dataptr[DCTSIZE*4];  
230  
231  /* Even part */  
232  
233  799659840  tmp10 = tmp0 + tmp3; /* phase 2 */  
234  799659840  tmp13 = tmp0  tmp3;  
235  799659840  tmp11 = tmp1 + tmp2;  
236  799659840  tmp12 = tmp1  tmp2;  
237  
238  799659840  dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */  
239  799659840  dataptr[DCTSIZE*4] = tmp10  tmp11;  
240  
241  799659840  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */  
242  799659840  dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */  
243  799659840  dataptr[DCTSIZE*6] = tmp13  z1;  
244  
245  /* Odd part */  
246  
247  799659840  tmp10 = tmp4 + tmp5; /* phase 2 */  
248  799659840  tmp11 = tmp5 + tmp6;  
249  799659840  tmp12 = tmp6 + tmp7;  
250  
251  /* The rotator is modified from fig 48 to avoid extra negations. */  
252  799659840  z5 = MULTIPLY(tmp10  tmp12, FIX_0_382683433); /* c6 */  
253  799659840  z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2c6 */  
254  799659840  z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */  
255  799659840  z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */  
256  
257  799659840  z11 = tmp7 + z3; /* phase 5 */  
258  799659840  z13 = tmp7  z3;  
259  
260  799659840  dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */  
261  799659840  dataptr[DCTSIZE*3] = z13  z2;  
262  799659840  dataptr[DCTSIZE*1] = z11 + z4;  
263  799659840  dataptr[DCTSIZE*7] = z11  z4;  
264  
265  799659840  dataptr++; /* advance pointer to next column */  
266  }  
267  99957480  }  
268  
269  /*  
270  * Perform the forward 248 DCT on one block of samples.  
271  */  
272  
273  GLOBAL(void)  
274  ✗  ff_fdct_ifast248 (int16_t * data)  
275  {  
276  int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;  
277  int tmp10, tmp11, tmp12, tmp13;  
278  int z1;  
279  int16_t *dataptr;  
280  int ctr;  
281  
282  ✗  row_fdct(data);  
283  
284  /* Pass 2: process columns. */  
285  
286  ✗  dataptr = data;  
287  ✗  for (ctr = DCTSIZE1; ctr >= 0; ctr) {  
288  ✗  tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];  
289  ✗  tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];  
290  ✗  tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];  
291  ✗  tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];  
292  ✗  tmp4 = dataptr[DCTSIZE*0]  dataptr[DCTSIZE*1];  
293  ✗  tmp5 = dataptr[DCTSIZE*2]  dataptr[DCTSIZE*3];  
294  ✗  tmp6 = dataptr[DCTSIZE*4]  dataptr[DCTSIZE*5];  
295  ✗  tmp7 = dataptr[DCTSIZE*6]  dataptr[DCTSIZE*7];  
296  
297  /* Even part */  
298  
299  ✗  tmp10 = tmp0 + tmp3;  
300  ✗  tmp11 = tmp1 + tmp2;  
301  ✗  tmp12 = tmp1  tmp2;  
302  ✗  tmp13 = tmp0  tmp3;  
303  
304  ✗  dataptr[DCTSIZE*0] = tmp10 + tmp11;  
305  ✗  dataptr[DCTSIZE*4] = tmp10  tmp11;  
306  
307  ✗  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);  
308  ✗  dataptr[DCTSIZE*2] = tmp13 + z1;  
309  ✗  dataptr[DCTSIZE*6] = tmp13  z1;  
310  
311  ✗  tmp10 = tmp4 + tmp7;  
312  ✗  tmp11 = tmp5 + tmp6;  
313  ✗  tmp12 = tmp5  tmp6;  
314  ✗  tmp13 = tmp4  tmp7;  
315  
316  ✗  dataptr[DCTSIZE*1] = tmp10 + tmp11;  
317  ✗  dataptr[DCTSIZE*5] = tmp10  tmp11;  
318  
319  ✗  z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);  
320  ✗  dataptr[DCTSIZE*3] = tmp13 + z1;  
321  ✗  dataptr[DCTSIZE*7] = tmp13  z1;  
322  
323  ✗  dataptr++; /* advance pointer to next column */  
324  }  
325  }  
326  
327  
328  #undef GLOBAL  
329  #undef CONST_BITS  
330  #undef DESCALE  
331  #undef FIX_0_541196100  
332  #undef FIX_1_306562965  
333 