LCOV - code coverage report
Current view: top level - libavcodec - jrevdct.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 580 594 97.6 %
Date: 2017-12-18 13:19:42 Functions: 4 6 66.7 %

          Line data    Source code
       1             : /*
       2             :  * This file is part of the Independent JPEG Group's software.
       3             :  *
       4             :  * The authors make NO WARRANTY or representation, either express or implied,
       5             :  * with respect to this software, its quality, accuracy, merchantability, or
       6             :  * fitness for a particular purpose.  This software is provided "AS IS", and
       7             :  * you, its user, assume the entire risk as to its quality and accuracy.
       8             :  *
       9             :  * This software is copyright (C) 1991, 1992, Thomas G. Lane.
      10             :  * All Rights Reserved except as specified below.
      11             :  *
      12             :  * Permission is hereby granted to use, copy, modify, and distribute this
      13             :  * software (or portions thereof) for any purpose, without fee, subject to
      14             :  * these conditions:
      15             :  * (1) If any part of the source code for this software is distributed, then
      16             :  * this README file must be included, with this copyright and no-warranty
      17             :  * notice unaltered; and any additions, deletions, or changes to the original
      18             :  * files must be clearly indicated in accompanying documentation.
      19             :  * (2) If only executable code is distributed, then the accompanying
      20             :  * documentation must state that "this software is based in part on the work
      21             :  * of the Independent JPEG Group".
      22             :  * (3) Permission for use of this software is granted only if the user accepts
      23             :  * full responsibility for any undesirable consequences; the authors accept
      24             :  * NO LIABILITY for damages of any kind.
      25             :  *
      26             :  * These conditions apply to any software derived from or based on the IJG
      27             :  * code, not just to the unmodified library.  If you use our work, you ought
      28             :  * to acknowledge us.
      29             :  *
      30             :  * Permission is NOT granted for the use of any IJG author's name or company
      31             :  * name in advertising or publicity relating to this software or products
      32             :  * derived from it.  This software may be referred to only as "the Independent
      33             :  * JPEG Group's software".
      34             :  *
      35             :  * We specifically permit and encourage the use of this software as the basis
      36             :  * of commercial products, provided that all warranty or liability claims are
      37             :  * assumed by the product vendor.
      38             :  *
      39             :  * This file contains the basic inverse-DCT transformation subroutine.
      40             :  *
      41             :  * This implementation is based on an algorithm described in
      42             :  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
      43             :  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
      44             :  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
      45             :  * The primary algorithm described there uses 11 multiplies and 29 adds.
      46             :  * We use their alternate method with 12 multiplies and 32 adds.
      47             :  * The advantage of this method is that no data path contains more than one
      48             :  * multiplication; this allows a very simple and accurate implementation in
      49             :  * scaled fixed-point arithmetic, with a minimal number of shifts.
      50             :  *
      51             :  * I've made lots of modifications to attempt to take advantage of the
      52             :  * sparse nature of the DCT matrices we're getting.  Although the logic
      53             :  * is cumbersome, it's straightforward and the resulting code is much
      54             :  * faster.
      55             :  *
      56             :  * A better way to do this would be to pass in the DCT block as a sparse
      57             :  * matrix, perhaps with the different cases encoded.
      58             :  */
      59             : 
      60             : /**
      61             :  * @file
      62             :  * Independent JPEG Group's LLM idct.
      63             :  */
      64             : 
      65             : #include "libavutil/common.h"
      66             : 
      67             : #include "dct.h"
      68             : #include "idctdsp.h"
      69             : 
      70             : #define EIGHT_BIT_SAMPLES
      71             : 
      72             : #define DCTSIZE 8
      73             : #define DCTSIZE2 64
      74             : 
      75             : #define GLOBAL
      76             : 
      77             : #define RIGHT_SHIFT(x, n) ((x) >> (n))
      78             : 
      79             : typedef int16_t DCTBLOCK[DCTSIZE2];
      80             : 
      81             : #define CONST_BITS 13
      82             : 
      83             : /*
      84             :  * This routine is specialized to the case DCTSIZE = 8.
      85             :  */
      86             : 
      87             : #if DCTSIZE != 8
      88             :   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
      89             : #endif
      90             : 
      91             : 
      92             : /*
      93             :  * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
      94             :  * on each column.  Direct algorithms are also available, but they are
      95             :  * much more complex and seem not to be any faster when reduced to code.
      96             :  *
      97             :  * The details of the scaling are as follows:
      98             :  *
      99             :  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
     100             :  * larger than the true IDCT outputs.  The final outputs are therefore
     101             :  * a factor of N larger than desired; since N=8 this can be cured by
     102             :  * a simple right shift at the end of the algorithm.  The advantage of
     103             :  * this arrangement is that we save two multiplications per 1-D IDCT,
     104             :  * because the y0 and y4 inputs need not be divided by sqrt(N).
     105             :  *
     106             :  * We have to do addition and subtraction of the integer inputs, which
     107             :  * is no problem, and multiplication by fractional constants, which is
     108             :  * a problem to do in integer arithmetic.  We multiply all the constants
     109             :  * by CONST_SCALE and convert them to integer constants (thus retaining
     110             :  * CONST_BITS bits of precision in the constants).  After doing a
     111             :  * multiplication we have to divide the product by CONST_SCALE, with proper
     112             :  * rounding, to produce the correct output.  This division can be done
     113             :  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
     114             :  * as long as possible so that partial sums can be added together with
     115             :  * full fractional precision.
     116             :  *
     117             :  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
     118             :  * they are represented to better-than-integral precision.  These outputs
     119             :  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
     120             :  * with the recommended scaling.  (To scale up 12-bit sample data further, an
     121             :  * intermediate int32 array would be needed.)
     122             :  *
     123             :  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
     124             :  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
     125             :  * shows that the values given below are the most effective.
     126             :  */
     127             : 
     128             : #ifdef EIGHT_BIT_SAMPLES
     129             : #define PASS1_BITS  2
     130             : #else
     131             : #define PASS1_BITS  1   /* lose a little precision to avoid overflow */
     132             : #endif
     133             : 
     134             : #define ONE         ((int32_t) 1)
     135             : 
     136             : #define CONST_SCALE (ONE << CONST_BITS)
     137             : 
     138             : /* Convert a positive real constant to an integer scaled by CONST_SCALE.
     139             :  * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
     140             :  * you will pay a significant penalty in run time.  In that case, figure
     141             :  * the correct integer constant values and insert them by hand.
     142             :  */
     143             : 
     144             : /* FIX is no longer used; all the FIX_* constants below are precomputed. */
     145             : #define FIX(x)  ((int32_t) ((x) * CONST_SCALE + 0.5))
     146             : 
     147             : /* Descale and correctly round an int32_t value that's scaled by N bits.
     148             :  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
     149             :  * the fudge factor is correct for either sign of X.
     150             :  */
     151             : 
     152             : #define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
     153             : 
     154             : /* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
     155             :  * For 8-bit samples with the recommended scaling, all the variable
     156             :  * and constant values involved are no more than 16 bits wide, so a
     157             :  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
     158             :  * this provides a useful speedup on many machines.
     159             :  * There is no way to specify a 16x16->32 multiply in portable C, but
     160             :  * some C compilers will do the right thing if you provide the correct
     161             :  * combination of casts.
     162             :  * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
     163             :  */
     164             : 
     165             : #ifdef EIGHT_BIT_SAMPLES
     166             : #ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
     167             : #define MULTIPLY(var,const)  (((int16_t) (var)) * ((int16_t) (const)))
     168             : #endif
     169             : #ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
     170             : #define MULTIPLY(var,const)  (((int16_t) (var)) * ((int32_t) (const)))
     171             : #endif
     172             : #endif
     173             : 
     174             : #ifndef MULTIPLY                /* default definition */
     175             : #define MULTIPLY(var,const)  ((var) * (const))
     176             : #endif
     177             : 
     178             : 
     179             : /*
     180             :   Unlike our decoder, where we approximate the FIX constants, we need to use exact
     181             :   ones here; otherwise successive P-frames will drift too much with reference-frame coding.
     182             : */
     183             : #define FIX_0_211164243 1730
     184             : #define FIX_0_275899380 2260
     185             : #define FIX_0_298631336 2446
     186             : #define FIX_0_390180644 3196
     187             : #define FIX_0_509795579 4176
     188             : #define FIX_0_541196100 4433
     189             : #define FIX_0_601344887 4926
     190             : #define FIX_0_765366865 6270
     191             : #define FIX_0_785694958 6436
     192             : #define FIX_0_899976223 7373
     193             : #define FIX_1_061594337 8697
     194             : #define FIX_1_111140466 9102
     195             : #define FIX_1_175875602 9633
     196             : #define FIX_1_306562965 10703
     197             : #define FIX_1_387039845 11363
     198             : #define FIX_1_451774981 11893
     199             : #define FIX_1_501321110 12299
     200             : #define FIX_1_662939225 13623
     201             : #define FIX_1_847759065 15137
     202             : #define FIX_1_961570560 16069
     203             : #define FIX_2_053119869 16819
     204             : #define FIX_2_172734803 17799
     205             : #define FIX_2_562915447 20995
     206             : #define FIX_3_072711026 25172
     207             : 
     208             : /*
     209             :  * Perform the inverse DCT on one block of coefficients.
     210             :  */
     211             : 
     212      256390 : void ff_j_rev_dct(DCTBLOCK data)
     213             : {
     214             :   int32_t tmp0, tmp1, tmp2, tmp3;
     215             :   int32_t tmp10, tmp11, tmp12, tmp13;
     216             :   int32_t z1, z2, z3, z4, z5;
     217             :   int32_t d0, d1, d2, d3, d4, d5, d6, d7;
     218             :   register int16_t *dataptr;
     219             :   int rowctr;
     220             : 
     221             :   /* Pass 1: process rows. */
     222             :   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
     223             :   /* furthermore, we scale the results by 2**PASS1_BITS. */
     224             : 
     225      256390 :   dataptr = data;
     226             : 
     227     2307510 :   for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
     228             :     /* Due to quantization, we will usually find that many of the input
     229             :      * coefficients are zero, especially the AC terms.  We can exploit this
     230             :      * by short-circuiting the IDCT calculation for any row in which all
     231             :      * the AC terms are zero.  In that case each output is equal to the
     232             :      * DC coefficient (with scale factor as needed).
     233             :      * With typical images and quantization tables, half or more of the
     234             :      * row DCT calculations can be simplified this way.
     235             :      */
     236             : 
     237     2051120 :     register int *idataptr = (int*)dataptr;
     238             : 
     239             :     /* WARNING: we do the same permutation as MMX idct to simplify the
     240             :        video core */
     241     2051120 :     d0 = dataptr[0];
     242     2051120 :     d2 = dataptr[1];
     243     2051120 :     d4 = dataptr[2];
     244     2051120 :     d6 = dataptr[3];
     245     2051120 :     d1 = dataptr[4];
     246     2051120 :     d3 = dataptr[5];
     247     2051120 :     d5 = dataptr[6];
     248     2051120 :     d7 = dataptr[7];
     249             : 
     250     2051120 :     if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
     251             :       /* AC terms all zero */
     252     1167050 :       if (d0) {
     253             :           /* Compute a 32 bit value to assign. */
     254      164773 :           int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
     255      164773 :           register int v = (dcval & 0xffff) | ((dcval * (1 << 16)) & 0xffff0000);
     256             : 
     257      164773 :           idataptr[0] = v;
     258      164773 :           idataptr[1] = v;
     259      164773 :           idataptr[2] = v;
     260      164773 :           idataptr[3] = v;
     261             :       }
     262             : 
     263     1167050 :       dataptr += DCTSIZE;       /* advance pointer to next row */
     264     1167050 :       continue;
     265             :     }
     266             : 
     267             :     /* Even part: reverse the even part of the forward DCT. */
     268             :     /* The rotator is sqrt(2)*c(-6). */
     269             : {
     270      884070 :     if (d6) {
     271      250389 :             if (d2) {
     272             :                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
     273      189821 :                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
     274      189821 :                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
     275      189821 :                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
     276             : 
     277      189821 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     278      189821 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     279             : 
     280      189821 :                     tmp10 = tmp0 + tmp3;
     281      189821 :                     tmp13 = tmp0 - tmp3;
     282      189821 :                     tmp11 = tmp1 + tmp2;
     283      189821 :                     tmp12 = tmp1 - tmp2;
     284             :             } else {
     285             :                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
     286       60568 :                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
     287       60568 :                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
     288             : 
     289       60568 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     290       60568 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     291             : 
     292       60568 :                     tmp10 = tmp0 + tmp3;
     293       60568 :                     tmp13 = tmp0 - tmp3;
     294       60568 :                     tmp11 = tmp1 + tmp2;
     295       60568 :                     tmp12 = tmp1 - tmp2;
     296             :             }
     297             :     } else {
     298      633681 :             if (d2) {
     299             :                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
     300      185795 :                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
     301      185795 :                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
     302             : 
     303      185795 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     304      185795 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     305             : 
     306      185795 :                     tmp10 = tmp0 + tmp3;
     307      185795 :                     tmp13 = tmp0 - tmp3;
     308      185795 :                     tmp11 = tmp1 + tmp2;
     309      185795 :                     tmp12 = tmp1 - tmp2;
     310             :             } else {
     311             :                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
     312      447886 :                     tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
     313      447886 :                     tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
     314             :             }
     315             :       }
     316             : 
     317             :     /* Odd part per figure 8; the matrix is unitary and hence its
     318             :      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     319             :      */
     320             : 
     321      884070 :     if (d7) {
     322      411656 :         if (d5) {
     323      190767 :             if (d3) {
     324      172993 :                 if (d1) {
     325             :                     /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
     326      167464 :                     z1 = d7 + d1;
     327      167464 :                     z2 = d5 + d3;
     328      167464 :                     z3 = d7 + d3;
     329      167464 :                     z4 = d5 + d1;
     330      167464 :                     z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
     331             : 
     332      167464 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     333      167464 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     334      167464 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     335      167464 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     336      167464 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     337      167464 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     338      167464 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     339      167464 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     340             : 
     341      167464 :                     z3 += z5;
     342      167464 :                     z4 += z5;
     343             : 
     344      167464 :                     tmp0 += z1 + z3;
     345      167464 :                     tmp1 += z2 + z4;
     346      167464 :                     tmp2 += z2 + z3;
     347      167464 :                     tmp3 += z1 + z4;
     348             :                 } else {
     349             :                     /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
     350        5529 :                     z2 = d5 + d3;
     351        5529 :                     z3 = d7 + d3;
     352        5529 :                     z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
     353             : 
     354        5529 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     355        5529 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     356        5529 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     357        5529 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     358        5529 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     359        5529 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     360        5529 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     361             : 
     362        5529 :                     z3 += z5;
     363        5529 :                     z4 += z5;
     364             : 
     365        5529 :                     tmp0 += z1 + z3;
     366        5529 :                     tmp1 += z2 + z4;
     367        5529 :                     tmp2 += z2 + z3;
     368        5529 :                     tmp3 = z1 + z4;
     369             :                 }
     370             :             } else {
     371       17774 :                 if (d1) {
     372             :                     /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
     373        5830 :                     z1 = d7 + d1;
     374        5830 :                     z4 = d5 + d1;
     375        5830 :                     z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
     376             : 
     377        5830 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     378        5830 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     379        5830 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     380        5830 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     381        5830 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     382        5830 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     383        5830 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     384             : 
     385        5830 :                     z3 += z5;
     386        5830 :                     z4 += z5;
     387             : 
     388        5830 :                     tmp0 += z1 + z3;
     389        5830 :                     tmp1 += z2 + z4;
     390        5830 :                     tmp2 = z2 + z3;
     391        5830 :                     tmp3 += z1 + z4;
     392             :                 } else {
     393             :                     /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
     394       11944 :                     tmp0 = MULTIPLY(-d7, FIX_0_601344887);
     395       11944 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     396       11944 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     397       11944 :                     tmp1 = MULTIPLY(-d5, FIX_0_509795579);
     398       11944 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     399       11944 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     400       11944 :                     z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
     401             : 
     402       11944 :                     z3 += z5;
     403       11944 :                     z4 += z5;
     404             : 
     405       11944 :                     tmp0 += z3;
     406       11944 :                     tmp1 += z4;
     407       11944 :                     tmp2 = z2 + z3;
     408       11944 :                     tmp3 = z1 + z4;
     409             :                 }
     410             :             }
     411             :         } else {
     412      220889 :             if (d3) {
     413       14847 :                 if (d1) {
     414             :                     /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
     415        7688 :                     z1 = d7 + d1;
     416        7688 :                     z3 = d7 + d3;
     417        7688 :                     z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
     418             : 
     419        7688 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     420        7688 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     421        7688 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     422        7688 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     423        7688 :                     z2 = MULTIPLY(-d3, FIX_2_562915447);
     424        7688 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     425        7688 :                     z4 = MULTIPLY(-d1, FIX_0_390180644);
     426             : 
     427        7688 :                     z3 += z5;
     428        7688 :                     z4 += z5;
     429             : 
     430        7688 :                     tmp0 += z1 + z3;
     431        7688 :                     tmp1 = z2 + z4;
     432        7688 :                     tmp2 += z2 + z3;
     433        7688 :                     tmp3 += z1 + z4;
     434             :                 } else {
     435             :                     /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
     436        7159 :                     z3 = d7 + d3;
     437             : 
     438        7159 :                     tmp0 = MULTIPLY(-d7, FIX_0_601344887);
     439        7159 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     440        7159 :                     tmp2 = MULTIPLY(d3, FIX_0_509795579);
     441        7159 :                     z2 = MULTIPLY(-d3, FIX_2_562915447);
     442        7159 :                     z5 = MULTIPLY(z3, FIX_1_175875602);
     443        7159 :                     z3 = MULTIPLY(-z3, FIX_0_785694958);
     444             : 
     445        7159 :                     tmp0 += z3;
     446        7159 :                     tmp1 = z2 + z5;
     447        7159 :                     tmp2 += z3;
     448        7159 :                     tmp3 = z1 + z5;
     449             :                 }
     450             :             } else {
     451      206042 :                 if (d1) {
     452             :                     /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
     453        9371 :                     z1 = d7 + d1;
     454        9371 :                     z5 = MULTIPLY(z1, FIX_1_175875602);
     455             : 
     456        9371 :                     z1 = MULTIPLY(z1, FIX_0_275899380);
     457        9371 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     458        9371 :                     tmp0 = MULTIPLY(-d7, FIX_1_662939225);
     459        9371 :                     z4 = MULTIPLY(-d1, FIX_0_390180644);
     460        9371 :                     tmp3 = MULTIPLY(d1, FIX_1_111140466);
     461             : 
     462        9371 :                     tmp0 += z1;
     463        9371 :                     tmp1 = z4 + z5;
     464        9371 :                     tmp2 = z3 + z5;
     465        9371 :                     tmp3 += z1;
     466             :                 } else {
     467             :                     /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
     468      196671 :                     tmp0 = MULTIPLY(-d7, FIX_1_387039845);
     469      196671 :                     tmp1 = MULTIPLY(d7, FIX_1_175875602);
     470      196671 :                     tmp2 = MULTIPLY(-d7, FIX_0_785694958);
     471      196671 :                     tmp3 = MULTIPLY(d7, FIX_0_275899380);
     472             :                 }
     473             :             }
     474             :         }
     475             :     } else {
     476      472414 :         if (d5) {
     477       80019 :             if (d3) {
     478       26288 :                 if (d1) {
     479             :                     /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
     480       15317 :                     z2 = d5 + d3;
     481       15317 :                     z4 = d5 + d1;
     482       15317 :                     z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
     483             : 
     484       15317 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     485       15317 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     486       15317 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     487       15317 :                     z1 = MULTIPLY(-d1, FIX_0_899976223);
     488       15317 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     489       15317 :                     z3 = MULTIPLY(-d3, FIX_1_961570560);
     490       15317 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     491             : 
     492       15317 :                     z3 += z5;
     493       15317 :                     z4 += z5;
     494             : 
     495       15317 :                     tmp0 = z1 + z3;
     496       15317 :                     tmp1 += z2 + z4;
     497       15317 :                     tmp2 += z2 + z3;
     498       15317 :                     tmp3 += z1 + z4;
     499             :                 } else {
     500             :                     /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
     501       10971 :                     z2 = d5 + d3;
     502             : 
     503       10971 :                     z5 = MULTIPLY(z2, FIX_1_175875602);
     504       10971 :                     tmp1 = MULTIPLY(d5, FIX_1_662939225);
     505       10971 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     506       10971 :                     z2 = MULTIPLY(-z2, FIX_1_387039845);
     507       10971 :                     tmp2 = MULTIPLY(d3, FIX_1_111140466);
     508       10971 :                     z3 = MULTIPLY(-d3, FIX_1_961570560);
     509             : 
     510       10971 :                     tmp0 = z3 + z5;
     511       10971 :                     tmp1 += z2;
     512       10971 :                     tmp2 += z2;
     513       10971 :                     tmp3 = z4 + z5;
     514             :                 }
     515             :             } else {
     516       53731 :                 if (d1) {
     517             :                     /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
     518       11905 :                     z4 = d5 + d1;
     519             : 
     520       11905 :                     z5 = MULTIPLY(z4, FIX_1_175875602);
     521       11905 :                     z1 = MULTIPLY(-d1, FIX_0_899976223);
     522       11905 :                     tmp3 = MULTIPLY(d1, FIX_0_601344887);
     523       11905 :                     tmp1 = MULTIPLY(-d5, FIX_0_509795579);
     524       11905 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     525       11905 :                     z4 = MULTIPLY(z4, FIX_0_785694958);
     526             : 
     527       11905 :                     tmp0 = z1 + z5;
     528       11905 :                     tmp1 += z4;
     529       11905 :                     tmp2 = z2 + z5;
     530       11905 :                     tmp3 += z4;
     531             :                 } else {
     532             :                     /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
     533       41826 :                     tmp0 = MULTIPLY(d5, FIX_1_175875602);
     534       41826 :                     tmp1 = MULTIPLY(d5, FIX_0_275899380);
     535       41826 :                     tmp2 = MULTIPLY(-d5, FIX_1_387039845);
     536       41826 :                     tmp3 = MULTIPLY(d5, FIX_0_785694958);
     537             :                 }
     538             :             }
     539             :         } else {
     540      392395 :             if (d3) {
     541      120448 :                 if (d1) {
     542             :                     /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
     543       54347 :                     z5 = d1 + d3;
     544       54347 :                     tmp3 = MULTIPLY(d1, FIX_0_211164243);
     545       54347 :                     tmp2 = MULTIPLY(-d3, FIX_1_451774981);
     546       54347 :                     z1 = MULTIPLY(d1, FIX_1_061594337);
     547       54347 :                     z2 = MULTIPLY(-d3, FIX_2_172734803);
     548       54347 :                     z4 = MULTIPLY(z5, FIX_0_785694958);
     549       54347 :                     z5 = MULTIPLY(z5, FIX_1_175875602);
     550             : 
     551       54347 :                     tmp0 = z1 - z4;
     552       54347 :                     tmp1 = z2 + z4;
     553       54347 :                     tmp2 += z5;
     554       54347 :                     tmp3 += z5;
     555             :                 } else {
     556             :                     /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
     557       66101 :                     tmp0 = MULTIPLY(-d3, FIX_0_785694958);
     558       66101 :                     tmp1 = MULTIPLY(-d3, FIX_1_387039845);
     559       66101 :                     tmp2 = MULTIPLY(-d3, FIX_0_275899380);
     560       66101 :                     tmp3 = MULTIPLY(d3, FIX_1_175875602);
     561             :                 }
     562             :             } else {
     563      271947 :                 if (d1) {
     564             :                     /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
     565      156833 :                     tmp0 = MULTIPLY(d1, FIX_0_275899380);
     566      156833 :                     tmp1 = MULTIPLY(d1, FIX_0_785694958);
     567      156833 :                     tmp2 = MULTIPLY(d1, FIX_1_175875602);
     568      156833 :                     tmp3 = MULTIPLY(d1, FIX_1_387039845);
     569             :                 } else {
     570             :                     /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
     571      115114 :                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
     572             :                 }
     573             :             }
     574             :         }
     575             :     }
     576             : }
     577             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     578             : 
     579      884070 :     dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     580      884070 :     dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     581      884070 :     dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
     582      884070 :     dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
     583      884070 :     dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
     584      884070 :     dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     585      884070 :     dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     586      884070 :     dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
     587             : 
     588      884070 :     dataptr += DCTSIZE;         /* advance pointer to next row */
     589             :   }
     590             : 
     591             :   /* Pass 2: process columns. */
     592             :   /* Note that we must descale the results by a factor of 8 == 2**3, */
     593             :   /* and also undo the PASS1_BITS scaling. */
     594             : 
     595      256390 :   dataptr = data;
     596     2307510 :   for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
     597             :     /* Columns of zeroes can be exploited in the same way as we did with rows.
     598             :      * However, the row calculation has created many nonzero AC terms, so the
     599             :      * simplification applies less often (typically 5% to 10% of the time).
     600             :      * On machines with very fast multiplication, it's possible that the
     601             :      * test takes more time than it's worth.  No such all-zero column test is
     602             :      * made in this pass: every column goes straight to the even/odd parts below.
     603             :      */
     604             : 
     605     2051120 :     d0 = dataptr[DCTSIZE*0];
     606     2051120 :     d1 = dataptr[DCTSIZE*1];
     607     2051120 :     d2 = dataptr[DCTSIZE*2];
     608     2051120 :     d3 = dataptr[DCTSIZE*3];
     609     2051120 :     d4 = dataptr[DCTSIZE*4];
     610     2051120 :     d5 = dataptr[DCTSIZE*5];
     611     2051120 :     d6 = dataptr[DCTSIZE*6];
     612     2051120 :     d7 = dataptr[DCTSIZE*7];
     613             : 
     614             :     /* Even part: reverse the even part of the forward DCT. */
     615             :     /* The rotator is sqrt(2)*c(-6). */
     616     2051120 :     if (d6) {
     617      591088 :             if (d2) {
     618             :                     /* d2 != 0, d6 != 0 (d0 and d4 are not tested) */
     619      469566 :                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
     620      469566 :                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
     621      469566 :                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
     622             : 
     623      469566 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     624      469566 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     625             : 
     626      469566 :                     tmp10 = tmp0 + tmp3;
     627      469566 :                     tmp13 = tmp0 - tmp3;
     628      469566 :                     tmp11 = tmp1 + tmp2;
     629      469566 :                     tmp12 = tmp1 - tmp2;
     630             :             } else {
     631             :                     /* d2 == 0, d6 != 0 (d0 and d4 are not tested) */
     632      121522 :                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
     633      121522 :                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
     634             : 
     635      121522 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     636      121522 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     637             : 
     638      121522 :                     tmp10 = tmp0 + tmp3;
     639      121522 :                     tmp13 = tmp0 - tmp3;
     640      121522 :                     tmp11 = tmp1 + tmp2;
     641      121522 :                     tmp12 = tmp1 - tmp2;
     642             :             }
     643             :     } else {
     644     1460032 :             if (d2) {
     645             :                     /* d2 != 0, d6 == 0 (d0 and d4 are not tested) */
     646      522320 :                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
     647      522320 :                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
     648             : 
     649      522320 :                     tmp0 = (d0 + d4) * CONST_SCALE;
     650      522320 :                     tmp1 = (d0 - d4) * CONST_SCALE;
     651             : 
     652      522320 :                     tmp10 = tmp0 + tmp3;
     653      522320 :                     tmp13 = tmp0 - tmp3;
     654      522320 :                     tmp11 = tmp1 + tmp2;
     655      522320 :                     tmp12 = tmp1 - tmp2;
     656             :             } else {
     657             :                     /* d2 == 0, d6 == 0 (d0 and d4 are not tested) */
     658      937712 :                     tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
     659      937712 :                     tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
     660             :             }
     661             :     }
     662             : 
     663             :     /* Odd part per figure 8; the matrix is unitary and hence its
     664             :      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
     665             :      */
     666     2051120 :     if (d7) {
     667     1767613 :         if (d5) {
     668      618129 :             if (d3) {
     669      501664 :                 if (d1) {
     670             :                     /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
     671      436687 :                     z1 = d7 + d1;
     672      436687 :                     z2 = d5 + d3;
     673      436687 :                     z3 = d7 + d3;
     674      436687 :                     z4 = d5 + d1;
     675      436687 :                     z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
     676             : 
     677      436687 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     678      436687 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     679      436687 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     680      436687 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     681      436687 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     682      436687 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     683      436687 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     684      436687 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     685             : 
     686      436687 :                     z3 += z5;
     687      436687 :                     z4 += z5;
     688             : 
     689      436687 :                     tmp0 += z1 + z3;
     690      436687 :                     tmp1 += z2 + z4;
     691      436687 :                     tmp2 += z2 + z3;
     692      436687 :                     tmp3 += z1 + z4;
     693             :                 } else {
     694             :                     /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
     695       64977 :                     z2 = d5 + d3;
     696       64977 :                     z3 = d7 + d3;
     697       64977 :                     z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
     698             : 
     699       64977 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     700       64977 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     701       64977 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     702       64977 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     703       64977 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     704       64977 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     705       64977 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     706             : 
     707       64977 :                     z3 += z5;
     708       64977 :                     z4 += z5;
     709             : 
     710       64977 :                     tmp0 += z1 + z3;
     711       64977 :                     tmp1 += z2 + z4;
     712       64977 :                     tmp2 += z2 + z3;
     713       64977 :                     tmp3 = z1 + z4;
     714             :                 }
     715             :             } else {
     716      116465 :                 if (d1) {
     717             :                     /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
     718       47878 :                     z1 = d7 + d1;
     719       47878 :                     z3 = d7;
     720       47878 :                     z4 = d5 + d1;
     721       47878 :                     z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
     722             : 
     723       47878 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     724       47878 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     725       47878 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     726       47878 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     727       47878 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     728       47878 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     729       47878 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     730             : 
     731       47878 :                     z3 += z5;
     732       47878 :                     z4 += z5;
     733             : 
     734       47878 :                     tmp0 += z1 + z3;
     735       47878 :                     tmp1 += z2 + z4;
     736       47878 :                     tmp2 = z2 + z3;
     737       47878 :                     tmp3 += z1 + z4;
     738             :                 } else {
     739             :                     /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
     740       68587 :                     tmp0 = MULTIPLY(-d7, FIX_0_601344887);
     741       68587 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     742       68587 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     743       68587 :                     tmp1 = MULTIPLY(-d5, FIX_0_509795579);
     744       68587 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     745       68587 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     746       68587 :                     z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
     747             : 
     748       68587 :                     z3 += z5;
     749       68587 :                     z4 += z5;
     750             : 
     751       68587 :                     tmp0 += z3;
     752       68587 :                     tmp1 += z4;
     753       68587 :                     tmp2 = z2 + z3;
     754       68587 :                     tmp3 = z1 + z4;
     755             :                 }
     756             :             }
     757             :         } else {
     758     1149484 :             if (d3) {
     759      285997 :                 if (d1) {
     760             :                     /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
     761      178504 :                     z1 = d7 + d1;
     762      178504 :                     z3 = d7 + d3;
     763      178504 :                     z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
     764             : 
     765      178504 :                     tmp0 = MULTIPLY(d7, FIX_0_298631336);
     766      178504 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     767      178504 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     768      178504 :                     z1 = MULTIPLY(-z1, FIX_0_899976223);
     769      178504 :                     z2 = MULTIPLY(-d3, FIX_2_562915447);
     770      178504 :                     z3 = MULTIPLY(-z3, FIX_1_961570560);
     771      178504 :                     z4 = MULTIPLY(-d1, FIX_0_390180644);
     772             : 
     773      178504 :                     z3 += z5;
     774      178504 :                     z4 += z5;
     775             : 
     776      178504 :                     tmp0 += z1 + z3;
     777      178504 :                     tmp1 = z2 + z4;
     778      178504 :                     tmp2 += z2 + z3;
     779      178504 :                     tmp3 += z1 + z4;
     780             :                 } else {
     781             :                     /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
     782      107493 :                     z3 = d7 + d3;
     783             : 
     784      107493 :                     tmp0 = MULTIPLY(-d7, FIX_0_601344887);
     785      107493 :                     z1 = MULTIPLY(-d7, FIX_0_899976223);
     786      107493 :                     tmp2 = MULTIPLY(d3, FIX_0_509795579);
     787      107493 :                     z2 = MULTIPLY(-d3, FIX_2_562915447);
     788      107493 :                     z5 = MULTIPLY(z3, FIX_1_175875602);
     789      107493 :                     z3 = MULTIPLY(-z3, FIX_0_785694958);
     790             : 
     791      107493 :                     tmp0 += z3;
     792      107493 :                     tmp1 = z2 + z5;
     793      107493 :                     tmp2 += z3;
     794      107493 :                     tmp3 = z1 + z5;
     795             :                 }
     796             :             } else {
     797      863487 :                 if (d1) {
     798             :                     /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
     799      317832 :                     z1 = d7 + d1;
     800      317832 :                     z5 = MULTIPLY(z1, FIX_1_175875602);
     801             : 
     802      317832 :                     z1 = MULTIPLY(z1, FIX_0_275899380);
     803      317832 :                     z3 = MULTIPLY(-d7, FIX_1_961570560);
     804      317832 :                     tmp0 = MULTIPLY(-d7, FIX_1_662939225);
     805      317832 :                     z4 = MULTIPLY(-d1, FIX_0_390180644);
     806      317832 :                     tmp3 = MULTIPLY(d1, FIX_1_111140466);
     807             : 
     808      317832 :                     tmp0 += z1;
     809      317832 :                     tmp1 = z4 + z5;
     810      317832 :                     tmp2 = z3 + z5;
     811      317832 :                     tmp3 += z1;
     812             :                 } else {
     813             :                     /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
     814      545655 :                     tmp0 = MULTIPLY(-d7, FIX_1_387039845);
     815      545655 :                     tmp1 = MULTIPLY(d7, FIX_1_175875602);
     816      545655 :                     tmp2 = MULTIPLY(-d7, FIX_0_785694958);
     817      545655 :                     tmp3 = MULTIPLY(d7, FIX_0_275899380);
     818             :                 }
     819             :             }
     820             :         }
     821             :     } else {
     822      283507 :         if (d5) {
     823       60262 :             if (d3) {
     824       38632 :                 if (d1) {
     825             :                     /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
     826       29576 :                     z2 = d5 + d3;
     827       29576 :                     z4 = d5 + d1;
     828       29576 :                     z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
     829             : 
     830       29576 :                     tmp1 = MULTIPLY(d5, FIX_2_053119869);
     831       29576 :                     tmp2 = MULTIPLY(d3, FIX_3_072711026);
     832       29576 :                     tmp3 = MULTIPLY(d1, FIX_1_501321110);
     833       29576 :                     z1 = MULTIPLY(-d1, FIX_0_899976223);
     834       29576 :                     z2 = MULTIPLY(-z2, FIX_2_562915447);
     835       29576 :                     z3 = MULTIPLY(-d3, FIX_1_961570560);
     836       29576 :                     z4 = MULTIPLY(-z4, FIX_0_390180644);
     837             : 
     838       29576 :                     z3 += z5;
     839       29576 :                     z4 += z5;
     840             : 
     841       29576 :                     tmp0 = z1 + z3;
     842       29576 :                     tmp1 += z2 + z4;
     843       29576 :                     tmp2 += z2 + z3;
     844       29576 :                     tmp3 += z1 + z4;
     845             :                 } else {
     846             :                     /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
     847        9056 :                     z2 = d5 + d3;
     848             : 
     849        9056 :                     z5 = MULTIPLY(z2, FIX_1_175875602);
     850        9056 :                     tmp1 = MULTIPLY(d5, FIX_1_662939225);
     851        9056 :                     z4 = MULTIPLY(-d5, FIX_0_390180644);
     852        9056 :                     z2 = MULTIPLY(-z2, FIX_1_387039845);
     853        9056 :                     tmp2 = MULTIPLY(d3, FIX_1_111140466);
     854        9056 :                     z3 = MULTIPLY(-d3, FIX_1_961570560);
     855             : 
     856        9056 :                     tmp0 = z3 + z5;
     857        9056 :                     tmp1 += z2;
     858        9056 :                     tmp2 += z2;
     859        9056 :                     tmp3 = z4 + z5;
     860             :                 }
     861             :             } else {
     862       21630 :                 if (d1) {
     863             :                     /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
     864        9905 :                     z4 = d5 + d1;
     865             : 
     866        9905 :                     z5 = MULTIPLY(z4, FIX_1_175875602);
     867        9905 :                     z1 = MULTIPLY(-d1, FIX_0_899976223);
     868        9905 :                     tmp3 = MULTIPLY(d1, FIX_0_601344887);
     869        9905 :                     tmp1 = MULTIPLY(-d5, FIX_0_509795579);
     870        9905 :                     z2 = MULTIPLY(-d5, FIX_2_562915447);
     871        9905 :                     z4 = MULTIPLY(z4, FIX_0_785694958);
     872             : 
     873        9905 :                     tmp0 = z1 + z5;
     874        9905 :                     tmp1 += z4;
     875        9905 :                     tmp2 = z2 + z5;
     876        9905 :                     tmp3 += z4;
     877             :                 } else {
     878             :                     /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
     879       11725 :                     tmp0 = MULTIPLY(d5, FIX_1_175875602);
     880       11725 :                     tmp1 = MULTIPLY(d5, FIX_0_275899380);
     881       11725 :                     tmp2 = MULTIPLY(-d5, FIX_1_387039845);
     882       11725 :                     tmp3 = MULTIPLY(d5, FIX_0_785694958);
     883             :                 }
     884             :             }
     885             :         } else {
     886      223245 :             if (d3) {
     887       62267 :                 if (d1) {
     888             :                     /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
     889       49316 :                     z5 = d1 + d3;
     890       49316 :                     tmp3 = MULTIPLY(d1, FIX_0_211164243);
     891       49316 :                     tmp2 = MULTIPLY(-d3, FIX_1_451774981);
     892       49316 :                     z1 = MULTIPLY(d1, FIX_1_061594337);
     893       49316 :                     z2 = MULTIPLY(-d3, FIX_2_172734803);
     894       49316 :                     z4 = MULTIPLY(z5, FIX_0_785694958);
     895       49316 :                     z5 = MULTIPLY(z5, FIX_1_175875602);
     896             : 
     897       49316 :                     tmp0 = z1 - z4;
     898       49316 :                     tmp1 = z2 + z4;
     899       49316 :                     tmp2 += z5;
     900       49316 :                     tmp3 += z5;
     901             :                 } else {
     902             :                     /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
     903       12951 :                     tmp0 = MULTIPLY(-d3, FIX_0_785694958);
     904       12951 :                     tmp1 = MULTIPLY(-d3, FIX_1_387039845);
     905       12951 :                     tmp2 = MULTIPLY(-d3, FIX_0_275899380);
     906       12951 :                     tmp3 = MULTIPLY(d3, FIX_1_175875602);
     907             :                 }
     908             :             } else {
     909      160978 :                 if (d1) {
     910             :                     /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
     911       53628 :                     tmp0 = MULTIPLY(d1, FIX_0_275899380);
     912       53628 :                     tmp1 = MULTIPLY(d1, FIX_0_785694958);
     913       53628 :                     tmp2 = MULTIPLY(d1, FIX_1_175875602);
     914       53628 :                     tmp3 = MULTIPLY(d1, FIX_1_387039845);
     915             :                 } else {
     916             :                     /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
     917      107350 :                     tmp0 = tmp1 = tmp2 = tmp3 = 0;
     918             :                 }
     919             :             }
     920             :         }
     921             :     }
     922             : 
     923             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
     924             : 
     925     2051120 :     dataptr[DCTSIZE*0] = (int16_t) DESCALE(tmp10 + tmp3,
     926             :                                            CONST_BITS+PASS1_BITS+3);
     927     2051120 :     dataptr[DCTSIZE*7] = (int16_t) DESCALE(tmp10 - tmp3,
     928             :                                            CONST_BITS+PASS1_BITS+3);
     929     2051120 :     dataptr[DCTSIZE*1] = (int16_t) DESCALE(tmp11 + tmp2,
     930             :                                            CONST_BITS+PASS1_BITS+3);
     931     2051120 :     dataptr[DCTSIZE*6] = (int16_t) DESCALE(tmp11 - tmp2,
     932             :                                            CONST_BITS+PASS1_BITS+3);
     933     2051120 :     dataptr[DCTSIZE*2] = (int16_t) DESCALE(tmp12 + tmp1,
     934             :                                            CONST_BITS+PASS1_BITS+3);
     935     2051120 :     dataptr[DCTSIZE*5] = (int16_t) DESCALE(tmp12 - tmp1,
     936             :                                            CONST_BITS+PASS1_BITS+3);
     937     2051120 :     dataptr[DCTSIZE*3] = (int16_t) DESCALE(tmp13 + tmp0,
     938             :                                            CONST_BITS+PASS1_BITS+3);
     939     2051120 :     dataptr[DCTSIZE*4] = (int16_t) DESCALE(tmp13 - tmp0,
     940             :                                            CONST_BITS+PASS1_BITS+3);
     941             : 
     942     2051120 :     dataptr++;                  /* advance pointer to next column */
     943             :   }
     944      256390 : }
     945             : 
     946             : #undef DCTSIZE  /* drop the earlier DCTSIZE so the 4-point routine below can redefine it */
     947             : #define DCTSIZE 4  /* loop bound used by ff_j_rev_dct4 for both its row and column passes */
     948             : #define DCTSTRIDE 8  /* element step between consecutive rows of the block in ff_j_rev_dct4 */
     949             : 
     950       88025 : void ff_j_rev_dct4(DCTBLOCK data)
     951             : {
     952             :   int32_t tmp0, tmp1, tmp2, tmp3;
     953             :   int32_t tmp10, tmp11, tmp12, tmp13;
     954             :   int32_t z1;
     955             :   int32_t d0, d2, d4, d6;
     956             :   register int16_t *dataptr;
     957             :   int rowctr;
     958             : 
     959             :   /* Pass 1: process rows. */
     960             :   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
     961             :   /* furthermore, we scale the results by 2**PASS1_BITS. */
     962             : 
     963       88025 :   data[0] += 4;
     964             : 
     965       88025 :   dataptr = data;
     966             : 
     967      440125 :   for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
     968             :     /* Due to quantization, we will usually find that many of the input
     969             :      * coefficients are zero, especially the AC terms.  We can exploit this
     970             :      * by short-circuiting the IDCT calculation for any row in which all
     971             :      * the AC terms are zero.  In that case each output is equal to the
     972             :      * DC coefficient (with scale factor as needed).
     973             :      * With typical images and quantization tables, half or more of the
     974             :      * row DCT calculations can be simplified this way.
     975             :      */
     976             : 
     977      352100 :     register int *idataptr = (int*)dataptr;
     978             : 
     979      352100 :     d0 = dataptr[0];
     980      352100 :     d2 = dataptr[1];
     981      352100 :     d4 = dataptr[2];
     982      352100 :     d6 = dataptr[3];
     983             : 
     984      352100 :     if ((d2 | d4 | d6) == 0) {
     985             :       /* AC terms all zero */
     986      152603 :       if (d0) {
     987             :           /* Compute a 32 bit value to assign. */
     988       47562 :           int16_t dcval = (int16_t) (d0 << PASS1_BITS);
     989       47562 :           register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
     990             : 
     991       47562 :           idataptr[0] = v;
     992       47562 :           idataptr[1] = v;
     993             :       }
     994             : 
     995      152603 :       dataptr += DCTSTRIDE;     /* advance pointer to next row */
     996      152603 :       continue;
     997             :     }
     998             : 
     999             :     /* Even part: reverse the even part of the forward DCT. */
    1000             :     /* The rotator is sqrt(2)*c(-6). */
    1001      199497 :     if (d6) {
    1002       99787 :             if (d2) {
    1003             :                     /* d2 != 0, d6 != 0 (d0 and d4 are not tested) */
    1004       70211 :                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
    1005       70211 :                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
    1006       70211 :                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
    1007             : 
    1008       70211 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1009       70211 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1010             : 
    1011       70211 :                     tmp10 = tmp0 + tmp3;
    1012       70211 :                     tmp13 = tmp0 - tmp3;
    1013       70211 :                     tmp11 = tmp1 + tmp2;
    1014       70211 :                     tmp12 = tmp1 - tmp2;
    1015             :             } else {
    1016             :                     /* d2 == 0, d6 != 0 (d0 and d4 are not tested) */
    1017       29576 :                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
    1018       29576 :                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
    1019             : 
    1020       29576 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1021       29576 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1022             : 
    1023       29576 :                     tmp10 = tmp0 + tmp3;
    1024       29576 :                     tmp13 = tmp0 - tmp3;
    1025       29576 :                     tmp11 = tmp1 + tmp2;
    1026       29576 :                     tmp12 = tmp1 - tmp2;
    1027             :             }
    1028             :     } else {
    1029       99710 :             if (d2) {
    1030             :                     /* d2 != 0, d6 == 0 (d0 and d4 are not tested) */
    1031       75107 :                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
    1032       75107 :                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
    1033             : 
    1034       75107 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1035       75107 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1036             : 
    1037       75107 :                     tmp10 = tmp0 + tmp3;
    1038       75107 :                     tmp13 = tmp0 - tmp3;
    1039       75107 :                     tmp11 = tmp1 + tmp2;
    1040       75107 :                     tmp12 = tmp1 - tmp2;
    1041             :             } else {
    1042             :                     /* d2 == 0, d6 == 0 (d0 and d4 are not tested) */
    1043       24603 :                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
    1044       24603 :                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
    1045             :             }
    1046             :       }
    1047             : 
    1048             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    1049             : 
    1050      199497 :     dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
    1051      199497 :     dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
    1052      199497 :     dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
    1053      199497 :     dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
    1054             : 
    1055      199497 :     dataptr += DCTSTRIDE;       /* advance pointer to next row */
    1056             :   }
    1057             : 
    1058             :   /* Pass 2: process columns. */
    1059             :   /* Note that we must descale the results by a factor of 8 == 2**3, */
    1060             :   /* and also undo the PASS1_BITS scaling. */
    1061             : 
    1062       88025 :   dataptr = data;
    1063      440125 :   for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
    1064             :     /* Columns of zeroes can be exploited in the same way as we did with rows.
    1065             :      * However, the row calculation has created many nonzero AC terms, so the
    1066             :      * simplification applies less often (typically 5% to 10% of the time).
    1067             :      * On machines with very fast multiplication, it's possible that the
    1068             :      * test takes more time than it's worth.  In that case this section
    1069             :      * may be commented out.
    1070             :      */
    1071             : 
    1072      352100 :     d0 = dataptr[DCTSTRIDE*0];
    1073      352100 :     d2 = dataptr[DCTSTRIDE*1];
    1074      352100 :     d4 = dataptr[DCTSTRIDE*2];
    1075      352100 :     d6 = dataptr[DCTSTRIDE*3];
    1076             : 
    1077             :     /* Even part: reverse the even part of the forward DCT. */
    1078             :     /* The rotator is sqrt(2)*c(-6). */
    1079      352100 :     if (d6) {
    1080      171607 :             if (d2) {
    1081             :                     /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
    1082      159810 :                     z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
    1083      159810 :                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
    1084      159810 :                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
    1085             : 
    1086      159810 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1087      159810 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1088             : 
    1089      159810 :                     tmp10 = tmp0 + tmp3;
    1090      159810 :                     tmp13 = tmp0 - tmp3;
    1091      159810 :                     tmp11 = tmp1 + tmp2;
    1092      159810 :                     tmp12 = tmp1 - tmp2;
    1093             :             } else {
    1094             :                     /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
    1095       11797 :                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
    1096       11797 :                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
    1097             : 
    1098       11797 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1099       11797 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1100             : 
    1101       11797 :                     tmp10 = tmp0 + tmp3;
    1102       11797 :                     tmp13 = tmp0 - tmp3;
    1103       11797 :                     tmp11 = tmp1 + tmp2;
    1104       11797 :                     tmp12 = tmp1 - tmp2;
    1105             :             }
    1106             :     } else {
    1107      180493 :             if (d2) {
    1108             :                     /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
    1109       84921 :                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
    1110       84921 :                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
    1111             : 
    1112       84921 :                     tmp0 = (d0 + d4) << CONST_BITS;
    1113       84921 :                     tmp1 = (d0 - d4) << CONST_BITS;
    1114             : 
    1115       84921 :                     tmp10 = tmp0 + tmp3;
    1116       84921 :                     tmp13 = tmp0 - tmp3;
    1117       84921 :                     tmp11 = tmp1 + tmp2;
    1118       84921 :                     tmp12 = tmp1 - tmp2;
    1119             :             } else {
    1120             :                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
    1121       95572 :                     tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
    1122       95572 :                     tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
    1123             :             }
    1124             :     }
    1125             : 
    1126             :     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
    1127             : 
    1128      352100 :     dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
    1129      352100 :     dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
    1130      352100 :     dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
    1131      352100 :     dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
    1132             : 
    1133      352100 :     dataptr++;                  /* advance pointer to next column */
    1134             :   }
    1135       88025 : }
    1136             : 
    1137           0 : void ff_j_rev_dct2(DCTBLOCK data){
    1138             :   int d00, d01, d10, d11;
    1139             : 
    1140           0 :   data[0] += 4;
    1141           0 :   d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE];
    1142           0 :   d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
    1143           0 :   d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
    1144           0 :   d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
    1145             : 
    1146           0 :   data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
    1147           0 :   data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
    1148           0 :   data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
    1149           0 :   data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
    1150           0 : }
    1151             : 
    1152           0 : void ff_j_rev_dct1(DCTBLOCK data){
    1153           0 :   data[0] = (data[0] + 4)>>3;
    1154           0 : }
    1155             : 
    1156             : #undef FIX
    1157             : #undef CONST_BITS
    1158             : 
    /*
     * Transform-and-store wrapper.
     *
     * Passes block to ff_j_rev_dct(), then passes the same block together
     * with dest and line_size to ff_put_pixels_clamped_c().
     */
    void ff_jref_idct_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
    {
        /* Step 1: run the transform on the coefficient block. */
        ff_j_rev_dct(block);

        /* Step 2: hand the block over for output into dest. */
        ff_put_pixels_clamped_c(block, dest, line_size);
    }
    1164             : 
    /*
     * Transform-and-accumulate wrapper.
     *
     * Passes block to ff_j_rev_dct(), then passes the same block together
     * with dest and line_size to ff_add_pixels_clamped_c().
     */
    void ff_jref_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
    {
        /* Step 1: run the transform on the coefficient block. */
        ff_j_rev_dct(block);

        /* Step 2: hand the block over to be combined with dest. */
        ff_add_pixels_clamped_c(block, dest, line_size);
    }

Generated by: LCOV version 1.13