LCOV - code coverage report
Current view: top level - libavcodec - jfdctfst.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 71 103 68.9 %
Date: 2017-12-13 02:34:56 Functions: 2 3 66.7 %

          Line data    Source code
       1             : /*
       2             :  * This file is part of the Independent JPEG Group's software.
       3             :  *
       4             :  * The authors make NO WARRANTY or representation, either express or implied,
       5             :  * with respect to this software, its quality, accuracy, merchantability, or
       6             :  * fitness for a particular purpose.  This software is provided "AS IS", and
       7             :  * you, its user, assume the entire risk as to its quality and accuracy.
       8             :  *
       9             :  * This software is copyright (C) 1994-1996, Thomas G. Lane.
      10             :  * All Rights Reserved except as specified below.
      11             :  *
      12             :  * Permission is hereby granted to use, copy, modify, and distribute this
      13             :  * software (or portions thereof) for any purpose, without fee, subject to
      14             :  * these conditions:
      15             :  * (1) If any part of the source code for this software is distributed, then
      16             :  * this README file must be included, with this copyright and no-warranty
      17             :  * notice unaltered; and any additions, deletions, or changes to the original
      18             :  * files must be clearly indicated in accompanying documentation.
      19             :  * (2) If only executable code is distributed, then the accompanying
      20             :  * documentation must state that "this software is based in part on the work
      21             :  * of the Independent JPEG Group".
      22             :  * (3) Permission for use of this software is granted only if the user accepts
      23             :  * full responsibility for any undesirable consequences; the authors accept
      24             :  * NO LIABILITY for damages of any kind.
      25             :  *
      26             :  * These conditions apply to any software derived from or based on the IJG
      27             :  * code, not just to the unmodified library.  If you use our work, you ought
      28             :  * to acknowledge us.
      29             :  *
      30             :  * Permission is NOT granted for the use of any IJG author's name or company
      31             :  * name in advertising or publicity relating to this software or products
      32             :  * derived from it.  This software may be referred to only as "the Independent
      33             :  * JPEG Group's software".
      34             :  *
      35             :  * We specifically permit and encourage the use of this software as the basis
      36             :  * of commercial products, provided that all warranty or liability claims are
      37             :  * assumed by the product vendor.
      38             :  *
      39             :  * This file contains a fast, not so accurate integer implementation of the
      40             :  * forward DCT (Discrete Cosine Transform).
      41             :  *
      42             :  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
      43             :  * on each column.  Direct algorithms are also available, but they are
      44             :  * much more complex and seem not to be any faster when reduced to code.
      45             :  *
      46             :  * This implementation is based on Arai, Agui, and Nakajima's algorithm for
      47             :  * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
      48             :  * Japanese, but the algorithm is described in the Pennebaker & Mitchell
      49             :  * JPEG textbook (see REFERENCES section in file README).  The following code
      50             :  * is based directly on figure 4-8 in P&M.
      51             :  * While an 8-point DCT cannot be done in less than 11 multiplies, it is
      52             :  * possible to arrange the computation so that many of the multiplies are
      53             :  * simple scalings of the final outputs.  These multiplies can then be
      54             :  * folded into the multiplications or divisions by the JPEG quantization
      55             :  * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
      56             :  * to be done in the DCT itself.
      57             :  * The primary disadvantage of this method is that with fixed-point math,
      58             :  * accuracy is lost due to imprecise representation of the scaled
      59             :  * quantization values.  The smaller the quantization table entry, the less
      60             :  * precise the scaled value, so this implementation does worse with high-
      61             :  * quality-setting files than with low-quality ones.
      62             :  */
      63             : 
      64             : /**
      65             :  * @file
      66             :  * Independent JPEG Group's fast AAN dct.
      67             :  */
      68             : 
      69             : #include <stdlib.h>
      70             : #include <stdio.h>
      71             : #include "libavutil/common.h"
      72             : #include "dct.h"
      73             : 
      74             : #define DCTSIZE 8 /* rows and columns per block; the loops below run DCTSIZE times */
      75             : #define GLOBAL(x) x /* expands to its argument; wraps the return type of the exported functions */
      76             : #define RIGHT_SHIFT(x, n) ((x) >> (n)) /* plain >>; C leaves >> of a negative signed value implementation-defined */
      77             : 
      78             : /*
      79             :  * This module is specialized to the case DCTSIZE = 8.
      80             :  */
      81             : 
      82             : #if DCTSIZE != 8
      83             :   Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
      84             : #endif
      85             : 
      86             : 
      87             : /* Scaling decisions are generally the same as in the LL&M algorithm;
      88             :  * see jfdctint.c for more details.  However, we choose to descale
      89             :  * (right shift) multiplication products as soon as they are formed,
      90             :  * rather than carrying additional fractional bits into subsequent additions.
      91             :  * This compromises accuracy slightly, but it lets us save a few shifts.
      92             :  * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
      93             :  * everywhere except in the multiplications proper; this saves a good deal
      94             :  * of work on 16-bit-int machines.
      95             :  *
      96             :  * Again to save a few shifts, the intermediate results between pass 1 and
      97             :  * pass 2 are not upscaled, but are represented only to integral precision.
      98             :  *
      99             :  * A final compromise is to represent the multiplicative constants to only
     100             :  * 8 fractional bits, rather than 13.  This saves some shifting work on some
     101             :  * machines, and may also reduce the cost of multiplication (since there
     102             :  * are fewer one-bits in the constants).
     103             :  */
     104             : 
     105             : #define CONST_BITS  8
     106             : 
     107             : 
     108             : /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
     109             :  * causing a lot of useless floating-point operations at run time.
     110             :  * To get around this we use the following pre-calculated constants.
     111             :  * If you change CONST_BITS you may want to add appropriate values.
     112             :  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
     113             :  */
     114             : 
     115             : #if CONST_BITS == 8 /* constants below are the real values scaled by 2^8 and rounded */
     116             : #define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
     117             : #define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
     118             : #define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
     119             : #define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
     120             : #else /* NOTE(review): FIX() is not defined in this file; this branch needs it from elsewhere -- confirm before changing CONST_BITS */
     121             : #define FIX_0_382683433  FIX(0.382683433)
     122             : #define FIX_0_541196100  FIX(0.541196100)
     123             : #define FIX_0_707106781  FIX(0.707106781)
     124             : #define FIX_1_306562965  FIX(1.306562965)
     125             : #endif
     126             : 
     127             : 
     128             : /* We can gain a little more speed, with a further compromise in accuracy,
     129             :  * by omitting the addition in a descaling shift.  This yields an incorrectly
     130             :  * rounded result half the time...
     131             :  */
     132             : 
     133             : #ifndef USE_ACCURATE_ROUNDING /* NOTE(review): when this IS defined, no DESCALE is defined in this file; it would have to come from an included header -- confirm */
     134             : #undef DESCALE
     135             : #define DESCALE(x,n)  RIGHT_SHIFT(x, n) /* truncating shift: no rounding term is added before shifting */
     136             : #endif
     137             : 
     138             : 
     139             : /* Multiply a value by one of the FIX_* constants, descale the product by
     140             :  * CONST_BITS bits, and cast the result to an int16_t (callers below pass int expressions).
     141             :  */
     142             : 
     143             : #define MULTIPLY(var,const)  ((int16_t) DESCALE((var) * (const), CONST_BITS))
     144             : /* Apply the 1-D forward DCT, in place, to each of the DCTSIZE rows of data (DCTSIZE contiguous values per row). */
     145    99756017 : static av_always_inline void row_fdct(int16_t * data){
     146             :   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     147             :   int tmp10, tmp11, tmp12, tmp13;
     148             :   int z1, z2, z3, z4, z5, z11, z13;
     149             :   int16_t *dataptr;
     150             :   int ctr;
     151             : 
     152             :   /* Pass 1: process rows.  Each iteration reads one row and overwrites it with its transform. */
     153             : 
     154    99756017 :   dataptr = data;
     155   897804153 :   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     156   798048136 :     tmp0 = dataptr[0] + dataptr[7]; /* sums and differences of mirrored sample pairs */
     157   798048136 :     tmp7 = dataptr[0] - dataptr[7];
     158   798048136 :     tmp1 = dataptr[1] + dataptr[6];
     159   798048136 :     tmp6 = dataptr[1] - dataptr[6];
     160   798048136 :     tmp2 = dataptr[2] + dataptr[5];
     161   798048136 :     tmp5 = dataptr[2] - dataptr[5];
     162   798048136 :     tmp3 = dataptr[3] + dataptr[4];
     163   798048136 :     tmp4 = dataptr[3] - dataptr[4];
     164             : 
     165             :     /* Even part: built from the sums tmp0..tmp3, yields outputs 0, 4, 2 and 6 */
     166             : 
     167   798048136 :     tmp10 = tmp0 + tmp3;        /* phase 2 */
     168   798048136 :     tmp13 = tmp0 - tmp3;
     169   798048136 :     tmp11 = tmp1 + tmp2;
     170   798048136 :     tmp12 = tmp1 - tmp2;
     171             : 
     172   798048136 :     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     173   798048136 :     dataptr[4] = tmp10 - tmp11;
     174             : 
     175   798048136 :     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
     176   798048136 :     dataptr[2] = tmp13 + z1;    /* phase 5 */
     177   798048136 :     dataptr[6] = tmp13 - z1;
     178             : 
     179             :     /* Odd part: built from the differences tmp4..tmp7, yields outputs 5, 3, 1 and 7 */
     180             : 
     181   798048136 :     tmp10 = tmp4 + tmp5;        /* phase 2 */
     182   798048136 :     tmp11 = tmp5 + tmp6;
     183   798048136 :     tmp12 = tmp6 + tmp7;
     184             : 
     185             :     /* The rotator is modified from fig 4-8 to avoid extra negations. */
     186   798048136 :     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
     187   798048136 :     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
     188   798048136 :     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
     189   798048136 :     z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
     190             : 
     191   798048136 :     z11 = tmp7 + z3;            /* phase 5 */
     192   798048136 :     z13 = tmp7 - z3;
     193             : 
     194   798048136 :     dataptr[5] = z13 + z2;      /* phase 6 */
     195   798048136 :     dataptr[3] = z13 - z2;
     196   798048136 :     dataptr[1] = z11 + z4;
     197   798048136 :     dataptr[7] = z11 - z4;
     198             : 
     199   798048136 :     dataptr += DCTSIZE;         /* advance pointer to next row */
     200             :   }
     201    99756017 : }
     202             : 
     203             : /*
     204             :  * Perform the forward DCT on one DCTSIZE x DCTSIZE block of samples, in place: row_fdct() transforms the
     205             :  * rows (pass 1), then the loop below transforms the columns (pass 2).  No final scaling step is applied here. */
     206             : 
     207             : GLOBAL(void)
     208    99756017 : ff_fdct_ifast (int16_t * data)
     209             : {
     210             :   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     211             :   int tmp10, tmp11, tmp12, tmp13;
     212             :   int z1, z2, z3, z4, z5, z11, z13;
     213             :   int16_t *dataptr;
     214             :   int ctr;
     215             : 
     216    99756017 :   row_fdct(data);
     217             : 
     218             :   /* Pass 2: process columns.  Elements of one column are DCTSIZE entries apart. */
     219             : 
     220    99756017 :   dataptr = data;
     221   897804153 :   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     222   798048136 :     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; /* sums and differences of mirrored rows in this column */
     223   798048136 :     tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
     224   798048136 :     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
     225   798048136 :     tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
     226   798048136 :     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
     227   798048136 :     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     228   798048136 :     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     229   798048136 :     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
     230             : 
     231             :     /* Even part: built from the sums tmp0..tmp3, yields rows 0, 4, 2 and 6 */
     232             : 
     233   798048136 :     tmp10 = tmp0 + tmp3;        /* phase 2 */
     234   798048136 :     tmp13 = tmp0 - tmp3;
     235   798048136 :     tmp11 = tmp1 + tmp2;
     236   798048136 :     tmp12 = tmp1 - tmp2;
     237             : 
     238   798048136 :     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     239   798048136 :     dataptr[DCTSIZE*4] = tmp10 - tmp11;
     240             : 
     241   798048136 :     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
     242   798048136 :     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     243   798048136 :     dataptr[DCTSIZE*6] = tmp13 - z1;
     244             : 
     245             :     /* Odd part: built from the differences tmp4..tmp7, yields rows 5, 3, 1 and 7 */
     246             : 
     247   798048136 :     tmp10 = tmp4 + tmp5;        /* phase 2 */
     248   798048136 :     tmp11 = tmp5 + tmp6;
     249   798048136 :     tmp12 = tmp6 + tmp7;
     250             : 
     251             :     /* The rotator is modified from fig 4-8 to avoid extra negations. */
     252   798048136 :     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
     253   798048136 :     z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
     254   798048136 :     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     255   798048136 :     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
     256             : 
     257   798048136 :     z11 = tmp7 + z3;            /* phase 5 */
     258   798048136 :     z13 = tmp7 - z3;
     259             : 
     260   798048136 :     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
     261   798048136 :     dataptr[DCTSIZE*3] = z13 - z2;
     262   798048136 :     dataptr[DCTSIZE*1] = z11 + z4;
     263   798048136 :     dataptr[DCTSIZE*7] = z11 - z4;
     264             : 
     265   798048136 :     dataptr++;                  /* advance pointer to next column */
     266             :   }
     267    99756017 : }
     268             : 
     269             : /*
     270             :  * Perform the forward 2-4-8 DCT on one block of samples, in place: row_fdct() transforms the rows, then
     271             :  * in each column the sums and the differences of adjacent row pairs go through separate four-input transforms. */
     272             : 
     273             : GLOBAL(void)
     274           0 : ff_fdct_ifast248 (int16_t * data)
     275             : {
     276             :   int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     277             :   int tmp10, tmp11, tmp12, tmp13;
     278             :   int z1;
     279             :   int16_t *dataptr;
     280             :   int ctr;
     281             : 
     282           0 :   row_fdct(data);
     283             : 
     284             :   /* Pass 2: process columns.  Elements of one column are DCTSIZE entries apart. */
     285             : 
     286           0 :   dataptr = data;
     287           0 :   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     288           0 :     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; /* tmp0..tmp3: sums of rows (0,1), (2,3), (4,5), (6,7) */
     289           0 :     tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
     290           0 :     tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
     291           0 :     tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
     292           0 :     tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; /* tmp4..tmp7: differences of the same row pairs */
     293           0 :     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
     294           0 :     tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
     295           0 :     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
     296             : 
     297             :     /* Even part: transform of the pair sums, written to rows 0, 4, 2 and 6 */
     298             : 
     299           0 :     tmp10 = tmp0 + tmp3;
     300           0 :     tmp11 = tmp1 + tmp2;
     301           0 :     tmp12 = tmp1 - tmp2;
     302           0 :     tmp13 = tmp0 - tmp3;
     303             : 
     304           0 :     dataptr[DCTSIZE*0] = tmp10 + tmp11;
     305           0 :     dataptr[DCTSIZE*4] = tmp10 - tmp11;
     306             : 
     307           0 :     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
     308           0 :     dataptr[DCTSIZE*2] = tmp13 + z1;
     309           0 :     dataptr[DCTSIZE*6] = tmp13 - z1;
     310             :     /* Second half: the same arithmetic on the pair differences, written to rows 1, 5, 3 and 7 */
     311           0 :     tmp10 = tmp4 + tmp7;
     312           0 :     tmp11 = tmp5 + tmp6;
     313           0 :     tmp12 = tmp5 - tmp6;
     314           0 :     tmp13 = tmp4 - tmp7;
     315             : 
     316           0 :     dataptr[DCTSIZE*1] = tmp10 + tmp11;
     317           0 :     dataptr[DCTSIZE*5] = tmp10 - tmp11;
     318             : 
     319           0 :     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
     320           0 :     dataptr[DCTSIZE*3] = tmp13 + z1;
     321           0 :     dataptr[DCTSIZE*7] = tmp13 - z1;
     322             : 
     323           0 :     dataptr++;                        /* advance pointer to next column */
     324             :   }
     325           0 : }
     326             : 
     327             : /* Remove file-local macros.  NOTE(review): DCTSIZE, RIGHT_SHIFT, MULTIPLY, FIX_0_382683433 and FIX_0_707106781 stay defined -- confirm that is intended. */
     328             : #undef GLOBAL
     329             : #undef CONST_BITS
     330             : #undef DESCALE
     331             : #undef FIX_0_541196100
     332             : #undef FIX_1_306562965

Generated by: LCOV version 1.13