LCOV - code coverage report
Current view: top level - libavcodec - vp3dsp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 153 153 100.0 %
Date: 2017-12-14 08:27:08 Functions: 8 8 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2004 The FFmpeg project
       3             :  *
       4             :  * This file is part of FFmpeg.
       5             :  *
       6             :  * FFmpeg is free software; you can redistribute it and/or
       7             :  * modify it under the terms of the GNU Lesser General Public
       8             :  * License as published by the Free Software Foundation; either
       9             :  * version 2.1 of the License, or (at your option) any later version.
      10             :  *
      11             :  * FFmpeg is distributed in the hope that it will be useful,
      12             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      14             :  * Lesser General Public License for more details.
      15             :  *
      16             :  * You should have received a copy of the GNU Lesser General Public
      17             :  * License along with FFmpeg; if not, write to the Free Software
      18             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      19             :  */
      20             : 
      21             : /**
      22             :  * @file
      23             :  * Standard C DSP-oriented functions cribbed from the original VP3
      24             :  * source code.
      25             :  */
      26             : 
      27             : #include "libavutil/attributes.h"
      28             : #include "libavutil/common.h"
      29             : #include "libavutil/intreadwrite.h"
      30             : 
      31             : #include "avcodec.h"
      32             : #include "rnd_avg.h"
      33             : #include "vp3dsp.h"
      34             : 
      35             : #define IdctAdjustBeforeShift 8 /* rounding term: idct() adds (8 << 16) ahead of a >> 20 in its DC-only path */
      36             : #define xC1S7 64277 /* ~ cos(1 * pi / 16) * 65536 */
      37             : #define xC2S6 60547 /* ~ cos(2 * pi / 16) * 65536 */
      38             : #define xC3S5 54491 /* ~ cos(3 * pi / 16) * 65536 */
      39             : #define xC4S4 46341 /* ~ cos(4 * pi / 16) * 65536 */
      40             : #define xC5S3 36410 /* ~ cos(5 * pi / 16) * 65536 */
      41             : #define xC6S2 25080 /* ~ cos(6 * pi / 16) * 65536 */
      42             : #define xC7S1 12785 /* ~ cos(7 * pi / 16) * 65536 */
      43             : 
      44             : #define M(a, b) ((int)((SUINT)(a) * (b)) >> 16) /* 16.16 fixed-point multiply: (a * b) >> 16; SUINT is defined outside this listing (presumably unsigned, to sidestep signed overflow - confirm) */
      45             : 
      46     1845672 : static av_always_inline void idct(uint8_t *dst, ptrdiff_t stride,
      47             :                                   int16_t *input, int type)
      48             : { /* 8x8 inverse DCT in two 1-D passes over input (modified in place); the second pass stores (type == 1) or adds (any other type) the clipped result into dst. */
      49     1845672 :     int16_t *ip = input;
      50             : 
      51             :     int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
      52             :     int Ed, Gd, Add, Bdd, Fd, Hd;
      53             : 
      54             :     int i;
      55             : 
      56             :     /* First 1-D pass (called "rows" here): each iteration transforms ip[0], ip[8], ..., ip[56] in place */
      57    16611048 :     for (i = 0; i < 8; i++) {
      58             :         /* Skip the butterfly when all eight inputs are zero: every output would be zero as well */
      59    73826880 :         if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
      60    59061504 :             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
      61     2102713 :             A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
      62     2102713 :             B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
      63     2102713 :             C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
      64     2102713 :             D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
      65             : 
      66     2102713 :             Ad = M(xC4S4, (A - C));
      67     2102713 :             Bd = M(xC4S4, (B - D));
      68             : 
      69     2102713 :             Cd = A + C;
      70     2102713 :             Dd = B + D;
      71             : 
      72     2102713 :             E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
      73     2102713 :             F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
      74             : 
      75     2102713 :             G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
      76     2102713 :             H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
      77             : 
      78     2102713 :             Ed = E - G;
      79     2102713 :             Gd = E + G;
      80             : 
      81     2102713 :             Add = F + Ad;
      82     2102713 :             Bdd = Bd - H;
      83             : 
      84     2102713 :             Fd = F - Ad;
      85     2102713 :             Hd = Bd + H;
      86             : 
      87             :             /*  Final sequence of operations over-write original inputs. */
      88     2102713 :             ip[0 * 8] = Gd + Cd;
      89     2102713 :             ip[7 * 8] = Gd - Cd;
      90             : 
      91     2102713 :             ip[1 * 8] = Add + Hd;
      92     2102713 :             ip[2 * 8] = Add - Hd;
      93             : 
      94     2102713 :             ip[3 * 8] = Ed + Dd;
      95     2102713 :             ip[4 * 8] = Ed - Dd;
      96             : 
      97     2102713 :             ip[5 * 8] = Fd + Bdd;
      98     2102713 :             ip[6 * 8] = Fd - Bdd;
      99             :         }
     100             : 
     101    14765376 :         ip += 1;            /* next row */
     102             :     }
     103             : 
     104     1845672 :     ip = input;
     105             : 
     106    16611048 :     for (i = 0; i < 8; i++) {
     107             :         /* Test only ip[1..7] (bitwise or faster than ||); ip[0] alone is handled by the cheaper else branch below */
     108    73826880 :         if (ip[1] | ip[2] | ip[3] |
     109    59061504 :             ip[4] | ip[5] | ip[6] | ip[7]) {
     110     3736607 :             A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
     111     3736607 :             B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
     112     3736607 :             C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
     113     3736607 :             D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
     114             : 
     115     3736607 :             Ad = M(xC4S4, (A - C));
     116     3736607 :             Bd = M(xC4S4, (B - D));
     117             : 
     118     3736607 :             Cd = A + C;
     119     3736607 :             Dd = B + D;
     120             : 
     121     3736607 :             E = M(xC4S4, (ip[0] + ip[4])) + 8; /* + 8 rounds the final >> 4; it reaches every output through E and F */
     122     3736607 :             F = M(xC4S4, (ip[0] - ip[4])) + 8;
     123             : 
     124     3736607 :             if (type == 1) { // HACK: put mode adds a bias of 128, written as 16 * 128 because the outputs are later shifted right by 4
     125      701781 :                 E += 16 * 128;
     126      701781 :                 F += 16 * 128;
     127             :             }
     128             : 
     129     3736607 :             G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
     130     3736607 :             H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
     131             : 
     132     3736607 :             Ed = E - G;
     133     3736607 :             Gd = E + G;
     134             : 
     135     3736607 :             Add = F + Ad;
     136     3736607 :             Bdd = Bd - H;
     137             : 
     138     3736607 :             Fd = F - Ad;
     139     3736607 :             Hd = Bd + H;
     140             : 
     141             :             /* Final sequence of operations writes the eight results to dst[0 * stride] .. dst[7 * stride]. */
     142     3736607 :             if (type == 1) { /* put: store the clipped results */
     143      701781 :                 dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
     144      701781 :                 dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
     145             : 
     146      701781 :                 dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
     147      701781 :                 dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
     148             : 
     149      701781 :                 dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
     150      701781 :                 dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
     151             : 
     152      701781 :                 dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
     153      701781 :                 dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
     154             :             } else { /* add: accumulate onto the pixels already in dst, then clip */
     155     3034826 :                 dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
     156     3034826 :                 dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
     157             : 
     158     3034826 :                 dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
     159     3034826 :                 dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
     160             : 
     161     3034826 :                 dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
     162     3034826 :                 dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
     163             : 
     164     3034826 :                 dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
     165     3034826 :                 dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
     166             :             }
     167             :         } else { /* ip[1..7] are all zero: only ip[0] contributes, so all eight outputs share one value */
     168    11028769 :             if (type == 1) { /* put: the same clipped value (with the 128 bias) fills all eight destinations */
     169     2236939 :                 dst[0*stride] =
     170     4473878 :                 dst[1*stride] =
     171     6710817 :                 dst[2*stride] =
     172     6710817 :                 dst[3*stride] =
     173     6710817 :                 dst[4*stride] =
     174     6710817 :                 dst[5*stride] =
     175     6710817 :                 dst[6*stride] =
     176     4473878 :                 dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
     177             :             } else {
     178     8791830 :                 if (ip[0]) { /* add: with ip[0] == 0 the increment v would be 0, so dst is left untouched */
     179     1752150 :                     int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
     180     1752150 :                     dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
     181     1752150 :                     dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
     182     1752150 :                     dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
     183     1752150 :                     dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
     184     1752150 :                     dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
     185     1752150 :                     dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
     186     1752150 :                     dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
     187     1752150 :                     dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
     188             :                 }
     189             :             }
     190             :         }
     191             : 
     192    14765376 :         ip += 8;            /* next column */
     193    14765376 :         dst++; /* the next group of eight inputs lands one byte to the right in dst */
     194             :     }
     195     1845672 : }
     196             : 
     197      367340 : static void vp3_idct_put_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
     198             :                            int16_t *block /* align 16 */)
     199             : { /* Runs idct() in put mode (type 1: results overwrite dest), then zeroes all 64 coefficients of block. */
     200      367340 :     idct(dest, stride, block, 1);
     201      367340 :     memset(block, 0, sizeof(*block) * 64);
     202      367340 : }
     203             : 
     204     1478332 : static void vp3_idct_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
     205             :                            int16_t *block /* align 16 */)
     206             : { /* Runs idct() in add mode (results are added onto dest), then zeroes all 64 coefficients of block. */
     207     1478332 :     idct(dest, stride, block, 2); /* idct() only tests type == 1, so any other value selects add mode */
     208     1478332 :     memset(block, 0, sizeof(*block) * 64);
     209     1478332 : }
     210             : 
     211      118882 : static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, ptrdiff_t stride,
     212             :                               int16_t *block /* align 16 */)
     213             : { /* Adds one value derived from block[0] to every pixel of the 8x8 area at dest (through av_clip_uint8), then clears block[0]. */
     214      118882 :     int i, dc = (block[0] + 15) >> 5; /* the single increment applied to all 64 pixels */
     215             : 
     216     1069938 :     for (i = 0; i < 8; i++) { /* one iteration per row of 8 pixels */
     217      951056 :         dest[0] = av_clip_uint8(dest[0] + dc);
     218      951056 :         dest[1] = av_clip_uint8(dest[1] + dc);
     219      951056 :         dest[2] = av_clip_uint8(dest[2] + dc);
     220      951056 :         dest[3] = av_clip_uint8(dest[3] + dc);
     221      951056 :         dest[4] = av_clip_uint8(dest[4] + dc);
     222      951056 :         dest[5] = av_clip_uint8(dest[5] + dc);
     223      951056 :         dest[6] = av_clip_uint8(dest[6] + dc);
     224      951056 :         dest[7] = av_clip_uint8(dest[7] + dc);
     225      951056 :         dest   += stride;
     226             :     }
     227      118882 :     block[0] = 0; /* only block[0] was read, so only it is reset */
     228      118882 : }
     229             : 
     230      382571 : static void vp3_v_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
     231             :                                 int *bounding_values)
     232             : { /* Filters 8 horizontally adjacent positions; at each one the pixel at first_pixel and the pixel one stride above it are adjusted in opposite directions. */
     233             :     unsigned char *end;
     234             :     int filter_value;
     235      382571 :     const ptrdiff_t nstride = -stride;
     236             : 
     237     3443139 :     for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
     238     6121136 :         filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) + /* taps at rows -2, -1, 0, +1 relative to first_pixel */
     239     3060568 :                        (first_pixel[0] - first_pixel[nstride]) * 3;
     240     3060568 :         filter_value = bounding_values[(filter_value + 4) >> 3]; /* index can be negative: presumably bounding_values points into the middle of a table - confirm against caller */
     241             : 
     242     3060568 :         first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
     243     3060568 :         first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
     244             :     }
     245      382571 : }
     246             : 
     247      395869 : static void vp3_h_loop_filter_c(uint8_t *first_pixel, ptrdiff_t stride,
     248             :                                 int *bounding_values)
     249             : { /* Filters 8 vertically adjacent positions (one per stride step); at each one the pixel at first_pixel and the pixel to its left are adjusted in opposite directions. */
     250             :     unsigned char *end;
     251             :     int filter_value;
     252             : 
     253     3562821 :     for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) {
     254     6333904 :         filter_value = (first_pixel[-2] - first_pixel[1]) + /* taps at columns -2, -1, 0, +1 relative to first_pixel */
     255     3166952 :                        (first_pixel[ 0] - first_pixel[-1]) * 3;
     256     3166952 :         filter_value = bounding_values[(filter_value + 4) >> 3]; /* index can be negative: presumably bounding_values points into the middle of a table - confirm against caller */
     257             : 
     258     3166952 :         first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
     259     3166952 :         first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
     260             :     }
     261      395869 : }
     262             : 
     263      325591 : static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
     264             :                                  const uint8_t *src2, ptrdiff_t stride, int h)
     265             : { /* For each of h rows, combines 8 bytes of src1 and src2 with no_rnd_avg32() and writes them to dst; all three buffers are stepped with the same stride. */
     266             :     int i;
     267             : 
     268     2930319 :     for (i = 0; i < h; i++) {
     269             :         uint32_t a, b;
     270             : 
     271     2604728 :         a = AV_RN32(&src1[i * stride]); /* bytes 0..3 of the row, handled as one 32-bit word */
     272     2604728 :         b = AV_RN32(&src2[i * stride]);
     273     2604728 :         AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b)); /* NOTE(review): the A suffix presumably requires dst to be 4-byte aligned, unlike the AV_RN32 reads - confirm in intreadwrite.h */
     274     2604728 :         a = AV_RN32(&src1[i * stride + 4]); /* bytes 4..7 of the row */
     275     2604728 :         b = AV_RN32(&src2[i * stride + 4]);
     276     2604728 :         AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
     277             :     }
     278      325591 : }
     279             : 
     280          29 : av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
     281             : { /* Fills the function pointers of c with the C implementations from this file, then calls the per-architecture init functions, which receive the same c and flags. */
     282          29 :     c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
     283             : 
     284          29 :     c->idct_put      = vp3_idct_put_c;
     285          29 :     c->idct_add      = vp3_idct_add_c;
     286          29 :     c->idct_dc_add   = vp3_idct_dc_add_c;
     287          29 :     c->v_loop_filter = vp3_v_loop_filter_c;
     288          29 :     c->h_loop_filter = vp3_h_loop_filter_c;
     289             : 
     290             :     if (ARCH_ARM) /* ARCH_* are defined outside this listing; presumably 0/1 build-configuration constants - confirm */
     291             :         ff_vp3dsp_init_arm(c, flags);
     292             :     if (ARCH_PPC)
     293             :         ff_vp3dsp_init_ppc(c, flags);
     294             :     if (ARCH_X86)
     295          29 :         ff_vp3dsp_init_x86(c, flags);
     296          29 : }

Generated by: LCOV version 1.13