LCOV - code coverage report
Current view: top level - libavcodec/x86 - mpegvideoenc_qns_template.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 15 0.0 %
Date: 2017-12-15 02:19:58 Functions: 0 6 0.0 %

          Line data    Source code
       1             : /*
       2             :  * QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
       3             :  * Copyright (c) 2004 Michael Niedermayer
       4             :  *
       5             :  * MMX optimization by Michael Niedermayer <michaelni@gmx.at>
       6             :  * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
       7             :  *
       8             :  * This file is part of FFmpeg.
       9             :  *
      10             :  * FFmpeg is free software; you can redistribute it and/or
      11             :  * modify it under the terms of the GNU Lesser General Public
      12             :  * License as published by the Free Software Foundation; either
      13             :  * version 2.1 of the License, or (at your option) any later version.
      14             :  *
      15             :  * FFmpeg is distributed in the hope that it will be useful,
      16             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      18             :  * Lesser General Public License for more details.
      19             :  *
      20             :  * You should have received a copy of the GNU Lesser General Public
      21             :  * License along with FFmpeg; if not, write to the Free Software
      22             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      23             :  */
      24             : 
      25             : #include <stdint.h>
      26             : 
      27             : #include "libavutil/avassert.h"
      28             : #include "libavutil/common.h"
      29             : #include "libavutil/x86/asm.h"
      30             : 
      31             : #include "inline_asm.h"
      32             : 
      33             : #define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0)) /* exclusive bound on |scale| for the SIMD code below: 512, shifted right by SCALE_OFFSET when that is positive (SCALE_OFFSET is defined outside this file) */
      34             : 
      35           0 : static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
      36             : {
                           /*
                            * Evaluates, without storing anything back, a score for adding the
                            * scaled basis[] to rem[]: for each of the 64 coefficients it forms
                            * ((PMULHRW(basis, scale) + rem) >> 6) * weight (low 16 bits of the
                            * product), squares it, and accumulates the squares. Each partial sum
                            * is shifted right by 4 before accumulation and the final total is
                            * shifted right by 2 more.
                            *
                            * rem    - 64 int16_t values, read only here
                            * weight - 64 int16_t per-coefficient multipliers, read only here
                            * basis  - 64 int16_t values that get multiplied by scale
                            * scale  - multiplier; asserted (av_assert2) to satisfy |scale| < MAX_ABS
                            *
                            * Returns the accumulated score (the loop counter register is reused
                            * to carry the result out of the asm block).
                            *
                            * DEF, SET_RND, PMULHRW, PHADDD, SCALE_OFFSET, BASIS_SHIFT and
                            * RECON_SHIFT are not defined in this file; they are presumably
                            * supplied by the file that includes this template once per
                            * instruction set (see the header comment) -- TODO confirm.
                            */
      37           0 :     x86_reg i=0;
      38             : 
      39             :     av_assert2(FFABS(scale) < MAX_ABS);
                           /*
                            * Pre-scale so that the low 16 bits of scale hold the fixed-point
                            * multiplier consumed by PMULHRW below.
                            * NOTE(review): FFABS() above suggests scale may be negative; a left
                            * shift of a negative int is undefined in ISO C -- confirm the
                            * supported compilers treat it as an arithmetic shift.
                            */
      40           0 :     scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
      41             : 
                           /* Presumably loads the rounding constant used by PMULHRW into mm6
                            * (macro defined elsewhere -- TODO confirm). It is a separate
                            * statement, so the asm below relies on mm6 surviving in between. */
      42           0 :     SET_RND(mm6);
                           /*
                            * Operands: %0 = i (byte offset, then result), %1 = basis, %2 = rem,
                            * %3 = weight, %4 = scale.
                            * NOTE(review): no clobber list is given although mm0, mm1, mm5 and
                            * mm7 are written and memory is read through plain "r" pointers --
                            * confirm this is acceptable for the compilers in use.
                            */
      43           0 :     __asm__ volatile(
                               /* mm7 = 0: running sum of squares */
      44             :         "pxor %%mm7, %%mm7              \n\t"
                               /* mm5 = low 16-bit word of scale replicated into all four words */
      45             :         "movd  %4, %%mm5                \n\t"
      46             :         "punpcklwd %%mm5, %%mm5         \n\t"
      47             :         "punpcklwd %%mm5, %%mm5         \n\t"
      48             :         ".p2align 4                     \n\t"
      49             :         "1:                             \n\t"
                               /* load 8 basis coefficients (16 bytes) at byte offset %0 */
      50             :         "movq  (%1, %0), %%mm0          \n\t"
      51             :         "movq  8(%1, %0), %%mm1         \n\t"
                               /* presumably mm0/mm1 = rounded high-half product with mm5, using
                                * mm6 for rounding (macro defined elsewhere -- TODO confirm) */
      52             :         PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
                               /* add the matching 8 rem values, then arithmetic shift right by 6 */
      53             :         "paddw (%2, %0), %%mm0          \n\t"
      54             :         "paddw 8(%2, %0), %%mm1         \n\t"
      55             :         "psraw $6, %%mm0                \n\t"
      56             :         "psraw $6, %%mm1                \n\t"
                               /* multiply by weight, keeping the low 16 bits of each product */
      57             :         "pmullw (%3, %0), %%mm0         \n\t"
      58             :         "pmullw 8(%3, %0), %%mm1        \n\t"
                               /* square each word and add adjacent pairs into 32-bit lanes */
      59             :         "pmaddwd %%mm0, %%mm0           \n\t"
      60             :         "pmaddwd %%mm1, %%mm1           \n\t"
                               /* combine both halves, scale down by 16, accumulate into mm7 */
      61             :         "paddd %%mm1, %%mm0             \n\t"
      62             :         "psrld $4, %%mm0                \n\t"
      63             :         "paddd %%mm0, %%mm7             \n\t"
                               /* advance 16 bytes; stop after 128 bytes = 64 int16_t entries */
      64             :         "add $16, %0                    \n\t"
      65             :         "cmp $128, %0                   \n\t" //FIXME optimize & bench
      66             :         " jb 1b                         \n\t"
                               /* presumably folds the two 32-bit lanes of mm7 into one sum, with
                                * mm6 as scratch (macro defined elsewhere -- TODO confirm) */
      67             :         PHADDD(%%mm7, %%mm6)
                               /* final scale-down by 4, then move the low 32 bits into %0 */
      68             :         "psrld $2, %%mm7                \n\t"
      69             :         "movd %%mm7, %0                 \n\t"
      70             : 
      71             :         : "+r" (i)
      72             :         : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
      73             :     );
                           /* i now holds the score written by the final movd, not an offset */
      74           0 :     return i;
      75             : }
      76             : 
      77           0 : static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
      78             : {
                           /*
                            * Adds the scaled basis[] to rem[] in place, for all 64 coefficients.
                            *
                            * rem   - 64 int16_t values, updated in place
                            * basis - 64 int16_t values that get multiplied by scale
                            * scale - multiplier; selects the code path:
                            *           |scale| <  MAX_ABS -> MMX loop using PMULHRW
                            *           otherwise          -> plain C loop with a rounded shift
                            *
                            * DEF, SET_RND, PMULHRW, SCALE_OFFSET, BASIS_SHIFT and RECON_SHIFT are
                            * not defined in this file; they are presumably supplied by the file
                            * that includes this template -- TODO confirm.
                            */
      79           0 :     x86_reg i=0;
      80             : 
      81           0 :     if(FFABS(scale) < MAX_ABS){
                               /*
                                * Pre-scale so the low 16 bits of scale hold the fixed-point
                                * multiplier consumed by PMULHRW.
                                * NOTE(review): scale may be negative here; a left shift of a
                                * negative int is undefined in ISO C -- confirm the supported
                                * compilers treat it as an arithmetic shift.
                                */
      82           0 :         scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
                               /* Presumably loads the PMULHRW rounding constant into mm6 (macro
                                * defined elsewhere -- TODO confirm); the asm below relies on mm6
                                * surviving between the two statements. */
      83           0 :         SET_RND(mm6);
                               /*
                                * Operands: %0 = i (byte offset), %1 = basis, %2 = rem, %3 = scale.
                                * NOTE(review): the asm stores to rem[] through a plain "r" pointer
                                * and writes mm0, mm1 and mm5, yet declares no clobbers (no
                                * "memory" clobber in particular) -- confirm this is acceptable
                                * for the compilers in use.
                                */
      84           0 :         __asm__ volatile(
                                       /* mm5 = low 16-bit word of scale replicated into all four words */
      85             :                 "movd  %3, %%mm5        \n\t"
      86             :                 "punpcklwd %%mm5, %%mm5 \n\t"
      87             :                 "punpcklwd %%mm5, %%mm5 \n\t"
      88             :                 ".p2align 4             \n\t"
      89             :                 "1:                     \n\t"
                                       /* load 8 basis coefficients (16 bytes) at byte offset %0 */
      90             :                 "movq  (%1, %0), %%mm0  \n\t"
      91             :                 "movq  8(%1, %0), %%mm1 \n\t"
                                       /* presumably mm0/mm1 = rounded high-half product with mm5,
                                        * using mm6 for rounding (macro defined elsewhere -- TODO confirm) */
      92             :                 PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
                                       /* add the matching 8 rem values and store the sums back to rem */
      93             :                 "paddw (%2, %0), %%mm0  \n\t"
      94             :                 "paddw 8(%2, %0), %%mm1 \n\t"
      95             :                 "movq %%mm0, (%2, %0)   \n\t"
      96             :                 "movq %%mm1, 8(%2, %0)  \n\t"
                                       /* advance 16 bytes; stop after 128 bytes = 64 int16_t entries */
      97             :                 "add $16, %0            \n\t"
      98             :                 "cmp $128, %0           \n\t" // FIXME optimize & bench
      99             :                 " jb 1b                 \n\t"
     100             : 
     101             :                 : "+r" (i)
     102             :                 : "r"(basis), "r"(rem), "g"(scale)
     103             :         );
     104             :     }else{
                               /* Scalar path for |scale| >= MAX_ABS: multiply in int, add half of
                                * the divisor for rounding, then shift right by
                                * BASIS_SHIFT - RECON_SHIFT. Here i counts elements, not bytes. */
     105           0 :         for(i=0; i<8*8; i++){
     106           0 :             rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
     107             :         }
     108             :     }
     109           0 : }

Generated by: LCOV version 1.13