LCOV - code coverage report
Current view: top level - src/libavcodec/x86 - rnd_template.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 5 10 50.0 %
Date: 2017-01-22 02:20:28 Functions: 1 4 25.0 %

          Line data    Source code
       1             : /*
       2             :  * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd
       3             :  * Copyright (c) 2000, 2001 Fabrice Bellard
       4             :  * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
       5             :  *
       6             :  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
       7             :  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
       8             :  * and improved by Zdenek Kabelac <kabi@users.sf.net>
       9             :  *
      10             :  * This file is part of FFmpeg.
      11             :  *
      12             :  * FFmpeg is free software; you can redistribute it and/or
      13             :  * modify it under the terms of the GNU Lesser General Public
      14             :  * License as published by the Free Software Foundation; either
      15             :  * version 2.1 of the License, or (at your option) any later version.
      16             :  *
      17             :  * FFmpeg is distributed in the hope that it will be useful,
      18             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      19             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      20             :  * Lesser General Public License for more details.
      21             :  *
      22             :  * You should have received a copy of the GNU Lesser General Public
      23             :  * License along with FFmpeg; if not, write to the Free Software
      24             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      25             :  */
      26             : 
      27             : #include <stddef.h>
      28             : #include <stdint.h>
      29             : 
      30             : #include "inline_asm.h"
      31             : 
      32             : // put_pixels: 8-byte-wide xy2 half-pel filter; each output byte is (a + b + c + d + rnd) >> 2, with a,b = source bytes x and x+1 of one row, c,d = the same two bytes of the next row, rnd = the constant loaded by SET_RND
      33       58247 : av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
      34             :                                   ptrdiff_t line_size, int h) // h: rows to write; the loop handles 2 rows per pass and exits only when h hits exactly 0, so h is expected to be positive and even
      35             : {
      36       58247 :     MOVQ_ZERO(mm7); // macro defined outside this file; presumably clears mm7, which the unpack steps below need as the zero source to widen bytes to words
      37       58247 :     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
      38       58247 :     __asm__ volatile( // operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size
      39             :         "movq   (%1), %%mm0             \n\t" // first source row, bytes 0..7
      40             :         "movq   1(%1), %%mm4            \n\t" // first source row, bytes 1..8
      41             :         "movq   %%mm0, %%mm1            \n\t"
      42             :         "movq   %%mm4, %%mm5            \n\t"
      43             :         "punpcklbw %%mm7, %%mm0         \n\t" // low 4 bytes of each load interleaved with mm7
      44             :         "punpcklbw %%mm7, %%mm4         \n\t"
      45             :         "punpckhbw %%mm7, %%mm1         \n\t" // high 4 bytes of each load interleaved with mm7
      46             :         "punpckhbw %%mm7, %%mm5         \n\t"
      47             :         "paddusw %%mm0, %%mm4           \n\t" // mm4/mm5 = horizontal pair sums of the first row (low/high half)
      48             :         "paddusw %%mm1, %%mm5           \n\t"
      49             :         "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" // FF_REG_a = running byte offset into both buffers, starts at 0
      50             :         "add    %3, %1                  \n\t" // advance pixels by one row, so (%1, offset) always addresses the row below the one already summed
      51             :         ".p2align 3                     \n\t"
      52             :         "1:                             \n\t"
      53             :         "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" // next source row, bytes 0..7
      54             :         "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" // next source row, bytes 1..8
      55             :         "movq   %%mm0, %%mm1            \n\t"
      56             :         "movq   %%mm2, %%mm3            \n\t"
      57             :         "punpcklbw %%mm7, %%mm0         \n\t"
      58             :         "punpcklbw %%mm7, %%mm2         \n\t"
      59             :         "punpckhbw %%mm7, %%mm1         \n\t"
      60             :         "punpckhbw %%mm7, %%mm3         \n\t"
      61             :         "paddusw %%mm2, %%mm0           \n\t" // mm0/mm1 = pair sums of this row; kept for the second half of the pass
      62             :         "paddusw %%mm3, %%mm1           \n\t"
      63             :         "paddusw %%mm6, %%mm4           \n\t" // add the rounding constant to the previous row's sums
      64             :         "paddusw %%mm6, %%mm5           \n\t"
      65             :         "paddusw %%mm0, %%mm4           \n\t" // add this row's sums: total of the four neighbours plus rnd
      66             :         "paddusw %%mm1, %%mm5           \n\t"
      67             :         "psrlw  $2, %%mm4               \n\t" // divide each word by 4
      68             :         "psrlw  $2, %%mm5               \n\t"
      69             :         "packuswb  %%mm5, %%mm4         \n\t" // repack the 8 words into 8 bytes
      70             :         "movq   %%mm4, (%2, %%"FF_REG_a")  \n\t" // write one output row to block
      71             :         "add    %3, %%"FF_REG_a"           \n\t" // offset += line_size
      72             : 
      73             :         "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 (second row of the pass: mm0/mm1 now hold the previous row's sums, mm4/mm5 receive the new row's sums)
      74             :         "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
      75             :         "movq   %%mm2, %%mm3            \n\t"
      76             :         "movq   %%mm4, %%mm5            \n\t"
      77             :         "punpcklbw %%mm7, %%mm2         \n\t"
      78             :         "punpcklbw %%mm7, %%mm4         \n\t"
      79             :         "punpckhbw %%mm7, %%mm3         \n\t"
      80             :         "punpckhbw %%mm7, %%mm5         \n\t"
      81             :         "paddusw %%mm2, %%mm4           \n\t" // mm4/mm5 = pair sums of this row; reused by the next loop pass
      82             :         "paddusw %%mm3, %%mm5           \n\t"
      83             :         "paddusw %%mm6, %%mm0           \n\t" // rounding constant onto the previous row's sums
      84             :         "paddusw %%mm6, %%mm1           \n\t"
      85             :         "paddusw %%mm4, %%mm0           \n\t"
      86             :         "paddusw %%mm5, %%mm1           \n\t"
      87             :         "psrlw  $2, %%mm0               \n\t"
      88             :         "psrlw  $2, %%mm1               \n\t"
      89             :         "packuswb  %%mm1, %%mm0         \n\t"
      90             :         "movq   %%mm0, (%2, %%"FF_REG_a")  \n\t" // write the second output row of this pass
      91             :         "add    %3, %%"FF_REG_a"        \n\t"
      92             : 
      93             :         "subl   $2, %0                  \n\t" // two rows finished
      94             :         "jnz    1b                      \n\t" // repeat until h is exactly 0
      95             :         :"+g"(h), "+S"(pixels) // read-write operands: h (register or memory), pixels (pinned to the si register)
      96             :         :"D"(block), "r"((x86_reg)line_size) // inputs: block (pinned to the di register), line_size (any general register)
      97             :         :FF_REG_a, "memory"); // declared clobbers: the offset register and memory; the MMX registers used above are not listed
      98       58247 : }
      99             : 
     100             : // avg_pixels: same 8-byte-wide xy2 half-pel filter as the put variant, (a + b + c + d + rnd) >> 2 per byte, but each filtered row is combined with the bytes already in block via PAVGB_MMX before being stored
     101             : // this routine is 'slightly' suboptimal but mostly unused
     102           0 : av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
     103             :                                   ptrdiff_t line_size, int h) // h: rows to write; the loop handles 2 rows per pass and exits only when h hits exactly 0, so h is expected to be positive and even
     104             : {
     105           0 :     MOVQ_ZERO(mm7); // macro defined outside this file; presumably clears mm7, which the unpack steps below need as the zero source to widen bytes to words
     106           0 :     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
     107           0 :     __asm__ volatile( // operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size
     108             :         "movq   (%1), %%mm0             \n\t" // first source row, bytes 0..7
     109             :         "movq   1(%1), %%mm4            \n\t" // first source row, bytes 1..8
     110             :         "movq   %%mm0, %%mm1            \n\t"
     111             :         "movq   %%mm4, %%mm5            \n\t"
     112             :         "punpcklbw %%mm7, %%mm0         \n\t" // low 4 bytes of each load interleaved with mm7
     113             :         "punpcklbw %%mm7, %%mm4         \n\t"
     114             :         "punpckhbw %%mm7, %%mm1         \n\t" // high 4 bytes of each load interleaved with mm7
     115             :         "punpckhbw %%mm7, %%mm5         \n\t"
     116             :         "paddusw %%mm0, %%mm4           \n\t" // mm4/mm5 = horizontal pair sums of the first row (low/high half)
     117             :         "paddusw %%mm1, %%mm5           \n\t"
     118             :         "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" // FF_REG_a = running byte offset into both buffers, starts at 0
     119             :         "add    %3, %1                  \n\t" // advance pixels by one row, so (%1, offset) always addresses the row below the one already summed
     120             :         ".p2align 3                     \n\t"
     121             :         "1:                             \n\t"
     122             :         "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" // next source row, bytes 0..7
     123             :         "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" // next source row, bytes 1..8
     124             :         "movq   %%mm0, %%mm1            \n\t"
     125             :         "movq   %%mm2, %%mm3            \n\t"
     126             :         "punpcklbw %%mm7, %%mm0         \n\t"
     127             :         "punpcklbw %%mm7, %%mm2         \n\t"
     128             :         "punpckhbw %%mm7, %%mm1         \n\t"
     129             :         "punpckhbw %%mm7, %%mm3         \n\t"
     130             :         "paddusw %%mm2, %%mm0           \n\t" // mm0/mm1 = pair sums of this row; kept for the second half of the pass
     131             :         "paddusw %%mm3, %%mm1           \n\t"
     132             :         "paddusw %%mm6, %%mm4           \n\t" // add the rounding constant to the previous row's sums
     133             :         "paddusw %%mm6, %%mm5           \n\t"
     134             :         "paddusw %%mm0, %%mm4           \n\t" // add this row's sums: total of the four neighbours plus rnd
     135             :         "paddusw %%mm1, %%mm5           \n\t"
     136             :         "psrlw  $2, %%mm4               \n\t" // divide each word by 4
     137             :         "psrlw  $2, %%mm5               \n\t"
     138             :                 "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" // load the 8 bytes currently in the destination row (mm3 is free: its sums were already folded into mm1)
     139             :         "packuswb  %%mm5, %%mm4         \n\t" // repack the 8 filtered words into 8 bytes
     140             :                 "pcmpeqd %%mm2, %%mm2   \n\t" // mm2 = all bits set
     141             :                 "paddb %%mm2, %%mm2     \n\t" // byte-wise 0xFF + 0xFF wraps to 0xFE in every byte
     142             :                 PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) // macro defined outside this file; presumably mm5 = byte-wise average of mm3 and mm4 using the 0xFE mask in mm2 - confirm against its definition
     143             :                 "movq   %%mm5, (%2, %%"FF_REG_a")  \n\t" // write the combined row back to block
     144             :         "add    %3, %%"FF_REG_a"        \n\t" // offset += line_size
     145             : 
     146             :         "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3 (second row of the pass: mm0/mm1 now hold the previous row's sums, mm4/mm5 receive the new row's sums)
     147             :         "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
     148             :         "movq   %%mm2, %%mm3            \n\t"
     149             :         "movq   %%mm4, %%mm5            \n\t"
     150             :         "punpcklbw %%mm7, %%mm2         \n\t"
     151             :         "punpcklbw %%mm7, %%mm4         \n\t"
     152             :         "punpckhbw %%mm7, %%mm3         \n\t"
     153             :         "punpckhbw %%mm7, %%mm5         \n\t"
     154             :         "paddusw %%mm2, %%mm4           \n\t" // mm4/mm5 = pair sums of this row; reused by the next loop pass
     155             :         "paddusw %%mm3, %%mm5           \n\t"
     156             :         "paddusw %%mm6, %%mm0           \n\t" // rounding constant onto the previous row's sums
     157             :         "paddusw %%mm6, %%mm1           \n\t"
     158             :         "paddusw %%mm4, %%mm0           \n\t"
     159             :         "paddusw %%mm5, %%mm1           \n\t"
     160             :         "psrlw  $2, %%mm0               \n\t"
     161             :         "psrlw  $2, %%mm1               \n\t"
     162             :                 "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" // load the 8 bytes currently in the destination row
     163             :         "packuswb  %%mm1, %%mm0         \n\t"
     164             :                 "pcmpeqd %%mm2, %%mm2   \n\t" // rebuild the 0xFE byte mask (mm2 was reused for source data above)
     165             :                 "paddb %%mm2, %%mm2     \n\t"
     166             :                 PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) // as above, with the result presumably landing in mm1
     167             :                 "movq   %%mm1, (%2, %%"FF_REG_a")  \n\t" // write the second combined row of this pass
     168             :         "add    %3, %%"FF_REG_a"           \n\t"
     169             : 
     170             :         "subl   $2, %0                  \n\t" // two rows finished
     171             :         "jnz    1b                      \n\t" // repeat until h is exactly 0
     172             :         :"+g"(h), "+S"(pixels) // read-write operands: h (register or memory), pixels (pinned to the si register)
     173             :         :"D"(block), "r"((x86_reg)line_size) // inputs: block (pinned to the di register), line_size (any general register)
     174             :         :FF_REG_a, "memory"); // declared clobbers: the offset register and memory; the MMX registers used above are not listed
     175           0 : }

Generated by: LCOV version 1.12