FFmpeg coverage

Directory:	../../../ffmpeg/
File:	src/libavcodec/x86/me_cmp_init.c
Date:	2025-10-10 03:51:19

	Exec	Total	Coverage
Lines:	66	69	95.7%
Functions:	7	7	100.0%
Branches:	19	24	79.2%

  
      Line
      Branch
      Exec
      Source
    
      /*
    
       * SIMD-optimized motion estimation
    
       * Copyright (c) 2000, 2001 Fabrice Bellard
    
       * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
    
       *
    
       * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
    
       *
    
       * This file is part of FFmpeg.
    
       *
    
       * FFmpeg is free software; you can redistribute it and/or
    
       * modify it under the terms of the GNU Lesser General Public
    
       * License as published by the Free Software Foundation; either
    
       * version 2.1 of the License, or (at your option) any later version.
    
       *
    
       * FFmpeg is distributed in the hope that it will be useful,
    
       * but WITHOUT ANY WARRANTY; without even the implied warranty of
    
       * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    
       * Lesser General Public License for more details.
    
       *
    
       * You should have received a copy of the GNU Lesser General Public
    
       * License along with FFmpeg; if not, write to the Free Software
    
       * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
    
       */
    
      #include "libavutil/attributes.h"
    
      #include "libavutil/cpu.h"
    
      #include "libavutil/mem_internal.h"
    
      #include "libavutil/x86/asm.h"
    
      #include "libavutil/x86/cpu.h"
    
      #include "libavcodec/me_cmp.h"
    
      #include "libavcodec/mpegvideoenc.h"
    
      int ff_sum_abs_dctelem_sse2(const int16_t *block);
    
      int ff_sum_abs_dctelem_ssse3(const int16_t *block);
    
      int ff_sse8_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                      ptrdiff_t stride, int h);
    
      int ff_sse16_mmx(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                       ptrdiff_t stride, int h);
    
      int ff_sse16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                        ptrdiff_t stride, int h);
    
      int ff_hf_noise8_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
    
      int ff_hf_noise16_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
    
      int ff_sad8_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                         ptrdiff_t stride, int h);
    
      int ff_sad16_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                          ptrdiff_t stride, int h);
    
      int ff_sad16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                        ptrdiff_t stride, int h);
    
      int ff_sad8_x2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                            ptrdiff_t stride, int h);
    
      int ff_sad16_x2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                             ptrdiff_t stride, int h);
    
      int ff_sad16_x2_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                           ptrdiff_t stride, int h);
    
      int ff_sad8_y2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                            ptrdiff_t stride, int h);
    
      int ff_sad16_y2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                             ptrdiff_t stride, int h);
    
      int ff_sad16_y2_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                           ptrdiff_t stride, int h);
    
      int ff_sad8_approx_xy2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                                    ptrdiff_t stride, int h);
    
      int ff_sad16_approx_xy2_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                                     ptrdiff_t stride, int h);
    
      int ff_sad16_approx_xy2_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                                   ptrdiff_t stride, int h);
    
      int ff_vsad_intra8_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                                ptrdiff_t stride, int h);
    
      int ff_vsad_intra16_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                                 ptrdiff_t stride, int h);
    
      int ff_vsad_intra16_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                               ptrdiff_t stride, int h);
    
      int ff_vsad8_approx_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                          ptrdiff_t stride, int h);
    
      int ff_vsad16_approx_mmxext(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                           ptrdiff_t stride, int h);
    
      int ff_vsad16_approx_sse2(MPVEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
    
                         ptrdiff_t stride, int h);
    
      #define hadamard_func(cpu)                                                       \
    
          int ff_hadamard8_diff_ ## cpu(MPVEncContext *s, const uint8_t *src1,         \
    
                                        const uint8_t *src2, ptrdiff_t stride, int h); \
    
          int ff_hadamard8_diff16_ ## cpu(MPVEncContext *s, const uint8_t *src1,       \
    
                                          const uint8_t *src2, ptrdiff_t stride, int h);
    
      hadamard_func(mmxext)
    
      hadamard_func(sse2)
    
      hadamard_func(ssse3)
    
      #if HAVE_X86ASM
    
      16
      static int nsse16_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
    
                            ptrdiff_t stride, int h)
    
      {
    
          int score1, score2;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.

      16
          if (c)
    
      ✗
              score1 = c->sse_cmp[0](c, pix1, pix2, stride, h);
    
          else
    
      16
              score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
    
      16
          score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)
    
      16
                 - ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.

      16
          if (c)
    
      ✗
              return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
    
          else
    
      16
              return score1 + FFABS(score2) * 8;
    
      }
    
      16
      static int nsse8_mmx(MPVEncContext *c, const uint8_t *pix1, const uint8_t *pix2,
    
                           ptrdiff_t stride, int h)
    
      {
    
      16
          int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h);
    
      16
          int score2 = ff_hf_noise8_mmx(pix1, stride, h) -
    
      16
                       ff_hf_noise8_mmx(pix2, stride, h);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.

      16
          if (c)
    
      ✗
              return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
    
          else
    
      16
              return score1 + FFABS(score2) * 8;
    
      }
    
      #endif /* HAVE_X86ASM */
    
      #if HAVE_INLINE_ASM
    
      DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
    
          0x0000000000000000ULL,
    
          0x0001000100010001ULL,
    
          0x0002000200020002ULL,
    
      };
    
      71328
      static inline void sad8_4_mmx(const uint8_t *blk1, const uint8_t *blk2,
    
                                    ptrdiff_t stride, int h)
    
      {
    
      71328
          x86_reg len = -stride * h;
    
      71328
          __asm__ volatile (
    
              "movq  (%1, %%"FF_REG_a"), %%mm0\n\t"
    
              "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
    
              "movq %%mm0, %%mm1              \n\t"
    
              "movq %%mm2, %%mm3              \n\t"
    
              "punpcklbw %%mm7, %%mm0         \n\t"
    
              "punpckhbw %%mm7, %%mm1         \n\t"
    
              "punpcklbw %%mm7, %%mm2         \n\t"
    
              "punpckhbw %%mm7, %%mm3         \n\t"
    
              "paddw %%mm2, %%mm0             \n\t"
    
              "paddw %%mm3, %%mm1             \n\t"
    
              ".p2align 4                     \n\t"
    
              "1:                             \n\t"
    
              "movq  (%2, %%"FF_REG_a"), %%mm2\n\t"
    
              "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
    
              "movq %%mm2, %%mm3              \n\t"
    
              "movq %%mm4, %%mm5              \n\t"
    
              "punpcklbw %%mm7, %%mm2         \n\t"
    
              "punpckhbw %%mm7, %%mm3         \n\t"
    
              "punpcklbw %%mm7, %%mm4         \n\t"
    
              "punpckhbw %%mm7, %%mm5         \n\t"
    
              "paddw %%mm4, %%mm2             \n\t"
    
              "paddw %%mm5, %%mm3             \n\t"
    
              "movq %5, %%mm5                 \n\t"
    
              "paddw %%mm2, %%mm0             \n\t"
    
              "paddw %%mm3, %%mm1             \n\t"
    
              "paddw %%mm5, %%mm0             \n\t"
    
              "paddw %%mm5, %%mm1             \n\t"
    
              "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
    
              "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
    
              "psrlw $2, %%mm0                \n\t"
    
              "psrlw $2, %%mm1                \n\t"
    
              "packuswb %%mm1, %%mm0          \n\t"
    
              "psubusb %%mm0, %%mm4           \n\t"
    
              "psubusb %%mm5, %%mm0           \n\t"
    
              "por %%mm4, %%mm0               \n\t"
    
              "movq %%mm0, %%mm4              \n\t"
    
              "punpcklbw %%mm7, %%mm0         \n\t"
    
              "punpckhbw %%mm7, %%mm4         \n\t"
    
              "paddw %%mm0, %%mm6             \n\t"
    
              "paddw %%mm4, %%mm6             \n\t"
    
              "movq  %%mm2, %%mm0             \n\t"
    
              "movq  %%mm3, %%mm1             \n\t"
    
              "add %4, %%"FF_REG_a"           \n\t"
    
              " js 1b                         \n\t"
    
              : "+a" (len)
    
      71328
              : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
    
                "r" (stride), "m" (round_tab[2]));
    
      71328
      }
    
      35672
      static inline int sum_mmx(void)
    
      {
    
          int ret;
    
      35672
          __asm__ volatile (
    
              "movq %%mm6, %%mm0              \n\t"
    
              "psrlq $32, %%mm6               \n\t"
    
              "paddw %%mm0, %%mm6             \n\t"
    
              "movq %%mm6, %%mm0              \n\t"
    
              "psrlq $16, %%mm6               \n\t"
    
              "paddw %%mm0, %%mm6             \n\t"
    
              "movd %%mm6, %0                 \n\t"
    
              : "=r" (ret));
    
      35672
          return ret & 0xFFFF;
    
      }
    
      #define PIX_SADXY(suf)                                                  \
    
      static int sad8_xy2_ ## suf(MPVEncContext *v, const uint8_t *blk2,      \
    
                                  const uint8_t *blk1, ptrdiff_t stride, int h) \
    
      {                                                                       \
    
          __asm__ volatile (                                                  \
    
              "pxor %%mm7, %%mm7     \n\t"                                    \
    
              "pxor %%mm6, %%mm6     \n\t"                                    \
    
              ::);                                                            \
    
                                                                              \
    
          sad8_4_ ## suf(blk1, blk2, stride, h);                              \
    
                                                                              \
    
          return sum_ ## suf();                                               \
    
      }                                                                       \
    
                                                                              \
    
      static int sad16_xy2_ ## suf(MPVEncContext *v, const uint8_t *blk2,     \
    
                                   const uint8_t *blk1, ptrdiff_t stride, int h) \
    
      {                                                                       \
    
          __asm__ volatile (                                                  \
    
              "pxor %%mm7, %%mm7     \n\t"                                    \
    
              "pxor %%mm6, %%mm6     \n\t"                                    \
    
              ::);                                                            \
    
                                                                              \
    
          sad8_4_ ## suf(blk1,     blk2,     stride, h);                      \
    
          sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h);                      \
    
                                                                              \
    
          return sum_ ## suf();                                               \
    
      }                                                                       \
    
      71344
      PIX_SADXY(mmx)
    
      #endif /* HAVE_INLINE_ASM */
    
      1069
      av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
    
      {
    
      1069
          int cpu_flags = av_get_cpu_flags();
    
      #if HAVE_INLINE_ASM
    
        2/2✓ Branch 0 taken 71 times.
✓ Branch 1 taken 998 times.

      1069
          if (INLINE_MMX(cpu_flags)) {
    
      71
              c->pix_abs[0][3] = sad16_xy2_mmx;
    
      71
              c->pix_abs[1][3] = sad8_xy2_mmx;
    
          }
    
      #endif /* HAVE_INLINE_ASM */
    
        2/2✓ Branch 0 taken 71 times.
✓ Branch 1 taken 998 times.

      1069
          if (EXTERNAL_MMX(cpu_flags)) {
    
      71
              c->sse[1]            = ff_sse8_mmx;
    
      #if HAVE_X86ASM
    
      71
              c->nsse[0]           = nsse16_mmx;
    
      71
              c->nsse[1]           = nsse8_mmx;
    
      #endif
    
          }
    
        2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 999 times.

      1069
          if (EXTERNAL_MMXEXT(cpu_flags)) {
    
      #if !HAVE_ALIGNED_STACK
    
              c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
    
              c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
    
      #endif
    
      70
              c->sad[0] = ff_sad16_mmxext;
    
      70
              c->sad[1] = ff_sad8_mmxext;
    
      70
              c->pix_abs[0][0] = ff_sad16_mmxext;
    
      70
              c->pix_abs[0][1] = ff_sad16_x2_mmxext;
    
      70
              c->pix_abs[0][2] = ff_sad16_y2_mmxext;
    
      70
              c->pix_abs[1][0] = ff_sad8_mmxext;
    
      70
              c->pix_abs[1][1] = ff_sad8_x2_mmxext;
    
      70
              c->pix_abs[1][2] = ff_sad8_y2_mmxext;
    
      70
              c->vsad[4] = ff_vsad_intra16_mmxext;
    
      70
              c->vsad[5] = ff_vsad_intra8_mmxext;
    
        2/2✓ Branch 0 taken 57 times.
✓ Branch 1 taken 13 times.

      70
              if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
    
      57
                  c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
    
      57
                  c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;
    
      57
                  c->vsad[0] = ff_vsad16_approx_mmxext;
    
      57
                  c->vsad[1] = ff_vsad8_approx_mmxext;
    
              }
    
          }
    
        2/2✓ Branch 0 taken 68 times.
✓ Branch 1 taken 1001 times.

      1069
          if (EXTERNAL_SSE2(cpu_flags)) {
    
      68
              c->sse[0] = ff_sse16_sse2;
    
      68
              c->sum_abs_dctelem   = ff_sum_abs_dctelem_sse2;
    
      #if HAVE_ALIGNED_STACK
    
      68
              c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
    
      68
              c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
    
      #endif
    
        2/4✓ Branch 0 taken 68 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 68 times.
✗ Branch 3 not taken.

      68
              if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
    
      68
                  c->sad[0]        = ff_sad16_sse2;
    
      68
                  c->pix_abs[0][0] = ff_sad16_sse2;
    
      68
                  c->pix_abs[0][1] = ff_sad16_x2_sse2;
    
      68
                  c->pix_abs[0][2] = ff_sad16_y2_sse2;
    
      68
                  c->vsad[4]       = ff_vsad_intra16_sse2;
    
        2/2✓ Branch 0 taken 57 times.
✓ Branch 1 taken 11 times.

      68
                  if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
    
      57
                      c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
    
      57
                      c->vsad[0]       = ff_vsad16_approx_sse2;
    
                  }
    
              }
    
          }
    
        2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 1003 times.

      1069
          if (EXTERNAL_SSSE3(cpu_flags)) {
    
      66
              c->sum_abs_dctelem   = ff_sum_abs_dctelem_ssse3;
    
      #if HAVE_ALIGNED_STACK
    
      66
              c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
    
      66
              c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
    
      #endif
    
          }
    
      1069
      }

Line	Branch	Exec	Source
1			/*
2			* SIMD-optimized motion estimation
3			* Copyright (c) 2000, 2001 Fabrice Bellard
4			* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5			*
6			* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7			*
8			* This file is part of FFmpeg.
9			*
10			* FFmpeg is free software; you can redistribute it and/or
11			* modify it under the terms of the GNU Lesser General Public
12			* License as published by the Free Software Foundation; either
13			* version 2.1 of the License, or (at your option) any later version.
14			*
15			* FFmpeg is distributed in the hope that it will be useful,
16			* but WITHOUT ANY WARRANTY; without even the implied warranty of
17			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18			* Lesser General Public License for more details.
19			*
20			* You should have received a copy of the GNU Lesser General Public
21			* License along with FFmpeg; if not, write to the Free Software
22			* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23			*/
24
25			#include "libavutil/attributes.h"
26			#include "libavutil/cpu.h"
27			#include "libavutil/mem_internal.h"
28			#include "libavutil/x86/asm.h"
29			#include "libavutil/x86/cpu.h"
30			#include "libavcodec/me_cmp.h"
31			#include "libavcodec/mpegvideoenc.h"
32
33			int ff_sum_abs_dctelem_sse2(const int16_t *block);
34			int ff_sum_abs_dctelem_ssse3(const int16_t *block);
35			int ff_sse8_mmx(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
36			ptrdiff_t stride, int h);
37			int ff_sse16_mmx(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
38			ptrdiff_t stride, int h);
39			int ff_sse16_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
40			ptrdiff_t stride, int h);
41			int ff_hf_noise8_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
42			int ff_hf_noise16_mmx(const uint8_t *pix1, ptrdiff_t stride, int h);
43			int ff_sad8_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
44			ptrdiff_t stride, int h);
45			int ff_sad16_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
46			ptrdiff_t stride, int h);
47			int ff_sad16_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
48			ptrdiff_t stride, int h);
49			int ff_sad8_x2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
50			ptrdiff_t stride, int h);
51			int ff_sad16_x2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
52			ptrdiff_t stride, int h);
53			int ff_sad16_x2_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
54			ptrdiff_t stride, int h);
55			int ff_sad8_y2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
56			ptrdiff_t stride, int h);
57			int ff_sad16_y2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
58			ptrdiff_t stride, int h);
59			int ff_sad16_y2_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
60			ptrdiff_t stride, int h);
61			int ff_sad8_approx_xy2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
62			ptrdiff_t stride, int h);
63			int ff_sad16_approx_xy2_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
64			ptrdiff_t stride, int h);
65			int ff_sad16_approx_xy2_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
66			ptrdiff_t stride, int h);
67			int ff_vsad_intra8_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
68			ptrdiff_t stride, int h);
69			int ff_vsad_intra16_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
70			ptrdiff_t stride, int h);
71			int ff_vsad_intra16_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
72			ptrdiff_t stride, int h);
73			int ff_vsad8_approx_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
74			ptrdiff_t stride, int h);
75			int ff_vsad16_approx_mmxext(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
76			ptrdiff_t stride, int h);
77			int ff_vsad16_approx_sse2(MPVEncContext v, const uint8_t pix1, const uint8_t *pix2,
78			ptrdiff_t stride, int h);
79
80			#define hadamard_func(cpu) \
81			int ff_hadamard8_diff_ ## cpu(MPVEncContext s, const uint8_t src1, \
82			const uint8_t *src2, ptrdiff_t stride, int h); \
83			int ff_hadamard8_diff16_ ## cpu(MPVEncContext s, const uint8_t src1, \
84			const uint8_t *src2, ptrdiff_t stride, int h);
85
86			hadamard_func(mmxext)
87			hadamard_func(sse2)
88			hadamard_func(ssse3)
89
90			#if HAVE_X86ASM
91		16	static int nsse16_mmx(MPVEncContext c, const uint8_t pix1, const uint8_t *pix2,
92			ptrdiff_t stride, int h)
93			{
94			int score1, score2;
95
96	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	if (c)
97		✗	score1 = c->sse_cmp[0](c, pix1, pix2, stride, h);
98			else
99		16	score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
100		16	score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)
101		16	- ff_hf_noise16_mmx(pix2, stride, h) - ff_hf_noise8_mmx(pix2+8, stride, h);
102
103	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	if (c)
104		✗	return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
105			else
106		16	return score1 + FFABS(score2) * 8;
107			}
108
109		16	static int nsse8_mmx(MPVEncContext c, const uint8_t pix1, const uint8_t *pix2,
110			ptrdiff_t stride, int h)
111			{
112		16	int score1 = ff_sse8_mmx(c, pix1, pix2, stride, h);
113		16	int score2 = ff_hf_noise8_mmx(pix1, stride, h) -
114		16	ff_hf_noise8_mmx(pix2, stride, h);
115
116	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	if (c)
117		✗	return score1 + FFABS(score2) * c->c.avctx->nsse_weight;
118			else
119		16	return score1 + FFABS(score2) * 8;
120			}
121
122			#endif /* HAVE_X86ASM */
123
124			#if HAVE_INLINE_ASM
125
126			DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
127			0x0000000000000000ULL,
128			0x0001000100010001ULL,
129			0x0002000200020002ULL,
130			};
131
132		71328	static inline void sad8_4_mmx(const uint8_t blk1, const uint8_t blk2,
133			ptrdiff_t stride, int h)
134			{
135		71328	x86_reg len = -stride * h;
136		71328	__asm__ volatile (
137			"movq (%1, %%"FF_REG_a"), %%mm0\n\t"
138			"movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
139			"movq %%mm0, %%mm1 \n\t"
140			"movq %%mm2, %%mm3 \n\t"
141			"punpcklbw %%mm7, %%mm0 \n\t"
142			"punpckhbw %%mm7, %%mm1 \n\t"
143			"punpcklbw %%mm7, %%mm2 \n\t"
144			"punpckhbw %%mm7, %%mm3 \n\t"
145			"paddw %%mm2, %%mm0 \n\t"
146			"paddw %%mm3, %%mm1 \n\t"
147			".p2align 4 \n\t"
148			"1: \n\t"
149			"movq (%2, %%"FF_REG_a"), %%mm2\n\t"
150			"movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
151			"movq %%mm2, %%mm3 \n\t"
152			"movq %%mm4, %%mm5 \n\t"
153			"punpcklbw %%mm7, %%mm2 \n\t"
154			"punpckhbw %%mm7, %%mm3 \n\t"
155			"punpcklbw %%mm7, %%mm4 \n\t"
156			"punpckhbw %%mm7, %%mm5 \n\t"
157			"paddw %%mm4, %%mm2 \n\t"
158			"paddw %%mm5, %%mm3 \n\t"
159			"movq %5, %%mm5 \n\t"
160			"paddw %%mm2, %%mm0 \n\t"
161			"paddw %%mm3, %%mm1 \n\t"
162			"paddw %%mm5, %%mm0 \n\t"
163			"paddw %%mm5, %%mm1 \n\t"
164			"movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
165			"movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
166			"psrlw $2, %%mm0 \n\t"
167			"psrlw $2, %%mm1 \n\t"
168			"packuswb %%mm1, %%mm0 \n\t"
169			"psubusb %%mm0, %%mm4 \n\t"
170			"psubusb %%mm5, %%mm0 \n\t"
171			"por %%mm4, %%mm0 \n\t"
172			"movq %%mm0, %%mm4 \n\t"
173			"punpcklbw %%mm7, %%mm0 \n\t"
174			"punpckhbw %%mm7, %%mm4 \n\t"
175			"paddw %%mm0, %%mm6 \n\t"
176			"paddw %%mm4, %%mm6 \n\t"
177			"movq %%mm2, %%mm0 \n\t"
178			"movq %%mm3, %%mm1 \n\t"
179			"add %4, %%"FF_REG_a" \n\t"
180			" js 1b \n\t"
181			: "+a" (len)
182		71328	: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
183			"r" (stride), "m" (round_tab[2]));
184		71328	}
185
186		35672	static inline int sum_mmx(void)
187			{
188			int ret;
189		35672	__asm__ volatile (
190			"movq %%mm6, %%mm0 \n\t"
191			"psrlq $32, %%mm6 \n\t"
192			"paddw %%mm0, %%mm6 \n\t"
193			"movq %%mm6, %%mm0 \n\t"
194			"psrlq $16, %%mm6 \n\t"
195			"paddw %%mm0, %%mm6 \n\t"
196			"movd %%mm6, %0 \n\t"
197			: "=r" (ret));
198		35672	return ret & 0xFFFF;
199			}
200
201			#define PIX_SADXY(suf) \
202			static int sad8_xy2_ ## suf(MPVEncContext v, const uint8_t blk2, \
203			const uint8_t *blk1, ptrdiff_t stride, int h) \
204			{ \
205			__asm__ volatile ( \
206			"pxor %%mm7, %%mm7 \n\t" \
207			"pxor %%mm6, %%mm6 \n\t" \
208			::); \
209			\
210			sad8_4_ ## suf(blk1, blk2, stride, h); \
211			\
212			return sum_ ## suf(); \
213			} \
214			\
215			static int sad16_xy2_ ## suf(MPVEncContext v, const uint8_t blk2, \
216			const uint8_t *blk1, ptrdiff_t stride, int h) \
217			{ \
218			__asm__ volatile ( \
219			"pxor %%mm7, %%mm7 \n\t" \
220			"pxor %%mm6, %%mm6 \n\t" \
221			::); \
222			\
223			sad8_4_ ## suf(blk1, blk2, stride, h); \
224			sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
225			\
226			return sum_ ## suf(); \
227			} \
228
229		71344	PIX_SADXY(mmx)
230
231			#endif /* HAVE_INLINE_ASM */
232
233		1069	av_cold void ff_me_cmp_init_x86(MECmpContext c, AVCodecContext avctx)
234			{
235		1069	int cpu_flags = av_get_cpu_flags();
236
237			#if HAVE_INLINE_ASM
238	2/2 ✓ Branch 0 taken 71 times. ✓ Branch 1 taken 998 times.	1069	if (INLINE_MMX(cpu_flags)) {
239		71	c->pix_abs[0][3] = sad16_xy2_mmx;
240		71	c->pix_abs[1][3] = sad8_xy2_mmx;
241			}
242
243			#endif /* HAVE_INLINE_ASM */
244
245	2/2 ✓ Branch 0 taken 71 times. ✓ Branch 1 taken 998 times.	1069	if (EXTERNAL_MMX(cpu_flags)) {
246		71	c->sse[1] = ff_sse8_mmx;
247			#if HAVE_X86ASM
248		71	c->nsse[0] = nsse16_mmx;
249		71	c->nsse[1] = nsse8_mmx;
250			#endif
251			}
252
253	2/2 ✓ Branch 0 taken 70 times. ✓ Branch 1 taken 999 times.	1069	if (EXTERNAL_MMXEXT(cpu_flags)) {
254			#if !HAVE_ALIGNED_STACK
255			c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
256			c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
257			#endif
258
259		70	c->sad[0] = ff_sad16_mmxext;
260		70	c->sad[1] = ff_sad8_mmxext;
261
262		70	c->pix_abs[0][0] = ff_sad16_mmxext;
263		70	c->pix_abs[0][1] = ff_sad16_x2_mmxext;
264		70	c->pix_abs[0][2] = ff_sad16_y2_mmxext;
265		70	c->pix_abs[1][0] = ff_sad8_mmxext;
266		70	c->pix_abs[1][1] = ff_sad8_x2_mmxext;
267		70	c->pix_abs[1][2] = ff_sad8_y2_mmxext;
268
269		70	c->vsad[4] = ff_vsad_intra16_mmxext;
270		70	c->vsad[5] = ff_vsad_intra8_mmxext;
271
272	2/2 ✓ Branch 0 taken 57 times. ✓ Branch 1 taken 13 times.	70	if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
273		57	c->pix_abs[0][3] = ff_sad16_approx_xy2_mmxext;
274		57	c->pix_abs[1][3] = ff_sad8_approx_xy2_mmxext;
275
276		57	c->vsad[0] = ff_vsad16_approx_mmxext;
277		57	c->vsad[1] = ff_vsad8_approx_mmxext;
278			}
279			}
280
281	2/2 ✓ Branch 0 taken 68 times. ✓ Branch 1 taken 1001 times.	1069	if (EXTERNAL_SSE2(cpu_flags)) {
282		68	c->sse[0] = ff_sse16_sse2;
283		68	c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
284
285			#if HAVE_ALIGNED_STACK
286		68	c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
287		68	c->hadamard8_diff[1] = ff_hadamard8_diff_sse2;
288			#endif
289	2/4 ✓ Branch 0 taken 68 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 68 times. ✗ Branch 3 not taken.	68	if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
290		68	c->sad[0] = ff_sad16_sse2;
291		68	c->pix_abs[0][0] = ff_sad16_sse2;
292		68	c->pix_abs[0][1] = ff_sad16_x2_sse2;
293		68	c->pix_abs[0][2] = ff_sad16_y2_sse2;
294
295		68	c->vsad[4] = ff_vsad_intra16_sse2;
296	2/2 ✓ Branch 0 taken 57 times. ✓ Branch 1 taken 11 times.	68	if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
297		57	c->pix_abs[0][3] = ff_sad16_approx_xy2_sse2;
298		57	c->vsad[0] = ff_vsad16_approx_sse2;
299			}
300			}
301			}
302
303	2/2 ✓ Branch 0 taken 66 times. ✓ Branch 1 taken 1003 times.	1069	if (EXTERNAL_SSSE3(cpu_flags)) {
304		66	c->sum_abs_dctelem = ff_sum_abs_dctelem_ssse3;
305			#if HAVE_ALIGNED_STACK
306		66	c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
307		66	c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
308			#endif
309			}
310		1069	}
311

Function (Line)	Call count	Block coverage
ff_me_cmp_init_x86 (line 233)	called 1069 times, returned 1069 times	100.0%
nsse16_mmx (line 91)	called 16 times, returned 16 times	82.0%
nsse8_mmx (line 109)	called 16 times, returned 16 times	86.0%
sad16_xy2_mmx (line 229)	called 35656 times, returned 35656 times	100.0%
sad8_4_mmx (line 132)	called 71328 times, returned 71328 times	100.0%
sad8_xy2_mmx (line 229)	called 16 times, returned 16 times	100.0%
sum_mmx (line 186)	called 35672 times, returned 35672 times	100.0%