FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegvideoencdsp_init.c
Date: 2026-04-24 10:13:59
Exec Total Coverage
Lines: 27 33 81.8%
Functions: 2 3 66.7%
Branches: 15 18 83.3%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <stdint.h>
20
21 #include "libavutil/attributes.h"
22 #include "libavutil/avassert.h"
23 #include "libavutil/common.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/avcodec.h"
28 #include "libavcodec/mpegvideoencdsp.h"
29 #include "mpegvideoencdsp.h"
30
31 int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
32 int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
33 int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
34 void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
35
36 #if HAVE_SSSE3_INLINE
37 #define SCALE_OFFSET -1
38
39 #define MAX_ABS 512
40
41 static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], const int16_t basis[64], int scale)
42 {
43 x86_reg i=0;
44
45 av_assert2(FFABS(scale) < MAX_ABS);
46 scale *= 1 << (16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT);
47
48 __asm__ volatile(
49 "pxor %%xmm2, %%xmm2 \n\t"
50 "movd %4, %%xmm3 \n\t"
51 "punpcklwd %%xmm3, %%xmm3 \n\t"
52 "pshufd $0, %%xmm3, %%xmm3 \n\t"
53 ".p2align 4 \n\t"
54 "1: \n\t"
55 "movdqa (%1, %0), %%xmm0 \n\t"
56 "movdqa 16(%1, %0), %%xmm1 \n\t"
57 "pmulhrsw %%xmm3, %%xmm0 \n\t"
58 "pmulhrsw %%xmm3, %%xmm1 \n\t"
59 "paddw (%2, %0), %%xmm0 \n\t"
60 "paddw 16(%2, %0), %%xmm1 \n\t"
61 "psraw $6, %%xmm0 \n\t"
62 "psraw $6, %%xmm1 \n\t"
63 "pmullw (%3, %0), %%xmm0 \n\t"
64 "pmullw 16(%3, %0), %%xmm1 \n\t"
65 "pmaddwd %%xmm0, %%xmm0 \n\t"
66 "pmaddwd %%xmm1, %%xmm1 \n\t"
67 "paddd %%xmm1, %%xmm0 \n\t"
68 "psrld $4, %%xmm0 \n\t"
69 "paddd %%xmm0, %%xmm2 \n\t"
70 "add $32, %0 \n\t"
71 "cmp $128, %0 \n\t" //FIXME optimize & bench
72 " jb 1b \n\t"
73 "pshufd $0x0E, %%xmm2, %%xmm0 \n\t"
74 "paddd %%xmm0, %%xmm2 \n\t"
75 "pshufd $0x01, %%xmm2, %%xmm0 \n\t"
76 "paddd %%xmm0, %%xmm2 \n\t"
77 "psrld $2, %%xmm2 \n\t"
78 "movd %%xmm2, %0 \n\t"
79 : "+r" (i)
80 : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
81 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
82 );
83 return i;
84 }
85
86 /* Draw the edges of width 'w' of an image of size width, height */
87 294 static void draw_edges_ssse3(uint8_t *buf, ptrdiff_t wrap, int width, int height,
88 int w, int h, int sides)
89 {
90 294 uint8_t *ptr = buf, *last_line;
91 int i;
92
93 av_assert1(w == 16 || w == 8 || w == 4);
94
95 /* left and right */
96 294 __asm__ volatile (
97 "pcmpeqw %%xmm3, %%xmm3 \n\t"
98 "pxor %%xmm2, %%xmm2 \n\t"
99 "psrlw $14, %%xmm3 \n\t" // pw_3
100 "pshufb %%xmm2, %%xmm3 \n\t" // pb_3
101 "cmp $8, %4 \n\t"
102 "jg 16f \n\t"
103 "jl 4f \n\t"
104 "8: \n\t"
105 "movd (%0), %%xmm0 \n\t"
106 "movd -4(%0, %2), %%xmm1 \n\t"
107 "pshufb %%xmm2, %%xmm0 \n\t"
108 "pshufb %%xmm3, %%xmm1 \n\t"
109 "movq %%xmm0, -8(%0) \n\t"
110 "movq %%xmm1, (%0, %2) \n\t"
111 "add %1, %0 \n\t"
112 "cmp %3, %0 \n\t"
113 "jnz 8b \n\t"
114 "jmp 1f \n\t"
115 "4: \n\t"
116 "movd (%0), %%xmm0 \n\t"
117 "movd -4(%0, %2), %%xmm1 \n\t"
118 "pshufb %%xmm2, %%xmm0 \n\t"
119 "pshufb %%xmm3, %%xmm1 \n\t"
120 "movd %%xmm0, -4(%0) \n\t"
121 "movd %%xmm1, (%0, %2) \n\t"
122 "add %1, %0 \n\t"
123 "cmp %3, %0 \n\t"
124 "jnz 4b \n\t"
125 "jmp 1f \n\t"
126 "16: \n\t"
127 "movd (%0), %%xmm0 \n\t"
128 "movd -4(%0, %2), %%xmm1 \n\t"
129 "pshufb %%xmm2, %%xmm0 \n\t"
130 "pshufb %%xmm3, %%xmm1 \n\t"
131 "movdqu %%xmm0, -16(%0) \n\t"
132 "movdqu %%xmm1, (%0, %2) \n\t"
133 "add %1, %0 \n\t"
134 "cmp %3, %0 \n\t"
135 "jnz 16b \n\t"
136 "1: \n\t"
137 : "+r" (ptr)
138 294 : "r" ((x86_reg) wrap), "r" ((x86_reg) width), "r"(ptr + wrap * height), "r" (w)
139 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
140 );
141
142 /* top and bottom + corners */
143 294 buf -= w;
144 294 last_line = buf + (height - 1) * wrap;
145
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 54 times.
294 if (sides & EDGE_TOP)
146
2/2
✓ Branch 0 taken 2540 times.
✓ Branch 1 taken 240 times.
2780 for (i = 0; i < h; i++)
147 // top
148 2540 memcpy(buf - (i + 1) * wrap, buf, width + w + w);
149
1/2
✓ Branch 0 taken 294 times.
✗ Branch 1 not taken.
294 if (sides & EDGE_BOTTOM)
150
2/2
✓ Branch 0 taken 3116 times.
✓ Branch 1 taken 294 times.
3410 for (i = 0; i < h; i++)
151 // bottom
152 3116 memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
153 294 }
154 #endif /* HAVE_SSSE3_INLINE */
155
156 387 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
157 AVCodecContext *avctx)
158 {
159 387 int cpu_flags = av_get_cpu_flags();
160
161
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 367 times.
387 if (EXTERNAL_SSE2(cpu_flags)) {
162 20 c->denoise_dct = ff_mpv_denoise_dct_sse2;
163 20 c->pix_sum = ff_pix_sum16_sse2;
164 20 c->pix_norm1 = ff_pix_norm1_sse2;
165 }
166
167
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 387 times.
387 if (EXTERNAL_XOP(cpu_flags)) {
168 c->pix_sum = ff_pix_sum16_xop;
169 }
170
171
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 369 times.
387 if (X86_SSSE3(cpu_flags)) {
172 #if HAVE_SSSE3_INLINE
173
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
18 if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
174 16 c->try_8x8basis = try_8x8basis_ssse3;
175 }
176
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (avctx->bits_per_raw_sample <= 8) {
177 18 c->draw_edges = draw_edges_ssse3;
178 }
179 #endif /* HAVE_SSSE3_INLINE */
180 #if HAVE_SSSE3_EXTERNAL
181 18 c->add_8x8basis = ff_add_8x8basis_ssse3;
182 #endif
183 }
184
185 387 }
186