FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegvideoencdsp_init.c
Date: 2026-01-16 07:34:38
Exec Total Coverage
Lines: 36 42 85.7%
Functions: 2 3 66.7%
Branches: 20 24 83.3%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <stdint.h>
20
21 #include "libavutil/attributes.h"
22 #include "libavutil/avassert.h"
23 #include "libavutil/common.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/avcodec.h"
28 #include "libavcodec/mpegvideoencdsp.h"
29
/* Prototypes of SIMD routines that are declared here but not defined in
 * this file. They are only installed into the DSP context by
 * ff_mpegvideoencdsp_init_x86() under the EXTERNAL_SSE2 / EXTERNAL_XOP /
 * HAVE_SSSE3_EXTERNAL checks, so they are presumably implemented in
 * external assembly -- TODO confirm against the build. */
30 void ff_mpv_denoise_dct_sse2(int16_t block[64], int dct_error_sum[64],
31 const uint16_t dct_offset[64]);
32 int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
33 int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
34 int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
35 void ff_add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale);
36
37 #if HAVE_INLINE_ASM
38 #if HAVE_SSSE3_INLINE
/* Term added to the left-shift amount applied to 'scale' at the top of
 * try_8x8basis_ssse3(): the shift is 16 + SCALE_OFFSET - BASIS_SHIFT +
 * RECON_SHIFT. */
39 #define SCALE_OFFSET -1
40
/* Exclusive upper bound on FFABS(scale) asserted by try_8x8basis_ssse3(). */
41 #define MAX_ABS 512
42
/*
 * SSSE3 inline-asm evaluation of one candidate basis function against a
 * 64-coefficient remainder.
 *
 * For every one of the 64 int16 lanes the asm computes, in 16-bit arithmetic,
 *     t = ((pmulhrsw(basis[k], scale') + rem[k]) >> 6) * weight[k]
 * where scale' is the low 16 bits of the pre-shifted 'scale'. The squares of
 * t are then summed pairwise (pmaddwd), each 32-bit partial sum is shifted
 * right by 4, all partial sums are added together, and the grand total is
 * shifted right by 2.
 *
 * rem, weight, basis: 64 int16 values each. They are accessed with movdqa
 *     and legacy-SSE memory operands, which fault on addresses that are not
 *     16-byte aligned.
 * scale: asserted (av_assert2) to satisfy FFABS(scale) < MAX_ABS.
 *
 * Returns the accumulated total described above, converted from x86_reg
 * to int.
 */
43 static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], const int16_t basis[64], int scale)
44 {
/* Byte offset into the three arrays during the loop; reused afterwards as
 * the output register that receives the final sum. */
45 x86_reg i=0;
46
47 av_assert2(FFABS(scale) < MAX_ABS);
/* Pre-shift so that the rounded high-half multiply (pmulhrsw) below
 * applies the intended fixed-point scaling. */
48 scale *= 1 << (16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT);
49
50 __asm__ volatile(
/* xmm2 = zeroed accumulator; xmm3 = low word of scale broadcast to all
 * eight 16-bit lanes. */
51 "pxor %%xmm2, %%xmm2 \n\t"
52 "movd %4, %%xmm3 \n\t"
53 "punpcklwd %%xmm3, %%xmm3 \n\t"
54 "pshufd $0, %%xmm3, %%xmm3 \n\t"
55 ".p2align 4 \n\t"
/* Loop: 32 bytes (16 coefficients) per iteration, four iterations to cover
 * 128 bytes. */
56 "1: \n\t"
57 "movdqa (%1, %0), %%xmm0 \n\t"
58 "movdqa 16(%1, %0), %%xmm1 \n\t"
59 "pmulhrsw %%xmm3, %%xmm0 \n\t"
60 "pmulhrsw %%xmm3, %%xmm1 \n\t"
61 "paddw (%2, %0), %%xmm0 \n\t"
62 "paddw 16(%2, %0), %%xmm1 \n\t"
63 "psraw $6, %%xmm0 \n\t"
64 "psraw $6, %%xmm1 \n\t"
65 "pmullw (%3, %0), %%xmm0 \n\t"
66 "pmullw 16(%3, %0), %%xmm1 \n\t"
/* Multiplying each register by itself with pmaddwd squares every lane and
 * adds adjacent pairs into 32-bit results. */
67 "pmaddwd %%xmm0, %%xmm0 \n\t"
68 "pmaddwd %%xmm1, %%xmm1 \n\t"
69 "paddd %%xmm1, %%xmm0 \n\t"
70 "psrld $4, %%xmm0 \n\t"
71 "paddd %%xmm0, %%xmm2 \n\t"
72 "add $32, %0 \n\t"
73 "cmp $128, %0 \n\t" //FIXME optimize & bench
74 " jb 1b \n\t"
/* Horizontal reduction: fold the upper two dwords onto the lower two, then
 * dword 1 onto dword 0. */
75 "pshufd $0x0E, %%xmm2, %%xmm0 \n\t"
76 "paddd %%xmm0, %%xmm2 \n\t"
77 "pshufd $0x01, %%xmm2, %%xmm0 \n\t"
78 "paddd %%xmm0, %%xmm2 \n\t"
79 "psrld $2, %%xmm2 \n\t"
80 "movd %%xmm2, %0 \n\t"
81 : "+r" (i)
82 : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
83 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
84 );
85 return i;
86 }
87 #endif /* HAVE_SSSE3_INLINE */
88
89 /* Draw the edges of width 'w' of an image of size width, height */
/*
 * MMX inline-asm edge replication for an image made of single-byte samples.
 *
 * buf:    address of the first sample of the first row.
 * wrap:   distance in bytes between the starts of consecutive rows.
 * width:  number of bytes per row.
 * height: number of rows. The row loops test for the end only after the
 *         body has run and exit on pointer equality, so height must be
 *         at least 1.
 * w:      horizontal border size; 8 and 16 have dedicated paths, any other
 *         value takes the final branch, which asserts w == 4.
 * h:      number of rows written above and/or below the image.
 * sides:  bit mask tested against EDGE_TOP and EDGE_BOTTOM.
 *
 * Step 1 writes, for every row, w copies of the row's first byte just
 * before the row and w copies of its last byte just after it.
 * Step 2 copies the widened first/last row (width + 2*w bytes) into the h
 * rows above/below, which also fills the corners.
 *
 * NOTE(review): no emms is executed after the MMX code in this function;
 * presumably the callers take care of the MMX/x87 state -- confirm.
 */
90 165 static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
91 int w, int h, int sides)
92 {
93 uint8_t *ptr, *last_line;
94 int i;
95
96 /* left and right */
97 165 ptr = buf;
98
2/2
✓ Branch 0 taken 105 times.
✓ Branch 1 taken 60 times.
165 if (w == 8) {
/* Per row: the punpckl{bw,wd,dq} chain spreads byte 0 of the row across
 * mm0, which is stored at ptr - 8; the punpckh{bw,wd,dq} chain spreads the
 * top byte of the row's last 8 bytes across mm1, which is stored at
 * ptr + width. Operand %3 is the end pointer buf + wrap * height. */
99 105 __asm__ volatile (
100 "1: \n\t"
101 "movd (%0), %%mm0 \n\t"
102 "punpcklbw %%mm0, %%mm0 \n\t"
103 "punpcklwd %%mm0, %%mm0 \n\t"
104 "punpckldq %%mm0, %%mm0 \n\t"
105 "movq %%mm0, -8(%0) \n\t"
106 "movq -8(%0, %2), %%mm1 \n\t"
107 "punpckhbw %%mm1, %%mm1 \n\t"
108 "punpckhwd %%mm1, %%mm1 \n\t"
109 "punpckhdq %%mm1, %%mm1 \n\t"
110 "movq %%mm1, (%0, %2) \n\t"
111 "add %1, %0 \n\t"
112 "cmp %3, %0 \n\t"
113 "jnz 1b \n\t"
114 : "+r" (ptr)
115 105 : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
116 105 "r" (ptr + wrap * height));
117
2/2
✓ Branch 0 taken 55 times.
✓ Branch 1 taken 5 times.
60 } else if (w == 16) {
/* Same broadcast as the w == 8 path, but each replicated quadword is
 * stored twice so that 16 bytes are written on each side. */
118 55 __asm__ volatile (
119 "1: \n\t"
120 "movd (%0), %%mm0 \n\t"
121 "punpcklbw %%mm0, %%mm0 \n\t"
122 "punpcklwd %%mm0, %%mm0 \n\t"
123 "punpckldq %%mm0, %%mm0 \n\t"
124 "movq %%mm0, -8(%0) \n\t"
125 "movq %%mm0, -16(%0) \n\t"
126 "movq -8(%0, %2), %%mm1 \n\t"
127 "punpckhbw %%mm1, %%mm1 \n\t"
128 "punpckhwd %%mm1, %%mm1 \n\t"
129 "punpckhdq %%mm1, %%mm1 \n\t"
130 "movq %%mm1, (%0, %2) \n\t"
131 "movq %%mm1, 8(%0, %2) \n\t"
132 "add %1, %0 \n\t"
133 "cmp %3, %0 \n\t"
134 "jnz 1b \n\t"
135 : "+r"(ptr)
136 55 : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
137 );
138 } else {
139 av_assert1(w == 4);
/* 4-byte variant: only the last 4 bytes of the row are loaded (movd), so
 * the right-hand byte is widened with punpcklbw first and then moved down
 * with punpckhwd/punpckhdq; both sides are stored with 32-bit movd. */
140 5 __asm__ volatile (
141 "1: \n\t"
142 "movd (%0), %%mm0 \n\t"
143 "punpcklbw %%mm0, %%mm0 \n\t"
144 "punpcklwd %%mm0, %%mm0 \n\t"
145 "movd %%mm0, -4(%0) \n\t"
146 "movd -4(%0, %2), %%mm1 \n\t"
147 "punpcklbw %%mm1, %%mm1 \n\t"
148 "punpckhwd %%mm1, %%mm1 \n\t"
149 "punpckhdq %%mm1, %%mm1 \n\t"
150 "movd %%mm1, (%0, %2) \n\t"
151 "add %1, %0 \n\t"
152 "cmp %3, %0 \n\t"
153 "jnz 1b \n\t"
154 : "+r" (ptr)
155 5 : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
156 5 "r" (ptr + wrap * height));
157 }
158
159 /* top and bottom + corners */
/* Step back to the start of the left border so that each memcpy below
 * moves the border bytes together with the row itself. */
160 165 buf -= w;
161 165 last_line = buf + (height - 1) * wrap;
162
1/2
✓ Branch 0 taken 165 times.
✗ Branch 1 not taken.
165 if (sides & EDGE_TOP)
163
2/2
✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 165 times.
1905 for (i = 0; i < h; i++)
164 // top
165 1740 memcpy(buf - (i + 1) * wrap, buf, width + w + w);
166
1/2
✓ Branch 0 taken 165 times.
✗ Branch 1 not taken.
165 if (sides & EDGE_BOTTOM)
167
2/2
✓ Branch 0 taken 1740 times.
✓ Branch 1 taken 165 times.
1905 for (i = 0; i < h; i++)
168 // bottom
169 1740 memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
170 165 }
171
172 #endif /* HAVE_INLINE_ASM */
173
/*
 * Install x86-specific implementations into an MpegvideoEncDSPContext.
 *
 * c:     context whose function pointers are overwritten where a matching
 *        CPU feature is reported by av_get_cpu_flags(); pointers for which
 *        no condition matches are left untouched.
 * avctx: read for bits_per_raw_sample and flags only.
 *
 * Later assignments override earlier ones, so the XOP pix_sum replaces the
 * SSE2 one when both feature checks succeed.
 */
174 379 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
175 AVCodecContext *avctx)
176 {
177 379 int cpu_flags = av_get_cpu_flags();
178
179
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 368 times.
379 if (EXTERNAL_SSE2(cpu_flags)) {
180 11 c->denoise_dct = ff_mpv_denoise_dct_sse2;
181 11 c->pix_sum = ff_pix_sum16_sse2;
182 11 c->pix_norm1 = ff_pix_norm1_sse2;
183 }
184
185
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 379 times.
379 if (EXTERNAL_XOP(cpu_flags)) {
186 c->pix_sum = ff_pix_sum16_xop;
187 }
188
189 #if HAVE_INLINE_ASM
190
191
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 365 times.
379 if (INLINE_MMX(cpu_flags)) {
192
/* draw_edges_mmx() replicates individual bytes, so it is only installed
 * when bits_per_raw_sample is at most 8. */
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (avctx->bits_per_raw_sample <= 8) {
193 14 c->draw_edges = draw_edges_mmx;
194 }
195 }
196 #endif /* HAVE_INLINE_ASM */
197
198
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 370 times.
379 if (X86_SSSE3(cpu_flags)) {
199 #if HAVE_SSSE3_INLINE
200
/* The inline-asm try_8x8basis is skipped when AV_CODEC_FLAG_BITEXACT is
 * set -- presumably because its rounding differs from the reference
 * implementation; TODO confirm. */
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
201 8 c->try_8x8basis = try_8x8basis_ssse3;
202 }
203 #endif /* HAVE_SSSE3_INLINE */
204 #if HAVE_SSSE3_EXTERNAL
205 9 c->add_8x8basis = ff_add_8x8basis_ssse3;
206 #endif
207 }
208
209 379 }
210