FFmpeg coverage


Directory: ../../../ffmpeg/
File:      src/libavcodec/x86/mpegvideoencdsp_init.c
Date:      2025-10-19 14:07:46

            Exec  Total  Coverage
Lines:        41     49     83.7%
Functions:     3      4     75.0%
Branches:     22     30     73.3%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include <stdint.h>
20
21 #include "libavutil/attributes.h"
22 #include "libavutil/avassert.h"
23 #include "libavutil/common.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/avcodec.h"
28 #include "libavcodec/mpegvideoencdsp.h"
29
30 int ff_pix_sum16_sse2(const uint8_t *pix, ptrdiff_t line_size);
31 int ff_pix_sum16_xop(const uint8_t *pix, ptrdiff_t line_size);
32 int ff_pix_norm1_sse2(const uint8_t *pix, ptrdiff_t line_size);
33
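
For reference, the three routines declared above have straightforward scalar equivalents. A minimal sketch, assuming FFmpeg's usual semantics here (pix_sum adds up a 16x16 block of pixels, pix_norm1 adds up their squares); the *_ref names are hypothetical and the code needs <stdint.h> and <stddef.h>:

    static int pix_sum16_ref(const uint8_t *pix, ptrdiff_t line_size)
    {
        int sum = 0;
        for (int y = 0; y < 16; y++, pix += line_size)
            for (int x = 0; x < 16; x++)
                sum += pix[x];              /* plain sum of the block */
        return sum;
    }

    static int pix_norm1_ref(const uint8_t *pix, ptrdiff_t line_size)
    {
        int sum = 0;
        for (int y = 0; y < 16; y++, pix += line_size)
            for (int x = 0; x < 16; x++)
                sum += pix[x] * pix[x];     /* sum of squared pixels */
        return sum;
    }
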
34 #if HAVE_INLINE_ASM
35 #if HAVE_SSSE3_INLINE
36 #define SCALE_OFFSET -1
37
38 #define MAX_ABS 512
39
40 static int try_8x8basis_ssse3(const int16_t rem[64], const int16_t weight[64], const int16_t basis[64], int scale)
41 {
42 x86_reg i=0;
43
44 av_assert2(FFABS(scale) < MAX_ABS);
45 scale <<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
46
47 __asm__ volatile(
48 "pxor %%xmm2, %%xmm2 \n\t"
49 "movd %4, %%xmm3 \n\t"
50 "punpcklwd %%xmm3, %%xmm3 \n\t"
51 "pshufd $0, %%xmm3, %%xmm3 \n\t"
52 ".p2align 4 \n\t"
53 "1: \n\t"
54 "movdqa (%1, %0), %%xmm0 \n\t"
55 "movdqa 16(%1, %0), %%xmm1 \n\t"
56 "pmulhrsw %%xmm3, %%xmm0 \n\t"
57 "pmulhrsw %%xmm3, %%xmm1 \n\t"
58 "paddw (%2, %0), %%xmm0 \n\t"
59 "paddw 16(%2, %0), %%xmm1 \n\t"
60 "psraw $6, %%xmm0 \n\t"
61 "psraw $6, %%xmm1 \n\t"
62 "pmullw (%3, %0), %%xmm0 \n\t"
63 "pmullw 16(%3, %0), %%xmm1 \n\t"
64 "pmaddwd %%xmm0, %%xmm0 \n\t"
65 "pmaddwd %%xmm1, %%xmm1 \n\t"
66 "paddd %%xmm1, %%xmm0 \n\t"
67 "psrld $4, %%xmm0 \n\t"
68 "paddd %%xmm0, %%xmm2 \n\t"
69 "add $32, %0 \n\t"
70 "cmp $128, %0 \n\t" //FIXME optimize & bench
71 " jb 1b \n\t"
72 "pshufd $0x0E, %%xmm2, %%xmm0 \n\t"
73 "paddd %%xmm0, %%xmm2 \n\t"
74 "pshufd $0x01, %%xmm2, %%xmm0 \n\t"
75 "paddd %%xmm0, %%xmm2 \n\t"
76 "psrld $2, %%xmm2 \n\t"
77 "movd %%xmm2, %0 \n\t"
78 : "+r" (i)
79 : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
80 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3")
81 );
82 return i;
83 }
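
The loop above measures how well a scaled basis function cancels the residual: each iteration rescales 16 basis coefficients with a rounded multiply, adds the residual, weights the result, and accumulates the squares. A scalar sketch of the same quantity (hypothetical reference, patterned after FFmpeg's C fallback try_8x8basis_c and assuming BASIS_SHIFT = 16 and RECON_SHIFT = 6 from mpegvideoencdsp.h; the SIMD version rounds slightly differently via pmulhrsw and the per-pair psrld $4):

    static int try_8x8basis_ref(const int16_t rem[64], const int16_t weight[64],
                                const int16_t basis[64], int scale)
    {
        unsigned sum = 0;
        for (int i = 0; i < 64; i++) {
            /* rescale the basis coefficient and add the residual */
            int b = rem[i] + ((basis[i] * scale + (1 << 9)) >> 10);
            b >>= 6;                          /* RECON_SHIFT */
            int wb = weight[i] * b;           /* apply perceptual weight */
            sum += (unsigned)(wb * wb) >> 4;
        }
        return sum >> 2;                      /* matches the final psrld $2 */
    }
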
84
85 1 static void add_8x8basis_ssse3(int16_t rem[64], const int16_t basis[64], int scale)
86 {
87 1 x86_reg i=0;
88
89 2/4 1 if (FFABS(scale) < 1024) {
        (branch 0 taken 1, branch 1 not taken, branch 2 taken 1, branch 3 not taken)
90 1 scale <<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
91 1 __asm__ volatile(
92 "movd %3, %%xmm2 \n\t"
93 "punpcklwd %%xmm2, %%xmm2 \n\t"
94 "pshufd $0, %%xmm2, %%xmm2 \n\t"
95 ".p2align 4 \n\t"
96 "1: \n\t"
97 "movdqa (%1, %0), %%xmm0 \n\t"
98 "movdqa 16(%1, %0), %%xmm1 \n\t"
99 "pmulhrsw %%xmm2, %%xmm0 \n\t"
100 "pmulhrsw %%xmm2, %%xmm1 \n\t"
101 "paddw (%2, %0), %%xmm0 \n\t"
102 "paddw 16(%2, %0), %%xmm1 \n\t"
103 "movdqa %%xmm0, (%2, %0) \n\t"
104 "movdqa %%xmm1, 16(%2, %0) \n\t"
105 "add $32, %0 \n\t"
106 "cmp $128, %0 \n\t" // FIXME optimize & bench
107 " jb 1b \n\t"
108 : "+r" (i)
109 : "r"(basis), "r"(rem), "g"(scale)
110 XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2")
111 );
112 } else {
113 for (i=0; i<8*8; i++) {
114 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
115 }
116 }
117 1 }
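
Both SSSE3 routines hinge on pmulhrsw, a rounded high-half multiply per 16-bit lane. Pre-shifting scale by 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT (which is 5 for the constants assumed above) lets that one instruction reproduce the shift-and-round of the scalar fallback in the else branch. A sketch of the lane arithmetic (mulhrs is a hypothetical helper name; note FFABS(scale) < 1024 guarantees scale << 5 still fits in an int16_t):

    /* what pmulhrsw computes per int16 lane */
    static inline int16_t mulhrs(int16_t a, int16_t b)
    {
        return (int16_t)(((int32_t)a * b + 0x4000) >> 15);
    }

    /* mulhrs(basis[i], scale << 5)
     *   == (basis[i] * scale + (1 << 9)) >> 10
     *   == (basis[i] * scale + (1 << (BASIS_SHIFT - RECON_SHIFT - 1)))
     *          >> (BASIS_SHIFT - RECON_SHIFT)
     * i.e. exactly the expression in the scalar else branch above. */
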
118
119 #endif /* HAVE_SSSE3_INLINE */
120
121 /* Draw the edges of width 'w' of an image of size width, height */
122 165 static void draw_edges_mmx(uint8_t *buf, ptrdiff_t wrap, int width, int height,
123 int w, int h, int sides)
124 {
125 uint8_t *ptr, *last_line;
126 int i;
127
128 /* left and right */
129 165 ptr = buf;
130 2/2 165 if (w == 8) {
        (branch 0 taken 105, branch 1 taken 60)
131 105 __asm__ volatile (
132 "1: \n\t"
133 "movd (%0), %%mm0 \n\t"
134 "punpcklbw %%mm0, %%mm0 \n\t"
135 "punpcklwd %%mm0, %%mm0 \n\t"
136 "punpckldq %%mm0, %%mm0 \n\t"
137 "movq %%mm0, -8(%0) \n\t"
138 "movq -8(%0, %2), %%mm1 \n\t"
139 "punpckhbw %%mm1, %%mm1 \n\t"
140 "punpckhwd %%mm1, %%mm1 \n\t"
141 "punpckhdq %%mm1, %%mm1 \n\t"
142 "movq %%mm1, (%0, %2) \n\t"
143 "add %1, %0 \n\t"
144 "cmp %3, %0 \n\t"
145 "jnz 1b \n\t"
146 : "+r" (ptr)
147 105 : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
148 105 "r" (ptr + wrap * height));
149 2/2 60 } else if (w == 16) {
        (branch 0 taken 55, branch 1 taken 5)
150 55 __asm__ volatile (
151 "1: \n\t"
152 "movd (%0), %%mm0 \n\t"
153 "punpcklbw %%mm0, %%mm0 \n\t"
154 "punpcklwd %%mm0, %%mm0 \n\t"
155 "punpckldq %%mm0, %%mm0 \n\t"
156 "movq %%mm0, -8(%0) \n\t"
157 "movq %%mm0, -16(%0) \n\t"
158 "movq -8(%0, %2), %%mm1 \n\t"
159 "punpckhbw %%mm1, %%mm1 \n\t"
160 "punpckhwd %%mm1, %%mm1 \n\t"
161 "punpckhdq %%mm1, %%mm1 \n\t"
162 "movq %%mm1, (%0, %2) \n\t"
163 "movq %%mm1, 8(%0, %2) \n\t"
164 "add %1, %0 \n\t"
165 "cmp %3, %0 \n\t"
166 "jnz 1b \n\t"
167 : "+r"(ptr)
168 55 : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
169 );
170 } else {
171 av_assert1(w == 4);
172 5 __asm__ volatile (
173 "1: \n\t"
174 "movd (%0), %%mm0 \n\t"
175 "punpcklbw %%mm0, %%mm0 \n\t"
176 "punpcklwd %%mm0, %%mm0 \n\t"
177 "movd %%mm0, -4(%0) \n\t"
178 "movd -4(%0, %2), %%mm1 \n\t"
179 "punpcklbw %%mm1, %%mm1 \n\t"
180 "punpckhwd %%mm1, %%mm1 \n\t"
181 "punpckhdq %%mm1, %%mm1 \n\t"
182 "movd %%mm1, (%0, %2) \n\t"
183 "add %1, %0 \n\t"
184 "cmp %3, %0 \n\t"
185 "jnz 1b \n\t"
186 : "+r" (ptr)
187 5 : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
188 5 "r" (ptr + wrap * height));
189 }
190
191 /* top and bottom + corners */
192 165 buf -= w;
193 165 last_line = buf + (height - 1) * wrap;
194 1/2 165 if (sides & EDGE_TOP)
        (branch 0 taken 165, branch 1 not taken)
195 2/2 1905 for (i = 0; i < h; i++)
        (branch 0 taken 1740, branch 1 taken 165)
196 // top
197 1740 memcpy(buf - (i + 1) * wrap, buf, width + w + w);
198 1/2 165 if (sides & EDGE_BOTTOM)
        (branch 0 taken 165, branch 1 not taken)
199 2/2 1905 for (i = 0; i < h; i++)
        (branch 0 taken 1740, branch 1 taken 165)
200 // bottom
201 1740 memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
202 165 }
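
The three MMX paths all implement the same pattern: broadcast the first and last pixel of every row into the w-byte left/right margins, then copy whole rows outward for the top/bottom margins. A scalar sketch of the equivalent logic (hypothetical helper mirroring the code above; needs <string.h> plus the EDGE_* flags from mpegvideoencdsp.h):

    static void draw_edges_ref(uint8_t *buf, ptrdiff_t wrap, int width,
                               int height, int w, int h, int sides)
    {
        for (int y = 0; y < height; y++) {
            uint8_t *row = buf + y * wrap;
            memset(row - w,     row[0],         w);  /* left margin  */
            memset(row + width, row[width - 1], w);  /* right margin */
        }
        buf -= w;                           /* rows now span width + 2*w */
        uint8_t *last_line = buf + (height - 1) * wrap;
        if (sides & EDGE_TOP)
            for (int i = 0; i < h; i++)
                memcpy(buf - (i + 1) * wrap, buf, width + 2 * w);
        if (sides & EDGE_BOTTOM)
            for (int i = 0; i < h; i++)
                memcpy(last_line + (i + 1) * wrap, last_line, width + 2 * w);
    }
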
203
204 #endif /* HAVE_INLINE_ASM */
205
206 377 av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
207 AVCodecContext *avctx)
208 {
209 377 int cpu_flags = av_get_cpu_flags();
210
211 2/2 377 if (EXTERNAL_SSE2(cpu_flags)) {
        (branch 0 taken 10, branch 1 taken 367)
212 10 c->pix_sum = ff_pix_sum16_sse2;
213 10 c->pix_norm1 = ff_pix_norm1_sse2;
214 }
215
216 1/2 377 if (EXTERNAL_XOP(cpu_flags)) {
        (branch 0 not taken, branch 1 taken 377)
217 c->pix_sum = ff_pix_sum16_xop;
218 }
219
220 #if HAVE_INLINE_ASM
221
222 2/2 377 if (INLINE_MMX(cpu_flags)) {
        (branch 0 taken 13, branch 1 taken 364)
223 1/2 13 if (avctx->bits_per_raw_sample <= 8) {
        (branch 0 taken 13, branch 1 not taken)
224 13 c->draw_edges = draw_edges_mmx;
225 }
226 }
227
228 #if HAVE_SSSE3_INLINE
229 2/2 377 if (INLINE_SSSE3(cpu_flags)) {
        (branch 0 taken 8, branch 1 taken 369)
230 2/2 8 if (!(avctx->flags & AV_CODEC_FLAG_BITEXACT)) {
        (branch 0 taken 7, branch 1 taken 1)
231 7 c->try_8x8basis = try_8x8basis_ssse3;
232 }
233 8 c->add_8x8basis = add_8x8basis_ssse3;
234 }
235 #endif /* HAVE_SSSE3_INLINE */
236
237 #endif /* HAVE_INLINE_ASM */
238 377 }
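
Callers never invoke these routines directly; they go through the function-pointer table this init fills in. A hypothetical call site (assuming FFmpeg's generic ff_mpegvideoencdsp_init, which dispatches to this x86 init on x86 builds):

    MpegvideoEncDSPContext c;
    ff_mpegvideoencdsp_init(&c, avctx);       /* C defaults, then x86 overrides */
    int sum  = c.pix_sum(block, linesize);    /* ff_pix_sum16_sse2 with SSE2    */
    int norm = c.pix_norm1(block, linesize);  /* ff_pix_norm1_sse2 with SSE2    */
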
239