FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpeg4videodsp.c
Date: 2026-04-24 19:58:39
Exec Total Coverage
Lines: 39 41 95.1%
Functions: 2 2 100.0%
Branches: 11 16 68.8%

Line Branch Exec Source
1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "config.h"
20 #include "libavutil/attributes.h"
21 #include "libavutil/cpu.h"
22 #include "libavutil/mem_internal.h"
23 #include "libavutil/x86/asm.h"
24 #include "libavutil/x86/cpu.h"
25 #include "libavcodec/mpeg4videodsp.h"
26 #include "videodsp.h"
27
28 #if HAVE_SSSE3_INLINE
29
// SPLATW(reg): emit asm that broadcasts the low 16-bit word of an XMM
// register to all eight word lanes (pshuflw replicates it across the
// low qword, punpcklqdq then duplicates that qword into the high half).
30 #define SPLATW(reg) "pshuflw $0, %%" #reg ", %%" #reg "\n\t" \
31 "punpcklqdq %%" #reg ", %%" #reg "\n\t"
32
// Eight uint16_t with 16-byte alignment: exactly one XMM register's
// worth of word lanes, used for asm constants and register spills.
33 typedef struct {
34 DECLARE_ALIGNED_16(uint16_t, u16)[8];
35 } xmm_u16;
36
// Constant lane indices {0..7}: multiplied by the per-column coordinate
// deltas to seed the per-lane accumulators in gmc_ssse3().
37 DECLARE_ASM_CONST(16, xmm_u16, pw_0to7) = { { 0, 1, 2, 3, 4, 5, 6, 7 } };
38
// SSSE3 inline-asm implementation of MPEG-4 global motion compensation
// (GMC): produces an 8-pixel-wide, h-row block of bilinearly
// interpolated pixels whose per-pixel source coordinates follow the
// affine field given by (ox,oy) and the deltas (dxx,dxy)/(dyx,dyy).
// Inputs the SIMD path cannot handle are delegated to the scalar
// ff_gmc_c() fallback, which is bit-exact by construction.
// NOTE(review): the numeric prefixes and the ✓/✗ lines below are gcov
// coverage annotations from the report this listing was extracted
// from; the C text itself is unchanged.
39 2 static void gmc_ssse3(uint8_t *dst, const uint8_t *src,
40 int stride, int h, int ox, int oy,
41 int dxx, int dxy, int dyx, int dyy,
42 int shift, int r, int width, int height)
43 {
44 enum {
45 W = 8,
46 EDGE_EMU_STRIDE = 16, //< anything >= W+1 will do
47 MAX_H = 16,
48 };
// Split the fixed-point offsets (16+shift fractional bits) into the
// integer start position (ix,iy) and fractional remainders (ox2,oy2).
// The ">> 4" variants rescale so the per-lane accumulators fit 16-bit
// SIMD words; the asm later takes the top 4 bits (psrlw $12) as the
// sub-pel weight.  Precedence note: "- 1" binds tighter than "&", so
// ox2/oy2 mask off the integer part as intended.
49 2 const int w = 8;
50 2 const int ix = ox >> (16 + shift);
51 2 const int iy = oy >> (16 + shift);
52 2 const int ox2 = ox & (1 << (16 + shift)) - 1;
53 2 const int oy2 = oy & (1 << (16 + shift)) - 1;
54 2 const int oxs = ox2 >> 4;
55 2 const int oys = oy2 >> 4;
56 2 const int dxx2 = dxx - (1 << (16 + shift));
57 2 const int dyy2 = dyy - (1 << (16 + shift));
58 2 const int dxxs = dxx2 >> 4;
59 2 const int dxys = dxy >> 4;
60 2 const int dyxs = dyx >> 4;
61 2 const int dyys = dyy2 >> 4;
62
// Coordinate excursions across the block's width/height, used below to
// verify the fullpel offset stays constant over the whole 8 x h block.
63 2 const int dxw = dxx2 * (w - 1);
64 2 const int dyh = dyy2 * (h - 1);
65 2 const int dxh = dxy * (h - 1);
66 2 const int dyw = dyx * (w - 1);
// The loop reads a (w+1) x (h+1) source window; emulate edges whenever
// any of it would fall outside the picture (the unsigned compares also
// catch negative ix/iy).
67
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 int need_emu = (unsigned) ix >= width - w || width < w ||
68
4/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
4 (unsigned) iy >= height - h || height< h
69 ;
70
// Preconditions for the SIMD path; anything else takes the scalar
// fallback and returns.
71 2 if ( // non-constant fullpel offset (3% of blocks)
72 2 ((ox2 + dxw) | (ox2 + dxh) | (ox2 + dxw + dxh) |
73
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 (oy2 + dyw) | (oy2 + dyh) | (oy2 + dyw + dyh)) >> (16 + shift) ||
74 // uses more than 16 bits of subpel mv (only at huge resolution)
75
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 (dxx | dxy | dyx | dyy) & 15 ||
76 (!HAVE_SSE2_EXTERNAL && need_emu)) {
77 ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
78 shift, r, width, height);
79 return;
80 }
81
// Advance to the constant fullpel start; only the fractional
// accumulators vary per pixel from here on.
82 2 src += ix + iy * stride;
83 2 const ptrdiff_t dst_stride = stride;
84 2 ptrdiff_t src_stride = stride;
85 #if HAVE_SSE2_EXTERNAL
// Edge emulation: copy the (w+1) x (h+1) window into a stack buffer
// with replicated borders and read from that instead of the picture.
86 uint8_t edge_buf[(MAX_H + 1) * EDGE_EMU_STRIDE];
87
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (need_emu) {
88 1 ff_emulated_edge_mc_sse2(edge_buf, src, EDGE_EMU_STRIDE, src_stride,
89 w + 1, h + 1, ix, iy, width, height);
90 1 src = edge_buf;
91 1 src_stride = EDGE_EMU_STRIDE;
92 }
93 #endif
94
// x86-32 has only xmm0-xmm7, so three loop constants and the shift
// count are spilled to aligned memory; x86-64 keeps them resident in
// xmm8/xmm9/xmm11/xmm12 instead.
95 #if ARCH_X86_32
96 xmm_u16 dxy8, dyy8, r8;
97 DECLARE_ALIGNED_16(uint64_t, shift2) = 2 * shift;
98 #endif
99
// Register roles in the loop below:
//   xmm1 = horizontal sub-pel accumulator (seeded oxs + dxxs * {0..7})
//   xmm7 = vertical   sub-pel accumulator (seeded oys + dyxs * {0..7})
//   xmm6 = splat(1 << shift), the full interpolation weight "s"
//   xmm5 = current row's src[x]/src[x+1] bytes interleaved
// pmaddubsw performs the horizontal lerp of two adjacent rows,
// pmullw/paddw blends them vertically, then (+r) >> (2*shift) rounds
// the result back to 8 bits.
100 2 __asm__ volatile (
101 "movd %[dxxs], %%xmm2 \n\t"
102 "movd %[dyxs], %%xmm3 \n\t"
103 "movd %[oxs], %%xmm1 \n\t"
104 SPLATW(xmm2)
105 "movd %[oys], %%xmm7 \n\t"
106 SPLATW(xmm3)
107 "pmullw "MANGLE(pw_0to7)", %%xmm2 \n\t"
108 SPLATW(xmm1)
109 "movd %[s], %%xmm6 \n\t"
110 "pmullw "MANGLE(pw_0to7)", %%xmm3 \n\t"
111 "movq (%[src]), %%xmm5 \n\t"
112 SPLATW(xmm7)
113 #if ARCH_X86_32
114 "movd %[dxys], %%xmm0 \n\t"
115 #else
116 "movd %[dxys], %%xmm11 \n\t"
117 #endif
118 "paddw %%xmm2, %%xmm1 \n\t"
119 "movq 1(%[src]), %%xmm2 \n\t"
120 SPLATW(xmm6)
121 #if ARCH_X86_32
122 "movd %[dyys], %%xmm4 \n\t"
123 #else
124 "movd %[dyys], %%xmm9 \n\t"
125 #endif
126 "paddw %%xmm3, %%xmm7 \n\t"
127 "punpcklbw %%xmm2, %%xmm5 \n\t"
128 #if ARCH_X86_32
129 SPLATW(xmm0)
130 "movd %[r], %%xmm2 \n\t"
131 SPLATW(xmm4)
132 "movdqa %%xmm0, %[dxy8] \n\t"
133 SPLATW(xmm2)
134 "movdqa %%xmm4, %[dyy8] \n\t"
135 "movdqa %%xmm2, %[r8] \n\t"
136 #else
137 SPLATW(xmm11)
138 "movd %[r], %%xmm8 \n\t"
139 SPLATW(xmm9)
140 SPLATW(xmm8)
141 "movd %[shift2], %%xmm12 \n\t"
142 #endif
143
// Per-row loop: interpolate 8 output pixels, then step the sub-pel
// accumulators by the per-row deltas.
144 "1: \n\t"
145 "add %[src_stride], %[src] \n\t"
146 "movq (%[src]), %%xmm3 \n\t"
147 "movq 1(%[src]), %%xmm0 \n\t"
148 "movdqa %%xmm1, %%xmm4 \n\t"
149 "psrlw $12, %%xmm4 \n\t" // dx
150 "movdqa %%xmm6, %%xmm2 \n\t"
151 "psubw %%xmm4, %%xmm2 \n\t" // (s-dx)
152 "psllw $8, %%xmm4 \n\t"
153 "por %%xmm4, %%xmm2 \n\t" // s-dx,dx,s-dx,dx (bytes)
154 "pmaddubsw %%xmm2, %%xmm5 \n\t" // src[0, 0] * (s - dx) + src[1,0] * dx
155 "punpcklbw %%xmm0, %%xmm3 \n\t"
156 "movdqa %%xmm3, %%xmm0 \n\t"
157 "pmaddubsw %%xmm2, %%xmm3 \n\t" // src[0, 1] * (s - dx) + src[1,1] * dx
158 #if ARCH_X86_32
159 "paddw %[dxy8], %%xmm1 \n\t"
160 #else
161 "paddw %%xmm11, %%xmm1 \n\t"
162 #endif
163 "movdqa %%xmm7, %%xmm4 \n\t"
164 "movdqa %%xmm6, %%xmm2 \n\t"
165 "psrlw $12, %%xmm4 \n\t" // dy
166 "psubw %%xmm4, %%xmm2 \n\t" // (s-dy)
167 "pmullw %%xmm5, %%xmm2 \n\t" // (src[0, 0] * (s - dx) + src[1,0] * dx) * (s - dy)
168 #if ARCH_X86_32
169 "paddw %[dyy8], %%xmm7 \n\t"
170 #else
171 "paddw %%xmm9, %%xmm7 \n\t"
172 #endif
173 "pmullw %%xmm3, %%xmm4 \n\t" // (src[0, 1] * (s - dx) + src[1,1] * dx) * dy
174
175 #if ARCH_X86_32
176 "paddw %[r8], %%xmm2 \n\t"
177 #else
178 "paddw %%xmm8, %%xmm2 \n\t"
179 #endif
180 "paddw %%xmm2, %%xmm4 \n\t"
181
// Round ((blend + r) >> 2*shift), narrow words to unsigned bytes and
// store the 8-pixel output row.
182 #if ARCH_X86_32
183 "psrlw %[shift2], %%xmm4 \n\t"
184 #else
185 "psrlw %%xmm12, %%xmm4 \n\t"
186 #endif
187 "packuswb %%xmm4, %%xmm4 \n\t"
188 "movq %%xmm4, (%[dst]) \n\t"
189 "movdqa %%xmm0, %%xmm5 \n\t" // this row becomes next iteration's top row
190 "add %[dst_stride], %[dst] \n\t"
191
192 "decl %[h] \n\t"
193 "jnz 1b \n\t"
194 : [dst]"+r"(dst), [src]"+r"(src),
195 #if HAVE_6REGS || HAVE_INLINE_ASM_DIRECT_SYMBOL_REFS
196 [h]"+r"(h)
197 #else
198 [h]"+m"(h)
199 #endif
200 #if ARCH_X86_32
201 , [dxy8]"=m" (dxy8), [dyy8]"=m" (dyy8), [r8]"=m" (r8)
202 #endif
203 : [dst_stride]"r"(dst_stride), [src_stride]"r"(src_stride),
204 2 [s]"g" (1 << shift),
205 #if ARCH_X86_32
206 [shift2]"m" (shift2),
207 #else
208 2 [shift2]"g" (2*shift),
209 #endif
210 [oxs]"g"(oxs), [oys]"g"(oys), [dxxs]"g"(dxxs), [dyxs]"g"(dyxs),
211 [dxys]"g"(dxys), [dyys]"g"(dyys), [r]"g"(r) NAMED_CONSTRAINTS_ADD(pw_0to7)
212 : XMM_CLOBBERS("xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",)
213 #if ARCH_X86_64
214 XMM_CLOBBERS("xmm8", "xmm9", "xmm10", "xmm11", "xmm12",)
215 #endif
216 "memory");
217 }
218
219 #endif /* HAVE_SSSE3_INLINE */
220
// Runtime dispatch: install the SSSE3 GMC routine into the context when
// both the build (HAVE_SSSE3_INLINE) and the running CPU support it;
// otherwise the context's existing (C) function pointer is left as-is.
221 201 av_cold void ff_mpeg4videodsp_init_x86(Mpeg4VideoDSPContext *c)
222 {
223 #if HAVE_SSSE3_INLINE
224 201 int cpu_flags = av_get_cpu_flags();
225
226
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 159 times.
201 if (INLINE_SSSE3(cpu_flags))
227 42 c->gmc = gmc_ssse3;
228 #endif /* HAVE_SSSE3_INLINE */
229 201 }
230