FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegvideo.c
Date: 2026-01-16 07:34:38
             Exec   Total   Coverage
Lines:         54      68      79.4%
Functions:      6       7      85.7%
Branches:      14      20      70.0%

Line Branch Exec Source
1 /*
2 * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
3 * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "libavutil/attributes.h"
23 #include "libavutil/avassert.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/mpegvideo.h"
28 #include "libavcodec/mpegvideodata.h"
29 #include "libavcodec/mpegvideo_unquantize.h"
30
31 #if HAVE_SSE2_INLINE
32
33 #define SPLATW(reg) "punpcklwd %%" #reg ", %%" #reg "\n\t" \
34 "pshufd $0, %%" #reg ", %%" #reg "\n\t"
35
36 #if HAVE_SSSE3_INLINE
37
38 194461 static void dct_unquantize_h263_intra_ssse3(const MPVContext *s,
39 int16_t *block, int n, int qscale)
40 {
41 194461 x86_reg qmul = (unsigned)qscale << 1;
42 int level, qadd;
43
44 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
45
46 2/2 194461 if (!s->h263_aic) {
       ✓ Branch 0 taken 194460 times.  ✓ Branch 1 taken 1 times.
47 2/2 194460 if (n < 4)
       ✓ Branch 0 taken 129640 times.  ✓ Branch 1 taken 64820 times.
48 129640 level = block[0] * s->y_dc_scale;
49 else
50 64820 level = block[0] * s->c_dc_scale;
51 194460 qadd = (qscale - 1) | 1;
52 }else{
53 1 qadd = 0;
54 1 level= block[0];
55 }
56 2/2 194461 x86_reg offset = s->ac_pred ? 63 << 1 : s->intra_scantable.raster_end[s->block_last_index[n]] << 1;
       ✓ Branch 0 taken 194460 times.  ✓ Branch 1 taken 1 times.
57
58 194461 __asm__ volatile(
59 "movd %k1, %%xmm0 \n\t" //qmul
60 "lea (%2, %0), %1 \n\t"
61 "neg %0 \n\t"
62 "movd %3, %%xmm1 \n\t" //qadd
63 SPLATW(xmm0)
64 SPLATW(xmm1)
65
66 ".p2align 4 \n\t"
67 "1: \n\t"
68 "movdqa (%1, %0), %%xmm2 \n\t"
69 "movdqa 16(%1, %0), %%xmm3 \n\t"
70
71 "movdqa %%xmm1, %%xmm4 \n\t"
72 "movdqa %%xmm1, %%xmm5 \n\t"
73
74 "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd
75 "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd
76
77 "pmullw %%xmm0, %%xmm2 \n\t"
78 "pmullw %%xmm0, %%xmm3 \n\t"
79
80 "paddw %%xmm4, %%xmm2 \n\t"
81 "paddw %%xmm5, %%xmm3 \n\t"
82
83 "movdqa %%xmm2, (%1, %0) \n\t"
84 "movdqa %%xmm3, 16(%1, %0) \n\t"
85
86 "add $32, %0 \n\t"
87 "jng 1b \n\t"
88 : "+r"(offset), "+r"(qmul)
89 : "r" (block), "rm" (qadd)
90 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
91 );
92 194461 block[0]= level;
93 194461 }
94
95
96 49920 static void dct_unquantize_h263_inter_ssse3(const MPVContext *s,
97 int16_t *block, int n, int qscale)
98 {
99 49920 int qmul = qscale << 1;
100 49920 int qadd = (qscale - 1) | 1;
101
102 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
103
104 49920 x86_reg offset = s->inter_scantable.raster_end[s->block_last_index[n]] << 1;
105
106 49920 __asm__ volatile(
107 "movd %2, %%xmm0 \n\t" //qmul
108 "movd %3, %%xmm1 \n\t" //qadd
109 "add %1, %0 \n\t"
110 "neg %1 \n\t"
111 SPLATW(xmm0)
112 SPLATW(xmm1)
113
114 ".p2align 4 \n\t"
115 "1: \n\t"
116 "movdqa (%0, %1), %%xmm2 \n\t"
117 "movdqa 16(%0, %1), %%xmm3 \n\t"
118
119 "movdqa %%xmm1, %%xmm4 \n\t"
120 "movdqa %%xmm1, %%xmm5 \n\t"
121
122 "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd
123 "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd
124
125 "pmullw %%xmm0, %%xmm2 \n\t"
126 "pmullw %%xmm0, %%xmm3 \n\t"
127
128 "paddw %%xmm4, %%xmm2 \n\t"
129 "paddw %%xmm5, %%xmm3 \n\t"
130
131 "movdqa %%xmm2, (%0, %1) \n\t"
132 "movdqa %%xmm3, 16(%0, %1) \n\t"
133
134 "add $32, %1 \n\t"
135 "jng 1b \n\t"
136 : "+r" (block), "+r" (offset)
137 : "rm"(qmul), "rm" (qadd)
138 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
139 );
140 49920 }
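
Both H.263 kernels above vectorize the same per-coefficient update: multiply by qmul = 2*qscale, then bias away from zero by qadd = (qscale - 1) | 1, with psignw producing sgn(block[i])*qadd so that zero coefficients stay zero; the intra variant additionally recomputes the DC term in C on either side of the asm. A scalar model of the loop, as a sketch only (the authoritative C references live in libavcodec/mpegvideo_unquantize.c; 16-bit pmullw/paddw wraparound is ignored here):

#include <stdint.h>

/* Scalar sketch of the SSSE3 H.263 dequantize loops. */
static void h263_dequant_sketch(int16_t *block, int last, int qmul, int qadd)
{
    for (int i = 0; i <= last; i++) {
        int level = block[i];
        if (level > 0)
            level = level * qmul + qadd;   /* pmullw, then paddw of +qadd */
        else if (level < 0)
            level = level * qmul - qadd;   /* psignw flipped the bias sign */
        block[i] = (int16_t)level;         /* zero is left at zero */
    }
}
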
141
142 1 static void dct_unquantize_mpeg1_intra_ssse3(const MPVContext *s,
143 int16_t *block, int n, int qscale)
144 {
145 x86_reg nCoeffs;
146 const uint16_t *quant_matrix;
147 int block0;
148
149 av_assert2(s->block_last_index[n]>=0);
150
151 1 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
152
153 1/2 1 if (n < 4)
        ✓ Branch 0 taken 1 times.  ✗ Branch 1 not taken.
154 1 block0 = block[0] * s->y_dc_scale;
155 else
156 block0 = block[0] * s->c_dc_scale;
157 /* XXX: only MPEG-1 */
158 1 quant_matrix = s->intra_matrix;
159 1 x86_reg offset = -2 * nCoeffs;
160 1 __asm__ volatile(
161 "movd %3, %%xmm6 \n\t"
162 "pcmpeqw %%xmm7, %%xmm7 \n\t"
163 "psrlw $15, %%xmm7 \n\t"
164 SPLATW(xmm6)
165 ".p2align 4 \n\t"
166 "1: \n\t"
167 "movdqa (%2, %0), %%xmm4 \n\t"
168 "movdqa 16(%2, %0), %%xmm5 \n\t"
169 "movdqa (%1, %0), %%xmm0 \n\t"
170 "movdqa 16(%1, %0), %%xmm1 \n\t"
171 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
172 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
173 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
174 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
175 "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*q
176 "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*q
177 "psraw $3, %%xmm2 \n\t"
178 "psraw $3, %%xmm3 \n\t"
179 "psubw %%xmm7, %%xmm2 \n\t"
180 "psubw %%xmm7, %%xmm3 \n\t"
181 "por %%xmm7, %%xmm2 \n\t"
182 "por %%xmm7, %%xmm3 \n\t"
183 "psignw %%xmm0, %%xmm2 \n\t"
184 "psignw %%xmm1, %%xmm3 \n\t"
185 "movdqa %%xmm2, (%1, %0) \n\t"
186 "movdqa %%xmm3, 16(%1, %0) \n\t"
187
188 "add $32, %0 \n\t"
189 "js 1b \n\t"
190 : "+r" (offset)
191 1 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
192 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
193 "memory"
194 );
195 1 block[0]= block0;
196 1 }
197
198 1 static void dct_unquantize_mpeg1_inter_ssse3(const MPVContext *s,
199 int16_t *block, int n, int qscale)
200 {
201 x86_reg nCoeffs;
202 const uint16_t *quant_matrix;
203
204 av_assert2(s->block_last_index[n]>=0);
205
206 1 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
207
208 1 quant_matrix = s->inter_matrix;
209 1 x86_reg offset = -2 * nCoeffs;
210 1 __asm__ volatile(
211 "movd %3, %%xmm6 \n\t"
212 "pcmpeqw %%xmm7, %%xmm7 \n\t"
213 "psrlw $15, %%xmm7 \n\t"
214 SPLATW(xmm6)
215 ".p2align 4 \n\t"
216 "1: \n\t"
217 "movdqa (%2, %0), %%xmm4 \n\t"
218 "movdqa 16(%2, %0), %%xmm5 \n\t"
219 "movdqa (%1, %0), %%xmm0 \n\t"
220 "movdqa 16(%1, %0), %%xmm1 \n\t"
221 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
222 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
223 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
224 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
225 "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2
226 "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2
227 "paddw %%xmm7, %%xmm2 \n\t" // abs(block[i])*2 + 1
228 "paddw %%xmm7, %%xmm3 \n\t" // abs(block[i])*2 + 1
229 "pmullw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q
230 "pmullw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q
231 "psraw $4, %%xmm2 \n\t"
232 "psraw $4, %%xmm3 \n\t"
233 "psubw %%xmm7, %%xmm2 \n\t"
234 "psubw %%xmm7, %%xmm3 \n\t"
235 "por %%xmm7, %%xmm2 \n\t"
236 "por %%xmm7, %%xmm3 \n\t"
237 "psignw %%xmm0, %%xmm2 \n\t"
238 "psignw %%xmm1, %%xmm3 \n\t"
239 "movdqa %%xmm2, (%1, %0) \n\t"
240 "movdqa %%xmm3, 16(%1, %0) \n\t"
241
242 "add $32, %0 \n\t"
243 "js 1b \n\t"
244 : "+r" (offset)
245 1 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
246 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
247 "memory"
248 );
249 1 }
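
The two MPEG-1 kernels above differ only in the reconstruction formula (intra: (|x|*q) >> 3; inter: ((2*|x| + 1)*q) >> 4, where q = qscale*quant_matrix[i] comes from the pmullw %%xmm6 products); both then apply the (x - 1) | 1 odd-ification carried out by the psubw/por pair against the per-word 1s in xmm7, and both use psignw to restore the sign while keeping zeros at zero. A scalar sketch of one coefficient, ignoring 16-bit truncation:

#include <stdint.h>
#include <stdlib.h>

static int16_t mpeg1_dequant_sketch(int16_t coeff, int q, int inter)
{
    int a = abs(coeff);
    if (inter)
        a = ((2 * a + 1) * q) >> 4;            /* paddw; paddw +1; pmullw; psraw $4 */
    else
        a = (a * q) >> 3;                      /* pmullw; psraw $3 */
    a = (a - 1) | 1;                           /* psubw 1; por 1: force odd */
    return coeff > 0 ? a : coeff < 0 ? -a : 0; /* psignw */
}
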
250
251 #endif /* HAVE_SSSE3_INLINE */
252
253 static void dct_unquantize_mpeg2_intra_sse2(const MPVContext *s,
254 int16_t *block, int n, int qscale)
255 {
256 x86_reg nCoeffs;
257 const uint16_t *quant_matrix;
258 int block0;
259
260 av_assert2(s->block_last_index[n]>=0);
261
262 if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
263 else qscale <<= 1;
264
265 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
266
267 if (n < 4)
268 block0 = block[0] * s->y_dc_scale;
269 else
270 block0 = block[0] * s->c_dc_scale;
271 quant_matrix = s->intra_matrix;
272 x86_reg offset = -2 * nCoeffs;
273 __asm__ volatile(
274 "movd %3, %%xmm6 \n\t"
275 SPLATW(xmm6)
276 ".p2align 4 \n\t"
277 "1: \n\t"
278 "movdqa (%1, %0), %%xmm0 \n\t"
279 "movdqa 16(%1, %0), %%xmm1 \n\t"
280 "movdqa (%2, %0), %%xmm4 \n\t"
281 "movdqa 16(%2, %0), %%xmm5 \n\t"
282 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
283 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
284 "movdqa %%xmm0, %%xmm2 \n\t"
285 "movdqa %%xmm1, %%xmm3 \n\t"
286 "psrlw $12, %%xmm2 \n\t" // block[i] < 0 ? 0xf : 0
287 "psrlw $12, %%xmm3 \n\t" // (block[i] is in the -2048..2047 range)
288 "pmullw %%xmm4, %%xmm0 \n\t" // block[i]*q
289 "pmullw %%xmm5, %%xmm1 \n\t" // block[i]*q
290 "paddw %%xmm2, %%xmm0 \n\t" // bias negative block[i]
291 "paddw %%xmm3, %%xmm1 \n\t" // so that a right-shift
292 "psraw $4, %%xmm0 \n\t" // is equivalent to divide
293 "psraw $4, %%xmm1 \n\t" // with rounding towards zero
294 "movdqa %%xmm0, (%1, %0) \n\t"
295 "movdqa %%xmm1, 16(%1, %0) \n\t"
296
297 "add $32, %0 \n\t"
298 "jng 1b \n\t"
299 : "+r" (offset)
300 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
301 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6",)
302 "memory"
303 );
304 block[0]= block0;
305 //Note, we do not do mismatch control for intra as errors cannot accumulate
306 }
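
The psrlw $12 / paddw / psraw $4 tail above is a branch-free round-toward-zero division by 16: with coefficients confined to -2048..2047, a logical right shift by 12 yields 15 exactly when block[i] is negative and 0 otherwise, and adding that bias before the arithmetic shift turns flooring into truncation. One lane in scalar form, as a sketch:

#include <stdint.h>

/* q stands for qscale * quant_matrix[i]; every step keeps 16 bits, as the
 * packed-word instructions do. */
static int16_t mpeg2_intra_sketch(int16_t coeff, uint16_t q)
{
    int16_t prod = (int16_t)(coeff * q);   /* pmullw: low 16 bits of product */
    int16_t bias = (uint16_t)coeff >> 12;  /* psrlw $12: 15 if coeff < 0 */
    int16_t sum  = (int16_t)(prod + bias); /* paddw */
    return (int16_t)(sum >> 4);            /* psraw $4: truncate toward zero */
}
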
307
308 #if HAVE_SSSE3_INLINE
309
310 5942 static void dct_unquantize_mpeg2_inter_ssse3(const MPVContext *s,
311 int16_t *block, int n, int qscale)
312 {
313 av_assert2(s->block_last_index[n]>=0);
314
315 1/2 5942 x86_reg qscale2 = s->q_scale_type ? ff_mpeg2_non_linear_qscale[qscale] : (unsigned)qscale << 1;
        ✗ Branch 0 not taken.  ✓ Branch 1 taken 5942 times.
316 5942 x86_reg offset = s->intra_scantable.raster_end[s->block_last_index[n]] << 1;
317 5942 const void *quant_matrix = (const char*)s->inter_matrix + offset;
318
319
320 5942 __asm__ volatile(
321 "movd %k1, %%xmm6 \n\t"
322 "lea (%2, %0), %1 \n\t"
323 "neg %0 \n\t"
324 SPLATW(xmm6)
325 "pcmpeqw %%xmm7, %%xmm7 \n\t"
326 "psrldq $14, %%xmm7 \n\t"
327 ".p2align 4 \n\t"
328 "1: \n\t"
329 "movdqa (%3, %0), %%xmm4 \n\t"
330 "movdqa 16(%3, %0), %%xmm5 \n\t"
331 "movdqa (%1, %0), %%xmm0 \n\t"
332 "movdqa 16(%1, %0), %%xmm1 \n\t"
333 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
334 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
335 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
336 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
337 "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2
338 "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2
339 "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*2*q
340 "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*2*q
341 "paddw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q
342 "paddw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q
343 "psrlw $5, %%xmm2 \n\t"
344 "psrlw $5, %%xmm3 \n\t"
345 "psignw %%xmm0, %%xmm2 \n\t"
346 "psignw %%xmm1, %%xmm3 \n\t"
347 "movdqa %%xmm2, (%1, %0) \n\t"
348 "movdqa %%xmm3, 16(%1, %0) \n\t"
349 "pxor %%xmm2, %%xmm7 \n\t"
350 "pxor %%xmm3, %%xmm7 \n\t"
351
352 "add $32, %0 \n\t"
353 "jng 1b \n\t"
354 "movd 124(%2), %%xmm0 \n\t"
355 "movhlps %%xmm7, %%xmm6 \n\t"
356 "pxor %%xmm6, %%xmm7 \n\t"
357 "pshufd $1, %%xmm7, %%xmm6 \n\t"
358 "pxor %%xmm6, %%xmm7 \n\t"
359 "pshuflw $1, %%xmm7, %%xmm6 \n\t"
360 "pxor %%xmm6, %%xmm7 \n\t"
361 "pslld $31, %%xmm7 \n\t"
362 "psrld $15, %%xmm7 \n\t"
363 "pxor %%xmm7, %%xmm0 \n\t"
364 "movd %%xmm0, 124(%2) \n\t"
365
366 : "+r"(offset), "+r" (qscale2)
367 : "r" (block), "r"(quant_matrix)
368 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
369 "memory"
370 );
371 5942 }
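
Past the dequantize loop, this kernel also performs the MPEG-2 mismatch control that the intra SSE2 version deliberately skips. xmm7 is seeded with 0xFFFF in its low word (pcmpeqw/psrldq $14), every stored result is XORed in, and the movhlps/pshufd/pshuflw folds reduce it to one word whose bit 0 is the parity of the coefficient sum (bit 0 of a sum equals the XOR of its addends' bit 0s). pslld $31 / psrld $15 then lands that bit on bit 16 of a dword, i.e. the LSB of block[63], which the closing movd/pxor/movd toggles in place. In scalar form, mirroring the sum = -1 seed of the C reference in libavcodec/mpegvideo_unquantize.c (a sketch):

#include <stdint.h>

static void mpeg2_mismatch_sketch(int16_t block[64], int last)
{
    int sum = -1;                 /* the 0xFFFF word seeded into xmm7 */
    for (int i = 0; i <= last; i++)
        sum += block[i];          /* dequantized coefficients */
    block[63] ^= sum & 1;         /* flip the LSB iff the sum is even */
}
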
372
373 #endif /* HAVE_SSSE3_INLINE */
374 #endif /* HAVE_SSE2_INLINE */
375
376 696 av_cold void ff_mpv_unquantize_init_x86(MPVUnquantDSPContext *s, int bitexact)
377 {
378 #if HAVE_SSE2_INLINE
379 696 int cpu_flags = av_get_cpu_flags();
380
381 2/2 696 if (INLINE_SSE2(cpu_flags)) {
        ✓ Branch 0 taken 96 times.  ✓ Branch 1 taken 600 times.
382 2/2 96 if (!bitexact)
        ✓ Branch 0 taken 81 times.  ✓ Branch 1 taken 15 times.
383 81 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_sse2;
384 }
385 #if HAVE_SSSE3_INLINE
386 2/2 696 if (INLINE_SSSE3(cpu_flags)) {
        ✓ Branch 0 taken 94 times.  ✓ Branch 1 taken 602 times.
387 94 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_ssse3;
388 94 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_ssse3;
389 94 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_ssse3;
390 94 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_ssse3;
391 94 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_ssse3;
392 }
393 #endif /* HAVE_SSSE3_INLINE */
394 #endif /* HAVE_SSE2_INLINE */
395 696 }
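
ff_mpv_unquantize_init_x86() only overrides the caller's function pointers where the CPU flags permit the inline SSE2/SSSE3 code, and the MPEG-2 intra SSE2 pointer is additionally gated on !bitexact, presumably because the bit-exact reference path includes the intra mismatch control that this kernel's closing comment says it omits. A hypothetical wrapper showing the dispatch, as a sketch only (the zero-initialization stands in for the C defaults a real decoder installs first; the wrapper name is illustrative):

void unquantize_h263_intra_block(const MPVContext *s, int16_t block[64],
                                 int n, int qscale, int bitexact)
{
    MPVUnquantDSPContext dsp = { 0 };
    ff_mpv_unquantize_init_x86(&dsp, bitexact);
    if (dsp.dct_unquantize_h263_intra)            /* set only with SSSE3 */
        dsp.dct_unquantize_h263_intra(s, block, n, qscale);
}
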
396