Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * The simplest mpeg encoder (well, it was the simplest!) | ||
3 | * Copyright (c) 2000,2001 Fabrice Bellard | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | #include "libavutil/attributes.h" | ||
23 | #include "libavutil/cpu.h" | ||
24 | #include "libavutil/mem_internal.h" | ||
25 | #include "libavutil/x86/asm.h" | ||
26 | #include "libavutil/x86/cpu.h" | ||
27 | #include "libavcodec/avcodec.h" | ||
28 | #include "libavcodec/dct.h" | ||
29 | #include "libavcodec/mpegvideoenc.h" | ||
30 | |||
31 | /* Inverse of zigzag_direct, not permuted, with 1 added to every entry (so the values run 1..64); for the MMX quantizer. */ | ||
32 | DECLARE_ALIGNED(16, static const uint16_t, inv_zigzag_direct16)[64] = { /* not referenced by the code visible in this file; presumably consumed by mpegvideoenc_template.c - confirm */ | ||
33 | 1, 2, 6, 7, 15, 16, 28, 29, | ||
34 | 3, 5, 8, 14, 17, 27, 30, 43, | ||
35 | 4, 9, 13, 18, 26, 31, 42, 44, | ||
36 | 10, 12, 19, 25, 32, 41, 45, 54, | ||
37 | 11, 20, 24, 33, 40, 46, 53, 55, | ||
38 | 21, 23, 34, 39, 47, 52, 56, 61, | ||
39 | 22, 35, 38, 48, 51, 57, 60, 62, | ||
40 | 36, 37, 49, 50, 58, 59, 63, 64, | ||
41 | }; | ||
42 | |||
43 | #if HAVE_6REGS /* both template instantiations below are compiled only when HAVE_6REGS is set */ | ||
44 | |||
45 | #if HAVE_SSE2_INLINE /* first pass: include the template configured for SSE2 */ | ||
46 | #undef COMPILE_TEMPLATE_SSE2 | ||
47 | #undef COMPILE_TEMPLATE_SSSE3 | ||
48 | #define COMPILE_TEMPLATE_SSE2 1 /* SSE2 flag on for this pass */ | ||
49 | #define COMPILE_TEMPLATE_SSSE3 0 /* SSSE3 flag off for this pass */ | ||
50 | #undef RENAME | ||
51 | #undef RENAME_FDCT | ||
52 | #define RENAME(a) a ## _sse2 /* appends _sse2 to the name passed in */ | ||
53 | #define RENAME_FDCT(a) a ## _sse2 /* same suffix for names passed through RENAME_FDCT */ | ||
54 | #include "mpegvideoenc_template.c" /* presumably defines dct_quantize_sse2, which ff_dct_encode_init_x86 installs - confirm in the template */ | ||
55 | #endif /* HAVE_SSE2_INLINE */ | ||
56 | |||
57 | #if HAVE_SSSE3_INLINE /* second pass: include the same template configured for SSSE3 */ | ||
58 | #undef COMPILE_TEMPLATE_SSE2 | ||
59 | #undef COMPILE_TEMPLATE_SSSE3 | ||
60 | #define COMPILE_TEMPLATE_SSE2 1 /* SSE2 flag stays on in the SSSE3 pass */ | ||
61 | #define COMPILE_TEMPLATE_SSSE3 1 /* SSSE3 flag on as well */ | ||
62 | #undef RENAME | ||
63 | #undef RENAME_FDCT | ||
64 | #define RENAME(a) a ## _ssse3 /* appends _ssse3 to the name passed in */ | ||
65 | #define RENAME_FDCT(a) a ## _sse2 /* still _sse2, unlike RENAME; NOTE(review): presumably the SSSE3 pass reuses the SSE2 FDCT - confirm */ | ||
66 | #include "mpegvideoenc_template.c" /* presumably defines dct_quantize_ssse3 - confirm in the template */ | ||
67 | #endif /* HAVE_SSSE3_INLINE */ | ||
68 | |||
69 | #endif /* HAVE_6REGS */ | ||
70 | |||
71 | #if HAVE_INLINE_ASM | ||
72 | #if HAVE_SSE2_INLINE | ||
73 | ✗ | static void denoise_dct_sse2(MpegEncContext *s, int16_t *block){ /* For the 64 int16 coefficients at block: add each absolute value to s->dct_error_sum[intra], then reduce each magnitude by the matching s->dct_offset[intra] entry (clamped at zero, sign kept) and store it back. Also increments s->dct_count[intra]. */ | |
74 | ✗ | const int intra= s->mb_intra; /* index selecting which set of statistics is used */ | |
75 | ✗ | int *sum= s->dct_error_sum[intra]; /* accumulators updated in place by the asm, handled as 32-bit lanes */ | |
76 | ✗ | uint16_t *offset= s->dct_offset[intra]; /* per-coefficient amounts subtracted from the absolute values */ | |
77 | |||
78 | ✗ | s->dct_count[intra]++; /* record one more call for this index */ | |
79 | |||
80 | ✗ | __asm__ volatile( /* each loop iteration handles 16 coefficients; movdqa is the aligned move, so block, sum and offset must be 16-byte aligned */ | |
81 | "pxor %%xmm7, %%xmm7 \n\t" /* xmm7 = 0, used later to zero-extend words */ | ||
82 | "1: \n\t" /* loop head */ | ||
83 | "pxor %%xmm0, %%xmm0 \n\t" /* xmm0 = 0 */ | ||
84 | "pxor %%xmm1, %%xmm1 \n\t" /* xmm1 = 0 */ | ||
85 | "movdqa (%0), %%xmm2 \n\t" /* load coefficients 0..7 of this group */ | ||
86 | "movdqa 16(%0), %%xmm3 \n\t" /* load coefficients 8..15 of this group */ | ||
87 | "pcmpgtw %%xmm2, %%xmm0 \n\t" /* xmm0 = sign mask: all ones in lanes where the coefficient is negative */ | ||
88 | "pcmpgtw %%xmm3, %%xmm1 \n\t" /* xmm1 = sign mask for the second eight */ | ||
89 | "pxor %%xmm0, %%xmm2 \n\t" /* xor with the mask ... */ | ||
90 | "pxor %%xmm1, %%xmm3 \n\t" | ||
91 | "psubw %%xmm0, %%xmm2 \n\t" /* ... then subtract the mask: xmm2 = absolute values */ | ||
92 | "psubw %%xmm1, %%xmm3 \n\t" /* xmm3 = absolute values */ | ||
93 | "movdqa %%xmm2, %%xmm4 \n\t" /* keep a copy of the absolute values for the sums */ | ||
94 | "movdqa %%xmm3, %%xmm5 \n\t" | ||
95 | "psubusw (%2), %%xmm2 \n\t" /* unsigned saturating subtract of offset: result clamps at 0 */ | ||
96 | "psubusw 16(%2), %%xmm3 \n\t" | ||
97 | "pxor %%xmm0, %%xmm2 \n\t" /* reapply the original sign: xor with the mask ... */ | ||
98 | "pxor %%xmm1, %%xmm3 \n\t" | ||
99 | "psubw %%xmm0, %%xmm2 \n\t" /* ... and subtract the mask */ | ||
100 | "psubw %%xmm1, %%xmm3 \n\t" | ||
101 | "movdqa %%xmm2, (%0) \n\t" /* store the reduced coefficients back into block */ | ||
102 | "movdqa %%xmm3, 16(%0) \n\t" | ||
103 | "movdqa %%xmm4, %%xmm6 \n\t" /* duplicate so low and high halves can be unpacked separately */ | ||
104 | "movdqa %%xmm5, %%xmm0 \n\t" /* xmm0 is reused here; its sign mask is not read again in this iteration */ | ||
105 | "punpcklwd %%xmm7, %%xmm4 \n\t" /* zero-extend words 0..3 to dwords */ | ||
106 | "punpckhwd %%xmm7, %%xmm6 \n\t" /* zero-extend words 4..7 */ | ||
107 | "punpcklwd %%xmm7, %%xmm5 \n\t" /* zero-extend words 8..11 */ | ||
108 | "punpckhwd %%xmm7, %%xmm0 \n\t" /* zero-extend words 12..15 */ | ||
109 | "paddd (%1), %%xmm4 \n\t" /* add the existing sums to the absolute values */ | ||
110 | "paddd 16(%1), %%xmm6 \n\t" | ||
111 | "paddd 32(%1), %%xmm5 \n\t" | ||
112 | "paddd 48(%1), %%xmm0 \n\t" | ||
113 | "movdqa %%xmm4, (%1) \n\t" /* write the updated sums back */ | ||
114 | "movdqa %%xmm6, 16(%1) \n\t" | ||
115 | "movdqa %%xmm5, 32(%1) \n\t" | ||
116 | "movdqa %%xmm0, 48(%1) \n\t" | ||
117 | "add $32, %0 \n\t" /* advance block by 16 int16 entries */ | ||
118 | "add $64, %1 \n\t" /* advance sum by 16 dword entries */ | ||
119 | "add $32, %2 \n\t" /* advance offset by 16 uint16 entries */ | ||
120 | "cmp %3, %0 \n\t" /* compare the block pointer with the end pointer */ | ||
121 | " jb 1b \n\t" /* loop while block is still below the end */ | ||
122 | : "+r" (block), "+r" (sum), "+r" (offset) /* %0, %1, %2: pointers read and advanced by the loop */ | ||
123 | ✗ | : "r"(block+64) /* %3: end pointer, 64 coefficients past the start */ | |
124 | XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3", | ||
125 | "%xmm4", "%xmm5", "%xmm6", "%xmm7") /* NOTE(review): the asm stores through %0 and %1, yet only XMM registers are listed and no memory clobber or memory output operand is visible here - confirm this is intended */ | ||
126 | ); | ||
127 | ✗ | } | |
128 | #endif /* HAVE_SSE2_INLINE */ | ||
129 | #endif /* HAVE_INLINE_ASM */ | ||
130 | |||
131 | 277 | av_cold void ff_dct_encode_init_x86(MpegEncContext *s) /* Install x86 inline-asm implementations into s->dct_quantize and s->denoise_dct, chosen from s->avctx->dct_algo, the build-time HAVE_* switches and the runtime CPU flags. Fields are left untouched when no condition matches. */ | |
132 | { | ||
133 | 277 | const int dct_algo = s->avctx->dct_algo; /* DCT algorithm requested through the codec context */ | |
134 | |||
135 |
3/4✓ Branch 0 taken 273 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 273 times.
|
277 | if (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX) { /* any other dct_algo value leaves s unchanged */ |
136 | #if HAVE_MMX_INLINE /* without this switch the branch body is empty */ | ||
137 | 4 | int cpu_flags = av_get_cpu_flags(); /* CPU feature flags queried at run time */ | |
138 | #if HAVE_SSE2_INLINE | ||
139 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (INLINE_SSE2(cpu_flags)) { /* INLINE_SSE2 is declared outside this file; presumably true when SSE2 inline asm is usable on this CPU - confirm */ |
140 | #if HAVE_6REGS /* same switch that guards the template instantiations earlier in the file */ | ||
141 | ✗ | s->dct_quantize = dct_quantize_sse2; /* not defined in the visible code; presumably generated by mpegvideoenc_template.c via RENAME - confirm */ | |
142 | #endif | ||
143 | ✗ | s->denoise_dct = denoise_dct_sse2; /* assigned whether or not HAVE_6REGS is set */ | |
144 | } | ||
145 | #endif | ||
146 | #if HAVE_6REGS && HAVE_SSSE3_INLINE | ||
147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (INLINE_SSSE3(cpu_flags)) /* tested after the SSE2 branch, so on a match it replaces the dct_quantize chosen there */ |
148 | ✗ | s->dct_quantize = dct_quantize_ssse3; /* only dct_quantize changes; denoise_dct keeps whatever was set before */ | |
149 | #endif | ||
150 | #endif | ||
151 | } | ||
152 | 277 | } | |
153 |