Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * SIMD-optimized halfpel functions | ||
3 | * Copyright (c) 2000, 2001 Fabrice Bellard | ||
4 | * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | * | ||
22 | * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | ||
23 | */ | ||
24 | |||
25 | #include "libavutil/attributes.h" | ||
26 | #include "libavutil/cpu.h" | ||
27 | #include "libavutil/x86/cpu.h" | ||
28 | #include "libavcodec/avcodec.h" | ||
29 | #include "libavcodec/hpeldsp.h" | ||
30 | #include "libavcodec/pixels.h" | ||
31 | #include "fpel.h" | ||
32 | #include "hpeldsp.h" | ||
33 | |||
34 | void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
35 | ptrdiff_t line_size, int h); | ||
36 | void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
37 | ptrdiff_t line_size, int h); | ||
38 | void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, | ||
39 | ptrdiff_t line_size, int h); | ||
40 | void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, | ||
41 | ptrdiff_t line_size, int h); | ||
42 | void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, | ||
43 | ptrdiff_t line_size, int h); | ||
44 | void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, | ||
45 | ptrdiff_t line_size, int h); | ||
46 | void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
47 | ptrdiff_t line_size, int h); | ||
48 | void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block, | ||
49 | const uint8_t *pixels, | ||
50 | ptrdiff_t line_size, int h); | ||
51 | void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
52 | ptrdiff_t line_size, int h); | ||
53 | void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
54 | ptrdiff_t line_size, int h); | ||
55 | void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block, | ||
56 | const uint8_t *pixels, | ||
57 | ptrdiff_t line_size, int h); | ||
58 | void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
59 | ptrdiff_t line_size, int h); | ||
60 | void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
61 | ptrdiff_t line_size, int h); | ||
62 | void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, | ||
63 | ptrdiff_t line_size, int h); | ||
64 | |||
65 | #define put_pixels8_mmx ff_put_pixels8_mmx | ||
66 | #define put_pixels16_mmx ff_put_pixels16_mmx | ||
67 | #define put_pixels8_xy2_mmx ff_put_pixels8_xy2_mmx | ||
68 | #define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx | ||
69 | #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx | ||
70 | |||
71 | #if HAVE_INLINE_ASM | ||
72 | |||
73 | /***********************************/ | ||
74 | /* MMX no rounding */ | ||
75 | #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx | ||
76 | #define SET_RND MOVQ_WONE | ||
77 | #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) | ||
78 | #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) | ||
79 | #define STATIC static | ||
80 | |||
81 | #include "rnd_template.c" | ||
82 | #include "hpeldsp_rnd_template.c" | ||
83 | |||
84 | #undef DEF | ||
85 | #undef SET_RND | ||
86 | #undef PAVGBP | ||
87 | #undef PAVGB | ||
88 | #undef STATIC | ||
89 | |||
90 | #if HAVE_MMX | ||
91 | ✗ | CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8) | |
92 | 1477 | CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8) | |
93 | |||
94 | ✗ | CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8) | |
95 | 10118 | CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) | |
96 | #endif | ||
97 | |||
98 | /***********************************/ | ||
99 | /* MMX rounding */ | ||
100 | |||
101 | #define SET_RND MOVQ_WTWO | ||
102 | #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx | ||
103 | #define STATIC | ||
104 | #define NO_AVG | ||
105 | |||
106 | #include "rnd_template.c" | ||
107 | |||
108 | #undef NO_AVG | ||
109 | #undef DEF | ||
110 | #undef SET_RND | ||
111 | |||
112 | #if HAVE_MMX | ||
113 | ✗ | CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) | |
114 | #endif | ||
115 | |||
116 | #endif /* HAVE_INLINE_ASM */ | ||
117 | |||
118 | |||
119 | #if HAVE_X86ASM | ||
120 | |||
121 | #define HPELDSP_AVG_PIXELS16(CPUEXT) \ | ||
122 | CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \ | ||
123 | CALL_2X_PIXELS(put_pixels16_y2 ## CPUEXT, ff_put_pixels8_y2 ## CPUEXT, 8) \ | ||
124 | CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \ | ||
125 | CALL_2X_PIXELS(avg_pixels16 ## CPUEXT, ff_avg_pixels8 ## CPUEXT, 8) \ | ||
126 | CALL_2X_PIXELS(avg_pixels16_x2 ## CPUEXT, ff_avg_pixels8_x2 ## CPUEXT, 8) \ | ||
127 | CALL_2X_PIXELS(avg_pixels16_y2 ## CPUEXT, ff_avg_pixels8_y2 ## CPUEXT, 8) \ | ||
128 | CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \ | ||
129 | CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8) | ||
130 | |||
131 | 17364 | HPELDSP_AVG_PIXELS16(_mmxext) | |
132 | |||
133 | #endif /* HAVE_X86ASM */ | ||
134 | |||
135 | #define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \ | ||
136 | if (HAVE_MMX_EXTERNAL) \ | ||
137 | c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU | ||
138 | |||
139 | #define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) \ | ||
140 | do { \ | ||
141 | SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU); \ | ||
142 | c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \ | ||
143 | } while (0) | ||
144 | #define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) \ | ||
145 | do { \ | ||
146 | c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ | ||
147 | c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ | ||
148 | } while (0) | ||
149 | #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | ||
150 | do { \ | ||
151 | SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU); \ | ||
152 | SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU); \ | ||
153 | } while (0) | ||
154 | |||
155 | 190 | static void hpeldsp_init_mmx(HpelDSPContext *c, int flags) | |
156 | { | ||
157 | #if HAVE_MMX_INLINE | ||
158 | 190 | SET_HPEL_FUNCS03(put, [0], 16, mmx); | |
159 | 190 | SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx); | |
160 | 190 | SET_HPEL_FUNCS12(avg_no_rnd, , 16, mmx); | |
161 | 190 | c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_mmx; | |
162 | 190 | SET_HPEL_FUNCS03(put, [1], 8, mmx); | |
163 | 190 | SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx); | |
164 | #endif | ||
165 | 190 | } | |
166 | |||
167 | 190 | static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) | |
168 | { | ||
169 | #if HAVE_MMXEXT_EXTERNAL | ||
170 | 190 | c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext; | |
171 | 190 | c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext; | |
172 | |||
173 | 190 | c->avg_pixels_tab[0][0] = avg_pixels16_mmxext; | |
174 | 190 | c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext; | |
175 | 190 | c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext; | |
176 | 190 | c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; | |
177 | |||
178 | 190 | c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext; | |
179 | 190 | c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext; | |
180 | |||
181 | 190 | c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext; | |
182 | 190 | c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext; | |
183 | 190 | c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext; | |
184 | 190 | c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext; | |
185 | |||
186 | 190 | c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext; | |
187 | 190 | c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext; | |
188 | |||
189 | 190 | c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_mmxext; | |
190 | |||
191 |
2/2✓ Branch 0 taken 173 times.
✓ Branch 1 taken 17 times.
|
190 | if (!(flags & AV_CODEC_FLAG_BITEXACT)) { |
192 | 173 | c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext; | |
193 | 173 | c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext; | |
194 | 173 | c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext; | |
195 | 173 | c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext; | |
196 | |||
197 | 173 | c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext; | |
198 | 173 | c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext; | |
199 | } | ||
200 | #endif /* HAVE_MMXEXT_EXTERNAL */ | ||
201 | 190 | } | |
202 | |||
203 | 190 | static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags) | |
204 | { | ||
205 | #if HAVE_SSE2_EXTERNAL | ||
206 | 190 | c->put_pixels_tab[0][0] = ff_put_pixels16_sse2; | |
207 | 190 | c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2; | |
208 | 190 | c->put_pixels_tab[0][1] = ff_put_pixels16_x2_sse2; | |
209 | 190 | c->put_pixels_tab[0][2] = ff_put_pixels16_y2_sse2; | |
210 | 190 | c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_sse2; | |
211 | 190 | c->avg_pixels_tab[0][0] = ff_avg_pixels16_sse2; | |
212 | 190 | c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_sse2; | |
213 | 190 | c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_sse2; | |
214 | 190 | c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_sse2; | |
215 | 190 | c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_sse2; | |
216 | #endif /* HAVE_SSE2_EXTERNAL */ | ||
217 | 190 | } | |
218 | |||
219 | 190 | static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags) | |
220 | { | ||
221 | #if HAVE_SSSE3_EXTERNAL | ||
222 | 190 | c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_ssse3; | |
223 | 190 | c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_ssse3; | |
224 | 190 | c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_ssse3; | |
225 | 190 | c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_ssse3; | |
226 | #endif | ||
227 | 190 | } | |
228 | |||
229 | 1136 | av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) | |
230 | { | ||
231 | 1136 | int cpu_flags = av_get_cpu_flags(); | |
232 | |||
233 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 946 times.
|
1136 | if (INLINE_MMX(cpu_flags)) |
234 | 190 | hpeldsp_init_mmx(c, flags); | |
235 | |||
236 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 946 times.
|
1136 | if (EXTERNAL_MMXEXT(cpu_flags)) |
237 | 190 | hpeldsp_init_mmxext(c, flags); | |
238 | |||
239 |
3/4✓ Branch 0 taken 190 times.
✓ Branch 1 taken 946 times.
✓ Branch 2 taken 190 times.
✗ Branch 3 not taken.
|
1136 | if (EXTERNAL_SSE2_FAST(cpu_flags)) |
240 | 190 | hpeldsp_init_sse2_fast(c, flags); | |
241 | |||
242 |
2/2✓ Branch 0 taken 190 times.
✓ Branch 1 taken 946 times.
|
1136 | if (EXTERNAL_SSSE3(cpu_flags)) |
243 | 190 | hpeldsp_init_ssse3(c, flags); | |
244 | 1136 | } | |
245 |