Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * VP9 SIMD optimizations | ||
3 | * | ||
4 | * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "libavutil/attributes.h" | ||
24 | #include "libavutil/cpu.h" | ||
25 | #include "libavutil/x86/cpu.h" | ||
26 | #include "libavcodec/vp9dsp.h" | ||
27 | #include "libavcodec/x86/vp9dsp_init.h" | ||
28 | |||
29 | #if HAVE_X86ASM | ||
30 | /* NOTE(review): decl_fpel_func is not defined in this view (presumably it comes from vp9dsp_init.h); it appears to declare the external put/avg routines for one size and instruction set -- confirm. */ |||
31 | decl_fpel_func(put, 4, , mmx); | ||
32 | decl_fpel_func(put, 8, , mmx); | ||
33 | decl_fpel_func(put, 16, , sse); | ||
34 | decl_fpel_func(put, 32, , sse); | ||
35 | decl_fpel_func(put, 64, , sse); | ||
36 | decl_fpel_func(avg, 4, _8, mmxext); | ||
37 | decl_fpel_func(avg, 8, _8, mmxext); | ||
38 | decl_fpel_func(avg, 16, _8, sse2); | ||
39 | decl_fpel_func(avg, 32, _8, sse2); | ||
40 | decl_fpel_func(avg, 64, _8, sse2); | ||
41 | decl_fpel_func(put, 32, , avx); | ||
42 | decl_fpel_func(put, 64, , avx); | ||
43 | decl_fpel_func(avg, 32, _8, avx2); | ||
44 | decl_fpel_func(avg, 64, _8, avx2); | ||
45 | /* NOTE(review): decl_mc_funcs is also defined outside this view. The (16, ssse3) and (32, avx2) instantiations are only emitted when ARCH_X86_64 is set. */ |||
46 | decl_mc_funcs(4, mmxext, int16_t, 8, 8); | ||
47 | decl_mc_funcs(8, sse2, int16_t, 8, 8); | ||
48 | decl_mc_funcs(4, ssse3, int8_t, 32, 8); | ||
49 | decl_mc_funcs(8, ssse3, int8_t, 32, 8); | ||
50 | #if ARCH_X86_64 | ||
51 | decl_mc_funcs(16, ssse3, int8_t, 32, 8); | ||
52 | decl_mc_funcs(32, avx2, int8_t, 32, 8); | ||
53 | #endif | ||
54 | /* NOTE(review): mc_rep_funcs is defined outside this view; presumably it builds a wider function out of the narrower one named by its later arguments -- confirm. The (16, ssse3) instantiation is only emitted on ARCH_X86_32 and the (64, avx2) one only on x86-64 with HAVE_AVX2_EXTERNAL. */ |||
55 | 672 | mc_rep_funcs(16, 8, 8, sse2, int16_t, 8, 8) | |
56 | #if ARCH_X86_32 | ||
57 | mc_rep_funcs(16, 8, 8, ssse3, int8_t, 32, 8) | ||
58 | #endif | ||
59 | 288 | mc_rep_funcs(32, 16, 16, sse2, int16_t, 8, 8) | |
60 | 288 | mc_rep_funcs(32, 16, 16, ssse3, int8_t, 32, 8) | |
61 | 96 | mc_rep_funcs(64, 32, 32, sse2, int16_t, 8, 8) | |
62 | 96 | mc_rep_funcs(64, 32, 32, ssse3, int8_t, 32, 8) | |
63 | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL | ||
64 | 52 | mc_rep_funcs(64, 32, 32, avx2, int8_t, 32, 8) | |
65 | #endif | ||
66 | /* Filter tables declared here and defined outside this view. NOTE(review): presumably they are referenced by the filters_8tap_* macro expansions below -- confirm. */ |||
67 | extern const int8_t ff_filters_ssse3[3][15][4][32]; | ||
68 | extern const int16_t ff_filters_sse2[3][15][8][8]; | ||
69 | /* NOTE(review): filters_8tap_2d_fn and filters_8tap_2d_fn2 are defined outside this view; the avx2 instantiations are compiled only on x86-64 with HAVE_AVX2_EXTERNAL. */ |||
70 | 60 | filters_8tap_2d_fn2(put, 16, 8, 1, mmxext, sse2, sse2) | |
71 | 60 | filters_8tap_2d_fn2(avg, 16, 8, 1, mmxext, sse2, sse2) | |
72 | 894 | filters_8tap_2d_fn2(put, 16, 8, 1, ssse3, ssse3, ssse3) | |
73 | 42 | filters_8tap_2d_fn2(avg, 16, 8, 1, ssse3, ssse3, ssse3) | |
74 | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL | ||
75 | 6 | filters_8tap_2d_fn(put, 64, 32, 8, 1, avx2, ssse3) | |
76 | 10 | filters_8tap_2d_fn(put, 32, 32, 8, 1, avx2, ssse3) | |
77 | 6 | filters_8tap_2d_fn(avg, 64, 32, 8, 1, avx2, ssse3) | |
78 | 6 | filters_8tap_2d_fn(avg, 32, 32, 8, 1, avx2, ssse3) | |
79 | #endif | ||
80 | /* NOTE(review): filters_8tap_1d_fn2 and filters_8tap_1d_fn3 are defined outside this view; same put/avg and instruction-set split as the 2-D instantiations. */ |||
81 | 120 | filters_8tap_1d_fn3(put, 8, mmxext, sse2, sse2) | |
82 | 120 | filters_8tap_1d_fn3(avg, 8, mmxext, sse2, sse2) | |
83 | 628 | filters_8tap_1d_fn3(put, 8, ssse3, ssse3, ssse3) | |
84 | 84 | filters_8tap_1d_fn3(avg, 8, ssse3, ssse3, ssse3) | |
85 | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL | ||
86 | 16 | filters_8tap_1d_fn2(put, 64, 8, avx2, ssse3) | |
87 | 50 | filters_8tap_1d_fn2(put, 32, 8, avx2, ssse3) | |
88 | 12 | filters_8tap_1d_fn2(avg, 64, 8, avx2, ssse3) | |
89 | 12 | filters_8tap_1d_fn2(avg, 32, 8, avx2, ssse3) | |
90 | #endif | ||
91 | /* itxfm_func declares the prototype ff_vp9_<typea>_<typeb>_<size>x<size>_add_<opt>(dst, stride, block, eob); itxfm_funcs declares all four idct/iadst pairings for one size and opt. */ |||
92 | #define itxfm_func(typea, typeb, size, opt) \ | ||
93 | void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||
94 | int16_t *block, int eob) | ||
95 | #define itxfm_funcs(size, opt) \ | ||
96 | itxfm_func(idct, idct, size, opt); \ | ||
97 | itxfm_func(iadst, idct, size, opt); \ | ||
98 | itxfm_func(idct, iadst, size, opt); \ | ||
99 | itxfm_func(iadst, iadst, size, opt) | ||
100 | /* Only the idct_idct pairing is declared for 4x4 mmxext and for every 32x32 variant; iwht_iwht is declared for 4x4 mmx only. */ |||
101 | itxfm_func(idct, idct, 4, mmxext); | ||
102 | itxfm_func(idct, iadst, 4, sse2); | ||
103 | itxfm_func(iadst, idct, 4, sse2); | ||
104 | itxfm_func(iadst, iadst, 4, sse2); | ||
105 | itxfm_funcs(4, ssse3); | ||
106 | itxfm_funcs(8, sse2); | ||
107 | itxfm_funcs(8, ssse3); | ||
108 | itxfm_funcs(8, avx); | ||
109 | itxfm_funcs(16, sse2); | ||
110 | itxfm_funcs(16, ssse3); | ||
111 | itxfm_funcs(16, avx); | ||
112 | itxfm_func(idct, idct, 32, sse2); | ||
113 | itxfm_func(idct, idct, 32, ssse3); | ||
114 | itxfm_func(idct, idct, 32, avx); | ||
115 | itxfm_func(iwht, iwht, 4, mmx); | ||
116 | itxfm_funcs(16, avx2); | ||
117 | itxfm_func(idct, idct, 32, avx2); | ||
118 | /* The two helper macros are only needed for the prototypes above, so they are removed again. */ |||
119 | #undef itxfm_func | ||
120 | #undef itxfm_funcs | ||
121 | /* lpf_funcs declares a pair of prototypes, ff_vp9_loop_filter_v_<size1>_<size2>_<opt> and ff_vp9_loop_filter_h_<size1>_<size2>_<opt>, both taking (dst, stride, E, I, H). */ |||
122 | #define lpf_funcs(size1, size2, opt) \ | ||
123 | void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||
124 | int E, int I, int H); \ | ||
125 | void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||
126 | int E, int I, int H) | ||
127 | /* The 4_8 and 8_8 filters are declared for mmxext only; every *_16 filter is declared for sse2, ssse3 and avx. */ |||
128 | lpf_funcs(4, 8, mmxext); | ||
129 | lpf_funcs(8, 8, mmxext); | ||
130 | lpf_funcs(16, 16, sse2); | ||
131 | lpf_funcs(16, 16, ssse3); | ||
132 | lpf_funcs(16, 16, avx); | ||
133 | lpf_funcs(44, 16, sse2); | ||
134 | lpf_funcs(44, 16, ssse3); | ||
135 | lpf_funcs(44, 16, avx); | ||
136 | lpf_funcs(84, 16, sse2); | ||
137 | lpf_funcs(84, 16, ssse3); | ||
138 | lpf_funcs(84, 16, avx); | ||
139 | lpf_funcs(48, 16, sse2); | ||
140 | lpf_funcs(48, 16, ssse3); | ||
141 | lpf_funcs(48, 16, avx); | ||
142 | lpf_funcs(88, 16, sse2); | ||
143 | lpf_funcs(88, 16, ssse3); | ||
144 | lpf_funcs(88, 16, avx); | ||
145 | |||
146 | #undef lpf_funcs | ||
147 | /* ipred_func declares the prototype ff_vp9_ipred_<type>_<size>x<size>_<opt>(dst, stride, l, a). NOTE(review): l and a presumably point at the left and above neighbour pixels -- confirm. */ |||
148 | #define ipred_func(size, type, opt) \ | ||
149 | void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \ | ||
150 | const uint8_t *l, const uint8_t *a) | ||
151 | |||
152 | ipred_func(8, v, mmx); | ||
153 | /* ipred_dc_funcs: the dc, dc_left and dc_top prototypes for one size and opt. */ |||
154 | #define ipred_dc_funcs(size, opt) \ | ||
155 | ipred_func(size, dc, opt); \ | ||
156 | ipred_func(size, dc_left, opt); \ | ||
157 | ipred_func(size, dc_top, opt) | ||
158 | |||
159 | ipred_dc_funcs(4, mmxext); | ||
160 | ipred_dc_funcs(8, mmxext); | ||
161 | /* ipred_dir_tm_funcs: the tm, dl, dr, hd, hu, vl and vr prototypes for one size and opt. */ |||
162 | #define ipred_dir_tm_funcs(size, opt) \ | ||
163 | ipred_func(size, tm, opt); \ | ||
164 | ipred_func(size, dl, opt); \ | ||
165 | ipred_func(size, dr, opt); \ | ||
166 | ipred_func(size, hd, opt); \ | ||
167 | ipred_func(size, hu, opt); \ | ||
168 | ipred_func(size, vl, opt); \ | ||
169 | ipred_func(size, vr, opt) | ||
170 | |||
171 | ipred_dir_tm_funcs(4, mmxext); | ||
172 | |||
173 | ipred_func(16, v, sse); | ||
174 | ipred_func(32, v, sse); | ||
175 | |||
176 | ipred_dc_funcs(16, sse2); | ||
177 | ipred_dc_funcs(32, sse2); | ||
178 | /* ipred_dir_tm_h_funcs: everything from ipred_dir_tm_funcs plus the h prototype. */ |||
179 | #define ipred_dir_tm_h_funcs(size, opt) \ | ||
180 | ipred_dir_tm_funcs(size, opt); \ | ||
181 | ipred_func(size, h, opt) | ||
182 | |||
183 | ipred_dir_tm_h_funcs(8, sse2); | ||
184 | ipred_dir_tm_h_funcs(16, sse2); | ||
185 | ipred_dir_tm_h_funcs(32, sse2); | ||
186 | |||
187 | ipred_func(4, h, sse2); | ||
188 | /* ipred_all_funcs: the dc set plus the dir/tm/h set for one size and opt. */ |||
189 | #define ipred_all_funcs(size, opt) \ | ||
190 | ipred_dc_funcs(size, opt); \ | ||
191 | ipred_dir_tm_h_funcs(size, opt) | ||
192 | |||
193 | // FIXME hd/vl_4x4_ssse3 does not exist | ||
194 | ipred_all_funcs(4, ssse3); | ||
195 | ipred_all_funcs(8, ssse3); | ||
196 | ipred_all_funcs(16, ssse3); | ||
197 | ipred_all_funcs(32, ssse3); | ||
198 | |||
199 | ipred_dir_tm_h_funcs(8, avx); | ||
200 | ipred_dir_tm_h_funcs(16, avx); | ||
201 | ipred_dir_tm_h_funcs(32, avx); | ||
202 | |||
203 | ipred_func(32, v, avx); | ||
204 | |||
205 | ipred_dc_funcs(32, avx2); | ||
206 | ipred_func(32, h, avx2); | ||
207 | ipred_func(32, tm, avx2); | ||
208 | /* NOTE(review): ipred_all_funcs is not part of the #undef list below, so it stays defined after this point, unlike the other ipred_* helper macros. */ |||
209 | #undef ipred_func | ||
210 | #undef ipred_dir_tm_h_funcs | ||
211 | #undef ipred_dir_tm_funcs | ||
212 | #undef ipred_dc_funcs | ||
213 | |||
214 | #endif /* HAVE_X86ASM */ | ||
215 | /* Fill dsp with x86 SIMD implementations chosen from the runtime CPU flags. bpp 10 and 12 are delegated to their own init functions; bitexact is forwarded to those and, when set, keeps the MMX lossless WHT from being installed. The body is empty when HAVE_X86ASM is 0. */ |||
216 | 671 | av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact) | |
217 | { | ||
218 | #if HAVE_X86ASM | ||
219 | int cpu_flags; | ||
220 | /* High bit depths return early; everything after this chain applies to the remaining bpp values (NOTE(review): presumably only 8 -- confirm against the caller). */ |||
221 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 590 times.
|
671 | if (bpp == 10) { |
222 | 81 | ff_vp9dsp_init_10bpp_x86(dsp, bitexact); | |
223 | 81 | return; | |
224 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 515 times.
|
590 | } else if (bpp == 12) { |
225 | 75 | ff_vp9dsp_init_12bpp_x86(dsp, bitexact); | |
226 | 75 | return; | |
227 | } | ||
228 | |||
229 | 515 | cpu_flags = av_get_cpu_flags(); | |
230 | /* init_lpf(opt): install the 16_16 loop filters and the four mix2 (44/48/84/88) loop filters of one instruction set, h variant at index 0 and v variant at index 1. */ |||
231 | #define init_lpf(opt) do { \ | ||
232 | dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \ | ||
233 | dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \ | ||
234 | dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \ | ||
235 | dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \ | ||
236 | dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \ | ||
237 | dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \ | ||
238 | dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \ | ||
239 | dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \ | ||
240 | dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \ | ||
241 | dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \ | ||
242 | } while (0) | ||
243 | /* init_ipred(sz, opt, t, e): point intra_pred[TX_<sz>X<sz>][<e>_PRED] at ff_vp9_ipred_<t>_<sz>x<sz>_<opt>. */ |||
244 | #define init_ipred(sz, opt, t, e) \ | ||
245 | dsp->intra_pred[TX_##sz##X##sz][e##_PRED] = ff_vp9_ipred_##t##_##sz##x##sz##_##opt | ||
246 | /* The two aliases below map the ssse3 names of the 4x4 hd and vl predictors onto the mmxext functions, so that init_all_ipred(4, ssse3) expands to mmxext symbols for those two entries. */ |||
247 | #define ff_vp9_ipred_hd_4x4_ssse3 ff_vp9_ipred_hd_4x4_mmxext | ||
248 | #define ff_vp9_ipred_vl_4x4_ssse3 ff_vp9_ipred_vl_4x4_mmxext | ||
249 | #define init_dir_tm_ipred(sz, opt) do { \ | ||
250 | init_ipred(sz, opt, dl, DIAG_DOWN_LEFT); \ | ||
251 | init_ipred(sz, opt, dr, DIAG_DOWN_RIGHT); \ | ||
252 | init_ipred(sz, opt, hd, HOR_DOWN); \ | ||
253 | init_ipred(sz, opt, vl, VERT_LEFT); \ | ||
254 | init_ipred(sz, opt, hu, HOR_UP); \ | ||
255 | init_ipred(sz, opt, tm, TM_VP8); \ | ||
256 | init_ipred(sz, opt, vr, VERT_RIGHT); \ | ||
257 | } while (0) | ||
258 | #define init_dir_tm_h_ipred(sz, opt) do { \ | ||
259 | init_dir_tm_ipred(sz, opt); \ | ||
260 | init_ipred(sz, opt, h, HOR); \ | ||
261 | } while (0) | ||
262 | #define init_dc_ipred(sz, opt) do { \ | ||
263 | init_ipred(sz, opt, dc, DC); \ | ||
264 | init_ipred(sz, opt, dc_left, LEFT_DC); \ | ||
265 | init_ipred(sz, opt, dc_top, TOP_DC); \ | ||
266 | } while (0) | ||
267 | #define init_all_ipred(sz, opt) do { \ | ||
268 | init_dc_ipred(sz, opt); \ | ||
269 | init_dir_tm_h_ipred(sz, opt); \ | ||
270 | } while (0) | ||
271 | /* The feature checks below are independent ifs executed in this order, so an assignment made in a later block overwrites one made earlier for the same table slot. */ |||
272 |
2/2✓ Branch 0 taken 61 times.
✓ Branch 1 taken 454 times.
|
515 | if (EXTERNAL_MMX(cpu_flags)) { |
273 | 61 | init_fpel_func(4, 0, 4, put, , mmx); | |
274 | 61 | init_fpel_func(3, 0, 8, put, , mmx); | |
275 |
1/2✓ Branch 0 taken 61 times.
✗ Branch 1 not taken.
|
61 | if (!bitexact) { |
276 | 61 | dsp->itxfm_add[4 /* lossless */][DCT_DCT] = | |
277 | 61 | dsp->itxfm_add[4 /* lossless */][ADST_DCT] = | |
278 | 61 | dsp->itxfm_add[4 /* lossless */][DCT_ADST] = | |
279 | 61 | dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx; | |
280 | } | ||
281 | 61 | init_ipred(8, mmx, v, VERT); | |
282 | } | ||
283 | |||
284 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 459 times.
|
515 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
285 | 56 | dsp->loop_filter_8[0][0] = ff_vp9_loop_filter_h_4_8_mmxext; | |
286 | 56 | dsp->loop_filter_8[0][1] = ff_vp9_loop_filter_v_4_8_mmxext; | |
287 | 56 | dsp->loop_filter_8[1][0] = ff_vp9_loop_filter_h_8_8_mmxext; | |
288 | 56 | dsp->loop_filter_8[1][1] = ff_vp9_loop_filter_v_8_8_mmxext; | |
289 | 56 | init_subpel2(4, 0, 4, put, 8, mmxext); | |
290 | 56 | init_subpel2(4, 1, 4, avg, 8, mmxext); | |
291 | 56 | init_fpel_func(4, 1, 4, avg, _8, mmxext); | |
292 | 56 | init_fpel_func(3, 1, 8, avg, _8, mmxext); | |
293 | 56 | dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; | |
294 | 56 | init_dc_ipred(4, mmxext); | |
295 | 56 | init_dc_ipred(8, mmxext); | |
296 | 56 | init_dir_tm_ipred(4, mmxext); | |
297 | } | ||
298 | |||
299 |
2/2✓ Branch 0 taken 51 times.
✓ Branch 1 taken 464 times.
|
515 | if (EXTERNAL_SSE(cpu_flags)) { |
300 | 51 | init_fpel_func(2, 0, 16, put, , sse); | |
301 | 51 | init_fpel_func(1, 0, 32, put, , sse); | |
302 | 51 | init_fpel_func(0, 0, 64, put, , sse); | |
303 | 51 | init_ipred(16, sse, v, VERT); | |
304 | 51 | init_ipred(32, sse, v, VERT); | |
305 | } | ||
306 | /* In the sse2, ssse3 and avx blocks, all four TX_32X32 slots are pointed at the same idct_idct function. */ |||
307 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 469 times.
|
515 | if (EXTERNAL_SSE2(cpu_flags)) { |
308 | 46 | init_subpel3_8to64(0, put, 8, sse2); | |
309 | 46 | init_subpel3_8to64(1, avg, 8, sse2); | |
310 | 46 | init_fpel_func(2, 1, 16, avg, _8, sse2); | |
311 | 46 | init_fpel_func(1, 1, 32, avg, _8, sse2); | |
312 | 46 | init_fpel_func(0, 1, 64, avg, _8, sse2); | |
313 | 46 | init_lpf(sse2); | |
314 | 46 | dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2; | |
315 | 46 | dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2; | |
316 | 46 | dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2; | |
317 | 46 | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2; | |
318 | 46 | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2; | |
319 | 46 | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2; | |
320 | 46 | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2; | |
321 | 46 | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2; | |
322 | 46 | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2; | |
323 | 46 | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2; | |
324 | 46 | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2; | |
325 | 46 | dsp->itxfm_add[TX_32X32][ADST_ADST] = | |
326 | 46 | dsp->itxfm_add[TX_32X32][ADST_DCT] = | |
327 | 46 | dsp->itxfm_add[TX_32X32][DCT_ADST] = | |
328 | 46 | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2; | |
329 | 46 | init_dc_ipred(16, sse2); | |
330 | 46 | init_dc_ipred(32, sse2); | |
331 | 46 | init_dir_tm_h_ipred(8, sse2); | |
332 | 46 | init_dir_tm_h_ipred(16, sse2); | |
333 | 46 | init_dir_tm_h_ipred(32, sse2); | |
334 | 46 | init_ipred(4, sse2, h, HOR); | |
335 | } | ||
336 | |||
337 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 479 times.
|
515 | if (EXTERNAL_SSSE3(cpu_flags)) { |
338 | 36 | init_subpel3(0, put, 8, ssse3); | |
339 | 36 | init_subpel3(1, avg, 8, ssse3); | |
340 | 36 | dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3; | |
341 | 36 | dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3; | |
342 | 36 | dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3; | |
343 | 36 | dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3; | |
344 | 36 | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; | |
345 | 36 | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; | |
346 | 36 | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; | |
347 | 36 | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; | |
348 | 36 | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; | |
349 | 36 | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; | |
350 | 36 | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; | |
351 | 36 | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; | |
352 | 36 | dsp->itxfm_add[TX_32X32][ADST_ADST] = | |
353 | 36 | dsp->itxfm_add[TX_32X32][ADST_DCT] = | |
354 | 36 | dsp->itxfm_add[TX_32X32][DCT_ADST] = | |
355 | 36 | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; | |
356 | 36 | init_lpf(ssse3); | |
357 | 36 | init_all_ipred(4, ssse3); | |
358 | 36 | init_all_ipred(8, ssse3); | |
359 | 36 | init_all_ipred(16, ssse3); | |
360 | 36 | init_all_ipred(32, ssse3); | |
361 | } | ||
362 | |||
363 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 499 times.
|
515 | if (EXTERNAL_AVX(cpu_flags)) { |
364 | 16 | dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; | |
365 | 16 | dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; | |
366 | 16 | dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; | |
367 | 16 | dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; | |
368 | 16 | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; | |
369 | 16 | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; | |
370 | 16 | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; | |
371 | 16 | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; | |
372 | 16 | dsp->itxfm_add[TX_32X32][ADST_ADST] = | |
373 | 16 | dsp->itxfm_add[TX_32X32][ADST_DCT] = | |
374 | 16 | dsp->itxfm_add[TX_32X32][DCT_ADST] = | |
375 | 16 | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; | |
376 | 16 | init_lpf(avx); | |
377 | 16 | init_dir_tm_h_ipred(8, avx); | |
378 | 16 | init_dir_tm_h_ipred(16, avx); | |
379 | 16 | init_dir_tm_h_ipred(32, avx); | |
380 | } | ||
381 |
3/4✓ Branch 0 taken 16 times.
✓ Branch 1 taken 499 times.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
515 | if (EXTERNAL_AVX_FAST(cpu_flags)) { |
382 | 16 | init_fpel_func(1, 0, 32, put, , avx); | |
383 | 16 | init_fpel_func(0, 0, 64, put, , avx); | |
384 | 16 | init_ipred(32, avx, v, VERT); | |
385 | } | ||
386 | /* Within the avx2 block, the itxfm and subpel assignments are additionally limited to x86-64 builds with HAVE_AVX2_EXTERNAL; the fpel and 32x32 ipred assignments are not. */ |||
387 |
3/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 509 times.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
515 | if (EXTERNAL_AVX2_FAST(cpu_flags)) { |
388 | 6 | init_fpel_func(1, 1, 32, avg, _8, avx2); | |
389 | 6 | init_fpel_func(0, 1, 64, avg, _8, avx2); | |
390 | if (ARCH_X86_64) { | ||
391 | #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL | ||
392 | 6 | dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx2; | |
393 | 6 | dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx2; | |
394 | 6 | dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx2; | |
395 | 6 | dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx2; | |
396 | 6 | dsp->itxfm_add[TX_32X32][ADST_ADST] = | |
397 | 6 | dsp->itxfm_add[TX_32X32][ADST_DCT] = | |
398 | 6 | dsp->itxfm_add[TX_32X32][DCT_ADST] = | |
399 | 6 | dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx2; | |
400 | 6 | init_subpel3_32_64(0, put, 8, avx2); | |
401 | 6 | init_subpel3_32_64(1, avg, 8, avx2); | |
402 | #endif | ||
403 | } | ||
404 | 6 | init_dc_ipred(32, avx2); | |
405 | 6 | init_ipred(32, avx2, h, HOR); | |
406 | 6 | init_ipred(32, avx2, tm, TM_VP8); | |
407 | } | ||
408 | /* NOTE(review): the four names undefined below are not defined anywhere in this view (presumably they come from vp9dsp_init.h -- confirm); the init_* helper macros and the two ssse3 aliases defined inside this function are not undefined here. */ |||
409 | #undef init_fpel | ||
410 | #undef init_subpel1 | ||
411 | #undef init_subpel2 | ||
412 | #undef init_subpel3 | ||
413 | |||
414 | #endif /* HAVE_X86ASM */ | ||
415 | } | ||
416 |