Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * VP8 DSP functions x86-optimized | ||
3 | * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com> | ||
4 | * Copyright (c) 2010 Fiona Glaser <fiona@x264.com> | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "libavutil/attributes.h" | ||
24 | #include "libavutil/cpu.h" | ||
25 | #include "libavutil/mem_internal.h" | ||
26 | #include "libavutil/x86/cpu.h" | ||
27 | #include "libavcodec/vp8dsp.h" | ||
28 | |||
29 | #if HAVE_X86ASM | ||
30 | |||
31 | /* | ||
32 | * MC functions | ||
33 | */ | ||
/*
 * Prototypes only: none of the routines listed here is defined in this
 * file. They all take the same argument list, so the signature is written
 * once and each routine is declared by name.
 */
#define DECLARE_VP8_MC(NAME)                                   \
    void NAME(uint8_t *dst, ptrdiff_t dststride,               \
              const uint8_t *src, ptrdiff_t srcstride,         \
              int height, int mx, int my)

/* 4-pixel-wide subpel filters, mmxext */
DECLARE_VP8_MC(ff_put_vp8_epel4_h4_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_h6_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_v4_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_v6_mmxext);

/* 8-pixel-wide subpel filters, sse2 */
DECLARE_VP8_MC(ff_put_vp8_epel8_h4_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_h6_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_v4_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_v6_sse2);

/* 4- and 8-pixel-wide subpel filters, ssse3 */
DECLARE_VP8_MC(ff_put_vp8_epel4_h4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_h6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_v4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_v6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_h4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_h6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_v4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_v6_ssse3);

/* horizontal bilinear filters */
DECLARE_VP8_MC(ff_put_vp8_bilinear4_h_mmxext);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_h_sse2);
DECLARE_VP8_MC(ff_put_vp8_bilinear4_h_ssse3);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_h_ssse3);

/* vertical bilinear filters */
DECLARE_VP8_MC(ff_put_vp8_bilinear4_v_mmxext);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_v_sse2);
DECLARE_VP8_MC(ff_put_vp8_bilinear4_v_ssse3);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_v_ssse3);

/* unfiltered 8- and 16-pixel-wide block copies */
DECLARE_VP8_MC(ff_put_vp8_pixels8_mmx);
DECLARE_VP8_MC(ff_put_vp8_pixels16_sse);

#undef DECLARE_VP8_MC
118 | |||
/*
 * TAP_W16: define a static 16-pixel-wide put function for the given
 * instruction set (OPT), filter family (FILTERTYPE) and tap variant
 * (TAPTYPE) by calling the matching 8-pixel-wide function twice, once on
 * the left half and once on the right half (dst + 8 / src + 8).
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst + 8, dststride, src + 8, srcstride, height, mx, my); \
}

/*
 * TAP_W8: same construction one size down, an 8-pixel-wide put function
 * built from two calls to the 4-pixel-wide one (dst + 4 / src + 4).
 * src is const-qualified so that the generated function has the same
 * signature as every other put function in this file.
 * No instantiation of this macro appears in the part of the file shown here.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
}

/* 16-wide six-tap and bilinear h/v functions, built from the sse2 8-wide ones */
TAP_W16(sse2,  epel,     h6)
TAP_W16(sse2,  epel,     v6)
TAP_W16(sse2,  bilinear, h)
TAP_W16(sse2,  bilinear, v)

/* the same set, built from the ssse3 8-wide ones */
TAP_W16(ssse3, epel,     h6)
TAP_W16(ssse3, epel,     v6)
TAP_W16(ssse3, bilinear, h)
TAP_W16(ssse3, bilinear, v)
149 | |||
150 | #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \ | ||
151 | static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \ | ||
152 | uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
153 | ptrdiff_t srcstride, int height, int mx, int my) \ | ||
154 | { \ | ||
155 | LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + TAPNUMY - 1)]); \ | ||
156 | uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \ | ||
157 | src -= srcstride * (TAPNUMY / 2 - 1); \ | ||
158 | ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \ | ||
159 | tmp, SIZE, src, srcstride, height + TAPNUMY - 1, mx, my); \ | ||
160 | ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \ | ||
161 | dst, dststride, tmpptr, SIZE, height, mx, my); \ | ||
162 | } | ||
163 | |||
164 | #define HVTAPMMX(x, y) \ | ||
165 | HVTAP(mmxext, 8, x, y, 4, 8) | ||
166 | |||
167 | 2 | HVTAPMMX(4, 4) | |
168 | 2 | HVTAPMMX(4, 6) | |
169 | 2 | HVTAPMMX(6, 4) | |
170 | 2 | HVTAPMMX(6, 6) | |
171 | |||
172 | #define HVTAPSSE2(x, y, w) \ | ||
173 | HVTAP(sse2, 16, x, y, w, 16) \ | ||
174 | HVTAP(ssse3, 16, x, y, w, 16) | ||
175 | |||
176 | 6 | HVTAPSSE2(4, 4, 8) | |
177 | 6 | HVTAPSSE2(4, 6, 8) | |
178 | 6 | HVTAPSSE2(6, 4, 8) | |
179 | 6 | HVTAPSSE2(6, 6, 8) | |
180 | 6 | HVTAPSSE2(6, 6, 16) | |
181 | |||
182 | 1 | HVTAP(ssse3, 16, 4, 4, 4, 8) | |
183 | 1 | HVTAP(ssse3, 16, 4, 6, 4, 8) | |
184 | 1 | HVTAP(ssse3, 16, 6, 4, 4, 8) | |
185 | 1 | HVTAP(ssse3, 16, 6, 6, 4, 8) | |
186 | |||
187 | #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \ | ||
188 | static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \ | ||
189 | uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
190 | ptrdiff_t srcstride, int height, int mx, int my) \ | ||
191 | { \ | ||
192 | LOCAL_ALIGNED(ALIGN, uint8_t, tmp, [SIZE * (MAXHEIGHT + 2)]); \ | ||
193 | ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \ | ||
194 | tmp, SIZE, src, srcstride, height + 1, mx, my); \ | ||
195 | ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \ | ||
196 | dst, dststride, tmp, SIZE, height, mx, my); \ | ||
197 | } | ||
198 | |||
199 | 2 | HVBILIN(mmxext, 8, 4, 8) | |
200 | 2 | HVBILIN(sse2, 8, 8, 16) | |
201 | 2 | HVBILIN(sse2, 8, 16, 16) | |
202 | 1 | HVBILIN(ssse3, 8, 4, 8) | |
203 | 1 | HVBILIN(ssse3, 8, 8, 16) | |
204 | 1 | HVBILIN(ssse3, 8, 16, 16) | |
205 | |||
/*
 * Inverse-transform prototypes; the routines are defined outside this file.
 */

/* DC-only add, single block */
void ff_vp8_idct_dc_add_sse2(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16], ptrdiff_t stride);

/* DC-only add, several blocks per call (four for "4y", two for "4uv",
 * as given by the block array dimensions) */
void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16], ptrdiff_t stride);

/* luma DC transform and full IDCT + add */
void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
216 | |||
/*
 * DECLARE_LOOP_FILTER: declare the ten loop-filter routines that exist for
 * one instruction-set suffix (NAME). The routines are defined outside this
 * file. There are three signatures:
 *   - simple filter:          (dst, stride, flim)
 *   - 16y inner / mbedge:     (dst, stride, e, i, hvt)
 *   - 8uv inner / mbedge:     (dstU, dstV, stride, e, i, hvt)
 * Each comes in a v_ and an h_ variant.
 */
#define DECLARE_LOOP_FILTER(NAME) \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
\
void ff_vp8_v_loop_filter16y_inner_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
\
void ff_vp8_v_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            ptrdiff_t stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t stride, \
                                             int e, int i, int hvt);

DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
256 | |||
257 | #endif /* HAVE_X86ASM */ | ||
258 | |||
/*
 * Table-filling helpers for the init functions below. They expect a
 * VP8DSPContext pointer named `c` in scope and are used as statements,
 * followed by a semicolon.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement; without that, only the first assignment would be governed
 * by an unbraced `if`.
 *
 * Table layout as used here: tab[IDX][vertical][horizontal]. For the epel
 * table the last two indices are 0 = no filter, 1 = 4-tap, 2 = 6-tap.
 */

/* Six-tap entries only: h6, v6 and h6v6. */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_   ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_   ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/* Every filtered epel entry: the 4-tap and mixed ones, then the six-tap
 * ones through VP8_LUMA_MC_FUNC. Entry [0][0] is not touched. */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_   ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_   ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)

/* Every filtered bilinear entry. Indices 1 and 2 receive the same function
 * in each direction. Entry [0][0] is not touched. */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_  ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_  ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_  ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_  ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)

281 | |||
282 | |||
283 | 76 | av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c) | |
284 | { | ||
285 | #if HAVE_X86ASM | ||
286 | 76 | int cpu_flags = av_get_cpu_flags(); | |
287 | |||
288 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 54 times.
|
76 | if (EXTERNAL_MMX(cpu_flags)) { |
289 | 22 | c->put_vp8_epel_pixels_tab[1][0][0] = | |
290 | 22 | c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx; | |
291 | } | ||
292 | |||
293 | /* note that 4-tap width=16 functions are missing because w=16 | ||
294 | * is only used for luma, and luma is always a copy or sixtap. */ | ||
295 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 55 times.
|
76 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
296 | 21 | VP8_MC_FUNC(2, 4, mmxext); | |
297 | 21 | VP8_BILINEAR_MC_FUNC(2, 4, mmxext); | |
298 | } | ||
299 | |||
300 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 56 times.
|
76 | if (EXTERNAL_SSE(cpu_flags)) { |
301 | 20 | c->put_vp8_epel_pixels_tab[0][0][0] = | |
302 | 20 | c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse; | |
303 | } | ||
304 | |||
305 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 57 times.
|
76 | if (EXTERNAL_SSE2_SLOW(cpu_flags)) { |
306 | 19 | VP8_LUMA_MC_FUNC(0, 16, sse2); | |
307 | 19 | VP8_MC_FUNC(1, 8, sse2); | |
308 | 19 | VP8_BILINEAR_MC_FUNC(0, 16, sse2); | |
309 | 19 | VP8_BILINEAR_MC_FUNC(1, 8, sse2); | |
310 | } | ||
311 | |||
312 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 59 times.
|
76 | if (EXTERNAL_SSSE3(cpu_flags)) { |
313 | 17 | VP8_LUMA_MC_FUNC(0, 16, ssse3); | |
314 | 17 | VP8_MC_FUNC(1, 8, ssse3); | |
315 | 17 | VP8_MC_FUNC(2, 4, ssse3); | |
316 | 17 | VP8_BILINEAR_MC_FUNC(0, 16, ssse3); | |
317 | 17 | VP8_BILINEAR_MC_FUNC(1, 8, ssse3); | |
318 | 17 | VP8_BILINEAR_MC_FUNC(2, 4, ssse3); | |
319 | } | ||
320 | #endif /* HAVE_X86ASM */ | ||
321 | 76 | } | |
322 | |||
323 | 73 | av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c) | |
324 | { | ||
325 | #if HAVE_X86ASM | ||
326 | 73 | int cpu_flags = av_get_cpu_flags(); | |
327 | |||
328 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 51 times.
|
73 | if (EXTERNAL_MMX(cpu_flags)) { |
329 | 22 | c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx; | |
330 | } | ||
331 | |||
332 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 53 times.
|
73 | if (EXTERNAL_SSE(cpu_flags)) { |
333 | 20 | c->vp8_idct_add = ff_vp8_idct_add_sse; | |
334 | 20 | c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse; | |
335 | } | ||
336 | |||
337 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 54 times.
|
73 | if (EXTERNAL_SSE2_SLOW(cpu_flags)) { |
338 | 19 | c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2; | |
339 | |||
340 | 19 | c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2; | |
341 | 19 | c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2; | |
342 | |||
343 | 19 | c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2; | |
344 | 19 | c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2; | |
345 | } | ||
346 | |||
347 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 54 times.
|
73 | if (EXTERNAL_SSE2(cpu_flags)) { |
348 | 19 | c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse2; | |
349 | 19 | c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2; | |
350 | |||
351 | 19 | c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2; | |
352 | |||
353 | 19 | c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2; | |
354 | 19 | c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2; | |
355 | |||
356 | 19 | c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2; | |
357 | 19 | c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2; | |
358 | } | ||
359 | |||
360 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 56 times.
|
73 | if (EXTERNAL_SSSE3(cpu_flags)) { |
361 | 17 | c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3; | |
362 | 17 | c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3; | |
363 | |||
364 | 17 | c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3; | |
365 | 17 | c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3; | |
366 | 17 | c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3; | |
367 | 17 | c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3; | |
368 | |||
369 | 17 | c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3; | |
370 | 17 | c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3; | |
371 | 17 | c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3; | |
372 | 17 | c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3; | |
373 | } | ||
374 | |||
375 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 57 times.
|
73 | if (EXTERNAL_SSE4(cpu_flags)) { |
376 | 16 | c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4; | |
377 | |||
378 | 16 | c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4; | |
379 | 16 | c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse4; | |
380 | 16 | c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse4; | |
381 | } | ||
382 | #endif /* HAVE_X86ASM */ | ||
383 | 73 | } | |
384 |