Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2002-2012 Michael Niedermayer | ||
3 | * Copyright (C) 2012 Ronald S. Bultje | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | #include "config.h" | ||
23 | #include "libavutil/attributes.h" | ||
24 | #include "libavutil/avassert.h" | ||
25 | #include "libavutil/common.h" | ||
26 | #include "libavutil/cpu.h" | ||
27 | #include "libavutil/x86/asm.h" | ||
28 | #include "libavutil/x86/cpu.h" | ||
29 | #include "libavcodec/videodsp.h" | ||
30 | |||
31 | #if HAVE_X86ASM | ||
/*
 * Function types for the routines emulated_edge_mc() calls first, to
 * "fill in the to-be-copied part plus all above/below".
 *
 * emu_edge_vfix_func: no width argument; emulated_edge_mc() picks one such
 * routine per width from a table.
 * NOTE(review): start_y/end_y look like the first and one-past-last block
 * rows that are backed by real source data, and bh the block height - that
 * is how emulated_edge_mc() fills them in; confirm against the routines'
 * implementation, which is not part of this file.
 */
32 | typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride, | ||
33 | const uint8_t *src, x86_reg src_stride, | ||
34 | x86_reg start_y, x86_reg end_y, x86_reg bh); | ||
/*
 * emu_edge_vvar_func: same leading arguments as emu_edge_vfix_func plus the
 * width `w` as an explicit last argument.
 */
35 | typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride, | ||
36 | const uint8_t *src, x86_reg src_stride, | ||
37 | x86_reg start_y, x86_reg end_y, x86_reg bh, | ||
38 | x86_reg w); | ||
39 | |||
/*
 * Externally defined fixed-width routines, one per width from 1 to 22.
 * NOTE(review): presumably implemented in x86 assembly (this whole section
 * is guarded by HAVE_X86ASM) - the definitions are not in this file.
 */
40 | extern emu_edge_vfix_func ff_emu_edge_vfix1_sse2; | ||
41 | extern emu_edge_vfix_func ff_emu_edge_vfix2_sse2; | ||
42 | extern emu_edge_vfix_func ff_emu_edge_vfix3_sse2; | ||
43 | extern emu_edge_vfix_func ff_emu_edge_vfix4_sse2; | ||
44 | extern emu_edge_vfix_func ff_emu_edge_vfix5_sse2; | ||
45 | extern emu_edge_vfix_func ff_emu_edge_vfix6_sse2; | ||
46 | extern emu_edge_vfix_func ff_emu_edge_vfix7_sse2; | ||
47 | extern emu_edge_vfix_func ff_emu_edge_vfix8_sse2; | ||
48 | extern emu_edge_vfix_func ff_emu_edge_vfix9_sse2; | ||
49 | extern emu_edge_vfix_func ff_emu_edge_vfix10_sse2; | ||
50 | extern emu_edge_vfix_func ff_emu_edge_vfix11_sse2; | ||
51 | extern emu_edge_vfix_func ff_emu_edge_vfix12_sse2; | ||
52 | extern emu_edge_vfix_func ff_emu_edge_vfix13_sse2; | ||
53 | extern emu_edge_vfix_func ff_emu_edge_vfix14_sse2; | ||
54 | extern emu_edge_vfix_func ff_emu_edge_vfix15_sse2; | ||
55 | extern emu_edge_vfix_func ff_emu_edge_vfix16_sse2; | ||
56 | extern emu_edge_vfix_func ff_emu_edge_vfix17_sse2; | ||
57 | extern emu_edge_vfix_func ff_emu_edge_vfix18_sse2; | ||
58 | extern emu_edge_vfix_func ff_emu_edge_vfix19_sse2; | ||
59 | extern emu_edge_vfix_func ff_emu_edge_vfix20_sse2; | ||
60 | extern emu_edge_vfix_func ff_emu_edge_vfix21_sse2; | ||
61 | extern emu_edge_vfix_func ff_emu_edge_vfix22_sse2; | ||
/*
 * Dispatch table for the routines above. emulated_edge_mc() indexes it with
 * (width - 1) and only does so for widths up to 22, which matches the 22
 * entries stored here (entry i handles width i + 1).
 */
62 | static emu_edge_vfix_func * const vfixtbl_sse2[22] = { | ||
63 | ff_emu_edge_vfix1_sse2, ff_emu_edge_vfix2_sse2, ff_emu_edge_vfix3_sse2, | ||
64 | ff_emu_edge_vfix4_sse2, ff_emu_edge_vfix5_sse2, ff_emu_edge_vfix6_sse2, | ||
65 | ff_emu_edge_vfix7_sse2, ff_emu_edge_vfix8_sse2, ff_emu_edge_vfix9_sse2, | ||
66 | ff_emu_edge_vfix10_sse2, ff_emu_edge_vfix11_sse2, ff_emu_edge_vfix12_sse2, | ||
67 | ff_emu_edge_vfix13_sse2, ff_emu_edge_vfix14_sse2, ff_emu_edge_vfix15_sse2, | ||
68 | ff_emu_edge_vfix16_sse2, ff_emu_edge_vfix17_sse2, ff_emu_edge_vfix18_sse2, | ||
69 | ff_emu_edge_vfix19_sse2, ff_emu_edge_vfix20_sse2, ff_emu_edge_vfix21_sse2, | ||
70 | ff_emu_edge_vfix22_sse2 | ||
71 | }; | ||
/*
 * Variable-width counterpart, used by emulated_edge_mc() for widths above 22.
 * Both wrappers in this file (SSE2 and AVX2) pass this same routine.
 */
72 | extern emu_edge_vvar_func ff_emu_edge_vvar_sse; | ||
73 | |||
/*
 * Function types for the routines emulated_edge_mc() calls to fill the left
 * and right parts of the destination block. They only receive the
 * destination (no source pointer).
 *
 * emu_edge_hfix_func: fixed-size variant, selected through a table.
 * NOTE(review): the exact meaning of `start_x` is defined by the external
 * routines; emulated_edge_mc() passes the left fill width for the left side
 * and 0 or -1 for the right side - confirm against the implementation.
 */
74 | typedef void emu_edge_hfix_func(uint8_t *dst, x86_reg dst_stride, | ||
75 | x86_reg start_x, x86_reg bh); | ||
/*
 * emu_edge_hvar_func: variable-size variant; additionally takes `n_words`,
 * which emulated_edge_mc() computes as (fill width + 1) >> 1.
 */
76 | typedef void emu_edge_hvar_func(uint8_t *dst, x86_reg dst_stride, | ||
77 | x86_reg start_x, x86_reg n_words, x86_reg bh); | ||
78 | |||
/*
 * Externally defined fixed-size horizontal routines; only even sizes from
 * 2 to 22 exist.
 */
79 | extern emu_edge_hfix_func ff_emu_edge_hfix2_sse2; | ||
80 | extern emu_edge_hfix_func ff_emu_edge_hfix4_sse2; | ||
81 | extern emu_edge_hfix_func ff_emu_edge_hfix6_sse2; | ||
82 | extern emu_edge_hfix_func ff_emu_edge_hfix8_sse2; | ||
83 | extern emu_edge_hfix_func ff_emu_edge_hfix10_sse2; | ||
84 | extern emu_edge_hfix_func ff_emu_edge_hfix12_sse2; | ||
85 | extern emu_edge_hfix_func ff_emu_edge_hfix14_sse2; | ||
86 | extern emu_edge_hfix_func ff_emu_edge_hfix16_sse2; | ||
87 | extern emu_edge_hfix_func ff_emu_edge_hfix18_sse2; | ||
88 | extern emu_edge_hfix_func ff_emu_edge_hfix20_sse2; | ||
89 | extern emu_edge_hfix_func ff_emu_edge_hfix22_sse2; | ||
/*
 * Dispatch table for the routines above. emulated_edge_mc() indexes it with
 * (n - 1) >> 1 for a fill width n of at most 22, so an odd n selects the
 * routine for the next even size (n = 1 or 2 -> hfix2, n = 3 or 4 -> hfix4,
 * ..., n = 21 or 22 -> hfix22).
 */
90 | static emu_edge_hfix_func * const hfixtbl_sse2[11] = { | ||
91 | ff_emu_edge_hfix2_sse2, ff_emu_edge_hfix4_sse2, ff_emu_edge_hfix6_sse2, | ||
92 | ff_emu_edge_hfix8_sse2, ff_emu_edge_hfix10_sse2, ff_emu_edge_hfix12_sse2, | ||
93 | ff_emu_edge_hfix14_sse2, ff_emu_edge_hfix16_sse2, ff_emu_edge_hfix18_sse2, | ||
94 | ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2 | ||
95 | }; | ||
/* Variable-size counterpart, used for fill widths above 22. */
96 | extern emu_edge_hvar_func ff_emu_edge_hvar_sse2; | ||
/*
 * AVX2 flavour of the horizontal routines, only declared when
 * HAVE_AVX2_EXTERNAL is set. AVX2 versions exist for sizes 8 to 22 only.
 */
97 | #if HAVE_AVX2_EXTERNAL | ||
98 | extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2; | ||
99 | extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2; | ||
100 | extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2; | ||
101 | extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2; | ||
102 | extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2; | ||
103 | extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2; | ||
104 | extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2; | ||
105 | extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2; | ||
/*
 * Same layout as the SSE2 horizontal table (11 entries, even sizes 2..22).
 * The first three slots (sizes 2, 4 and 6) reuse the SSE2 routines because
 * no AVX2 versions are declared for them; the remaining slots use AVX2.
 */
106 | static emu_edge_hfix_func * const hfixtbl_avx2[11] = { | ||
107 | ff_emu_edge_hfix2_sse2, ff_emu_edge_hfix4_sse2, ff_emu_edge_hfix6_sse2, | ||
108 | ff_emu_edge_hfix8_avx2, ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2, | ||
109 | ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2, | ||
110 | ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2 | ||
111 | }; | ||
/* Variable-size AVX2 counterpart, used for fill widths above 22. */
112 | extern emu_edge_hvar_func ff_emu_edge_hvar_avx2; | ||
113 | #endif | ||
114 | |||
/*
 * Shared implementation behind the emulated_edge_mc_sse2/_avx2 entry points.
 *
 * For a block of block_w x block_h positioned at (src_x, src_y), it works
 * out which block rows [start_y, end_y) and columns [start_x, end_x) fall
 * inside the range [0, h) x [0, w), then drives the supplied routines in
 * three steps:
 *   1. the vertical routine on columns start_x..end_x-1 ("the to-be-copied
 *      part plus all above/below"),
 *   2. the horizontal routine for the start_x columns on the left,
 *   3. the horizontal routine for the block_w - end_x columns on the right.
 *
 * Parameters:
 *   dst, dst_stride   destination buffer and its stride
 *   src, src_stride   source pointer and its stride
 *   block_w, block_h  size of the block to produce
 *   src_x, src_y      position of the block; may be negative or beyond w/h
 *   w, h              extent of the valid range; nothing is done if either
 *                     is zero
 *   vfix_tbl          fixed-width vertical routines, indexed by width - 1
 *   v_extend_var      vertical routine for widths above 22
 *   hfix_tbl          fixed-size horizontal routines, indexed by
 *                     (size - 1) >> 1
 *   h_extend_var      horizontal routine for sizes above 22
 *
 * NOTE(review): w/h are presumably the source picture dimensions and src
 * presumably points at sample (src_x, src_y) on entry - that is what the
 * pointer adjustments below imply, but confirm against the
 * VideoDSPContext.emulated_edge_mc documentation.
 */
115 | 409204 | static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src, | |
116 | ptrdiff_t dst_stride, | ||
117 | ptrdiff_t src_stride, | ||
118 | x86_reg block_w, x86_reg block_h, | ||
119 | x86_reg src_x, x86_reg src_y, | ||
120 | x86_reg w, x86_reg h, | ||
121 | emu_edge_vfix_func * const *vfix_tbl, | ||
122 | emu_edge_vvar_func *v_extend_var, | ||
123 | emu_edge_hfix_func * const *hfix_tbl, | ||
124 | emu_edge_hvar_func *h_extend_var) | ||
125 | { | ||
/* src_y_add stays 0 unless src_y gets clamped below. */
126 | 409204 | x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p; | |
127 | |||
/* Empty valid range: return without touching dst. */
128 |
2/4✓ Branch 0 taken 409204 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 409204 times.
|
409204 | if (!w || !h) |
129 | ✗ | return; | |
130 | |||
131 | av_assert2(block_w <= FFABS(dst_stride)); | ||
132 | |||
/*
 * Vertical clamp. If the block starts at or past row h, or ends at or before
 * row 0, src is rewound by src_y rows and src_y is replaced by the nearest
 * position that still overlaps one row (h - 1, or 1 - block_h). src_y_add
 * remembers that replacement so it can be re-applied to src further down.
 */
133 |
2/2✓ Branch 0 taken 2231 times.
✓ Branch 1 taken 406973 times.
|
409204 | if (src_y >= h) { |
134 | 2231 | src -= src_y*src_stride; | |
135 | 2231 | src_y_add = h - 1; | |
136 | 2231 | src_y = h - 1; | |
137 |
2/2✓ Branch 0 taken 403 times.
✓ Branch 1 taken 406570 times.
|
406973 | } else if (src_y <= -block_h) { |
138 | 403 | src -= src_y*src_stride; | |
139 | 403 | src_y_add = 1 - block_h; | |
140 | 403 | src_y = 1 - block_h; | |
141 | } | ||
/*
 * Horizontal clamp, same idea: src is shifted by the difference between the
 * clamped and the original src_x, so that exactly one column overlaps.
 */
142 |
2/2✓ Branch 0 taken 758 times.
✓ Branch 1 taken 408446 times.
|
409204 | if (src_x >= w) { |
143 | 758 | src += w - 1 - src_x; | |
144 | 758 | src_x = w - 1; | |
145 |
2/2✓ Branch 0 taken 423 times.
✓ Branch 1 taken 408023 times.
|
408446 | } else if (src_x <= -block_w) { |
146 | 423 | src += 1 - block_w - src_x; | |
147 | 423 | src_x = 1 - block_w; | |
148 | } | ||
149 | |||
/*
 * Block-relative bounds of the part that lies inside [0, h) x [0, w):
 * start_* is the first row/column inside, end_* is one past the last.
 */
150 | 409204 | start_y = FFMAX(0, -src_y); | |
151 | 409204 | start_x = FFMAX(0, -src_x); | |
152 | 409204 | end_y = FFMIN(block_h, h-src_y); | |
153 | 409204 | end_x = FFMIN(block_w, w-src_x); | |
154 | av_assert2(start_x < end_x && block_w > 0); | ||
155 | av_assert2(start_y < end_y && block_h > 0); | ||
156 | |||
157 | // fill in the to-be-copied part plus all above/below | ||
/*
 * Advance src to row start_y / column start_x (plus the clamp offset, if
 * any). From here on `w` no longer holds the input extent: it is reused as
 * the number of columns inside the valid range.
 */
158 | 409204 | src += (src_y_add + start_y) * src_stride + start_x; | |
159 | 409204 | w = end_x - start_x; | |
/* Widths 1..22 go through the table; anything wider uses the variable routine. */
160 |
2/2✓ Branch 0 taken 403544 times.
✓ Branch 1 taken 5660 times.
|
409204 | if (w <= 22) { |
161 | 403544 | vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride, | |
162 | start_y, end_y, block_h); | ||
163 | } else { | ||
164 | 5660 | v_extend_var(dst + start_x, dst_stride, src, src_stride, | |
165 | start_y, end_y, block_h, w); | ||
166 | } | ||
167 | |||
168 | // fill left | ||
/*
 * start_x columns are missing on the left. The table only holds even sizes,
 * so (start_x - 1) >> 1 rounds an odd count up to the next even routine; the
 * variable routine gets the count in 2-column units, also rounded up.
 */
169 |
2/2✓ Branch 0 taken 33855 times.
✓ Branch 1 taken 375349 times.
|
409204 | if (start_x) { |
170 |
2/2✓ Branch 0 taken 33216 times.
✓ Branch 1 taken 639 times.
|
33855 | if (start_x <= 22) { |
171 | 33216 | hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h); | |
172 | } else { | ||
173 | 639 | h_extend_var(dst, dst_stride, | |
174 | 639 | start_x, (start_x + 1) >> 1, block_h); | |
175 | } | ||
176 | } | ||
177 | |||
178 | // fill right | ||
/*
 * p columns are missing on the right. For odd p the destination pointer is
 * moved one column back (dst + end_x - 1) and 0 is passed as start_x; for
 * even p the pointer is dst + end_x and -1 is passed.
 * NOTE(review): presumably this makes the even-sized routine end exactly at
 * the block's right edge and tells it where the sample to replicate sits -
 * confirm against the external routines.
 */
179 | 409204 | p = block_w - end_x; | |
180 |
2/2✓ Branch 0 taken 110990 times.
✓ Branch 1 taken 298214 times.
|
409204 | if (p) { |
181 |
2/2✓ Branch 0 taken 109734 times.
✓ Branch 1 taken 1256 times.
|
110990 | if (p <= 22) { |
182 | 109734 | hfix_tbl[(p - 1) >> 1](dst + end_x - (p & 1), dst_stride, | |
183 | 109734 | -!(p & 1), block_h); | |
184 | } else { | ||
185 | 1256 | h_extend_var(dst + end_x - (p & 1), dst_stride, | |
186 | 1256 | -!(p & 1), (p + 1) >> 1, block_h); | |
187 | } | ||
188 | } | ||
189 | } | ||
190 | |||
/*
 * SSE2 entry point: forwards all arguments unchanged to emulated_edge_mc(),
 * supplying the SSE2 vertical table, ff_emu_edge_vvar_sse, the SSE2
 * horizontal table and ff_emu_edge_hvar_sse2.
 * The signature uses plain int for sizes and positions; they are converted
 * to the helper's x86_reg parameters at the call.
 * ff_videodsp_init_x86() installs this function as ctx->emulated_edge_mc.
 */
191 | 1440 | static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src, | |
192 | ptrdiff_t buf_stride, | ||
193 | ptrdiff_t src_stride, | ||
194 | int block_w, int block_h, | ||
195 | int src_x, int src_y, int w, | ||
196 | int h) | ||
197 | { | ||
198 | 1440 | emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, | |
199 | src_x, src_y, w, h, vfixtbl_sse2, &ff_emu_edge_vvar_sse, | ||
200 | hfixtbl_sse2, &ff_emu_edge_hvar_sse2); | ||
201 | 1440 | } | |
202 | |||
203 | #if HAVE_AVX2_EXTERNAL | ||
/*
 * AVX2 entry point: forwards all arguments unchanged to emulated_edge_mc().
 * Only the horizontal routines differ from the SSE2 entry point
 * (hfixtbl_avx2 and ff_emu_edge_hvar_avx2); the vertical table and
 * ff_emu_edge_vvar_sse are the same ones the SSE2 entry point uses.
 * ff_videodsp_init_x86() installs this function as ctx->emulated_edge_mc.
 */
204 | 407764 | static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src, | |
205 | ptrdiff_t buf_stride, | ||
206 | ptrdiff_t src_stride, | ||
207 | int block_w, int block_h, | ||
208 | int src_x, int src_y, int w, | ||
209 | int h) | ||
210 | { | ||
211 | 407764 | emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, | |
212 | src_x, src_y, w, h, vfixtbl_sse2, &ff_emu_edge_vvar_sse, | ||
213 | hfixtbl_avx2, &ff_emu_edge_hvar_avx2); | ||
214 | 407764 | } | |
215 | #endif /* HAVE_AVX2_EXTERNAL */ | ||
216 | #endif /* HAVE_X86ASM */ | ||
217 | |||
/*
 * Prototype of the prefetch routine defined outside this file; it is
 * installed as ctx->prefetch by ff_videodsp_init_x86() when the MMXEXT
 * check passes. Declared outside the HAVE_X86ASM guard.
 */
218 | void ff_prefetch_mmxext(const uint8_t *buf, ptrdiff_t stride, int h); | ||
219 | |||
/*
 * Install the x86 implementations into a VideoDSPContext according to the
 * CPU flags reported at run time.
 *
 * ctx  context whose prefetch / emulated_edge_mc pointers may be replaced;
 *      pointers whose condition is not met are left as they were.
 * bpc  the emulated_edge_mc overrides are only applied when bpc <= 8.
 *      NOTE(review): presumably "bits per component" - confirm with callers.
 *
 * When HAVE_X86ASM is not set the body is empty and ctx is left untouched.
 */
220 | 3565 | av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc) | |
221 | { | ||
222 | #if HAVE_X86ASM | ||
223 | 3565 | int cpu_flags = av_get_cpu_flags(); | |
224 | |||
/* Prefetch does not depend on bpc. */
225 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 3309 times.
|
3565 | if (EXTERNAL_MMXEXT(cpu_flags)) { |
226 | 256 | ctx->prefetch = ff_prefetch_mmxext; | |
227 | } | ||
228 |
4/4✓ Branch 0 taken 254 times.
✓ Branch 1 taken 3311 times.
✓ Branch 2 taken 229 times.
✓ Branch 3 taken 25 times.
|
3565 | if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) { |
229 | 229 | ctx->emulated_edge_mc = emulated_edge_mc_sse2; | |
230 | } | ||
/* Checked after SSE2 on purpose: when both apply, the AVX2 assignment wins. */
231 | #if HAVE_AVX2_EXTERNAL | ||
232 |
4/4✓ Branch 0 taken 246 times.
✓ Branch 1 taken 3319 times.
✓ Branch 2 taken 221 times.
✓ Branch 3 taken 25 times.
|
3565 | if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) { |
233 | 221 | ctx->emulated_edge_mc = emulated_edge_mc_avx2; | |
234 | } | ||
235 | #endif | ||
236 | #endif /* HAVE_X86ASM */ | ||
237 | 3565 | } | |
238 |