Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * HEVC video decoder | ||
3 | * | ||
4 | * Copyright (C) 2012 - 2013 Guillaume Martres | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include "get_bits.h" | ||
24 | #include "hevcdec.h" | ||
25 | |||
26 | #include "bit_depth_template.c" | ||
27 | #include "dsp.h" | ||
28 | #include "h26x/h2656_sao_template.c" | ||
29 | #include "h26x/h2656_inter_template.c" | ||
30 | |||
31 | 74598 | static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height, | |
32 | GetBitContext *gb, int pcm_bit_depth) | ||
33 | { | ||
34 | int x, y; | ||
35 | 74598 | pixel *dst = (pixel *)_dst; | |
36 | |||
37 | 74598 | stride /= sizeof(pixel); | |
38 | |||
39 |
2/2✓ Branch 0 taken 327424 times.
✓ Branch 1 taken 37299 times.
|
729446 | for (y = 0; y < height; y++) { |
40 |
2/2✓ Branch 0 taken 4382080 times.
✓ Branch 1 taken 327424 times.
|
9419008 | for (x = 0; x < width; x++) |
41 | 8764160 | dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth); | |
42 | 654848 | dst += stride; | |
43 | } | ||
44 | 74598 | } | |
45 | |||
46 | 29453092 | static av_always_inline void FUNC(add_residual)(uint8_t *_dst, const int16_t *res, | |
47 | ptrdiff_t stride, int size) | ||
48 | { | ||
49 | int x, y; | ||
50 | 29453092 | pixel *dst = (pixel *)_dst; | |
51 | |||
52 | 29453092 | stride /= sizeof(pixel); | |
53 | |||
54 |
2/2✓ Branch 0 taken 116245116 times.
✓ Branch 1 taken 14726546 times.
|
261943324 | for (y = 0; y < size; y++) { |
55 |
2/2✓ Branch 0 taken 1581897296 times.
✓ Branch 1 taken 116245116 times.
|
3396284824 | for (x = 0; x < size; x++) { |
56 | 3163794592 | dst[x] = av_clip_pixel(dst[x] + *res); | |
57 | 3163794592 | res++; | |
58 | } | ||
59 | 232490232 | dst += stride; | |
60 | } | ||
61 | 29453092 | } | |
62 | |||
63 | 16727906 | static void FUNC(add_residual4x4)(uint8_t *_dst, const int16_t *res, | |
64 | ptrdiff_t stride) | ||
65 | { | ||
66 | 16727906 | FUNC(add_residual)(_dst, res, stride, 4); | |
67 | 16727906 | } | |
68 | |||
69 | 7623282 | static void FUNC(add_residual8x8)(uint8_t *_dst, const int16_t *res, | |
70 | ptrdiff_t stride) | ||
71 | { | ||
72 | 7623282 | FUNC(add_residual)(_dst, res, stride, 8); | |
73 | 7623282 | } | |
74 | |||
75 | 3666786 | static void FUNC(add_residual16x16)(uint8_t *_dst, const int16_t *res, | |
76 | ptrdiff_t stride) | ||
77 | { | ||
78 | 3666786 | FUNC(add_residual)(_dst, res, stride, 16); | |
79 | 3666786 | } | |
80 | |||
81 | 1435118 | static void FUNC(add_residual32x32)(uint8_t *_dst, const int16_t *res, | |
82 | ptrdiff_t stride) | ||
83 | { | ||
84 | 1435118 | FUNC(add_residual)(_dst, res, stride, 32); | |
85 | 1435118 | } | |
86 | |||
87 | 54126 | static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode) | |
88 | { | ||
89 | 54126 | int16_t *coeffs = (int16_t *) _coeffs; | |
90 | int x, y; | ||
91 | 54126 | int size = 1 << log2_size; | |
92 | |||
93 |
2/2✓ Branch 0 taken 9989 times.
✓ Branch 1 taken 17074 times.
|
54126 | if (mode) { |
94 | 19978 | coeffs += size; | |
95 |
2/2✓ Branch 0 taken 56315 times.
✓ Branch 1 taken 9989 times.
|
132608 | for (y = 0; y < size - 1; y++) { |
96 |
2/2✓ Branch 0 taken 523168 times.
✓ Branch 1 taken 56315 times.
|
1158966 | for (x = 0; x < size; x++) |
97 | 1046336 | coeffs[x] += coeffs[x - size]; | |
98 | 112630 | coeffs += size; | |
99 | } | ||
100 | } else { | ||
101 |
2/2✓ Branch 0 taken 124352 times.
✓ Branch 1 taken 17074 times.
|
282852 | for (y = 0; y < size; y++) { |
102 |
2/2✓ Branch 0 taken 1049312 times.
✓ Branch 1 taken 124352 times.
|
2347328 | for (x = 1; x < size; x++) |
103 | 2098624 | coeffs[x] += coeffs[x - 1]; | |
104 | 248704 | coeffs += size; | |
105 | } | ||
106 | } | ||
107 | 54126 | } | |
108 | |||
109 | 877016 | static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) | |
110 | { | ||
111 | 877016 | int shift = 15 - BIT_DEPTH - log2_size; | |
112 | int x, y; | ||
113 | 877016 | int size = 1 << log2_size; | |
114 | |||
115 |
2/2✓ Branch 0 taken 429222 times.
✓ Branch 1 taken 9286 times.
|
877016 | if (shift > 0) { |
116 | 858444 | int offset = 1 << (shift - 1); | |
117 |
2/2✓ Branch 0 taken 1790600 times.
✓ Branch 1 taken 429222 times.
|
4439644 | for (y = 0; y < size; y++) { |
118 |
2/2✓ Branch 0 taken 8185344 times.
✓ Branch 1 taken 1790600 times.
|
19951888 | for (x = 0; x < size; x++) { |
119 | 16370688 | *coeffs = (*coeffs + offset) >> shift; | |
120 | 16370688 | coeffs++; | |
121 | } | ||
122 | } | ||
123 | } else { | ||
124 |
2/2✓ Branch 0 taken 74992 times.
✓ Branch 1 taken 9286 times.
|
168556 | for (y = 0; y < size; y++) { |
125 |
2/2✓ Branch 0 taken 613504 times.
✓ Branch 1 taken 74992 times.
|
1376992 | for (x = 0; x < size; x++) { |
126 | 1227008 | *coeffs = *(uint16_t*)coeffs << -shift; | |
127 | 1227008 | coeffs++; | |
128 | } | ||
129 | } | ||
130 | } | ||
131 | 877016 | } | |
132 | |||
133 | #define SET(dst, x) (dst) = (x) | ||
134 | #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) | ||
135 | |||
136 | #define TR_4x4_LUMA(dst, src, step, assign) \ | ||
137 | do { \ | ||
138 | int c0 = src[0 * step] + src[2 * step]; \ | ||
139 | int c1 = src[2 * step] + src[3 * step]; \ | ||
140 | int c2 = src[0 * step] - src[3 * step]; \ | ||
141 | int c3 = 74 * src[1 * step]; \ | ||
142 | \ | ||
143 | assign(dst[2 * step], 74 * (src[0 * step] - \ | ||
144 | src[2 * step] + \ | ||
145 | src[3 * step])); \ | ||
146 | assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \ | ||
147 | assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \ | ||
148 | assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \ | ||
149 | } while (0) | ||
150 | |||
151 | 7369832 | static void FUNC(transform_4x4_luma)(int16_t *coeffs) | |
152 | { | ||
153 | int i; | ||
154 | 7369832 | int shift = 7; | |
155 | 7369832 | int add = 1 << (shift - 1); | |
156 | 7369832 | int16_t *src = coeffs; | |
157 | |||
158 |
2/2✓ Branch 0 taken 14739664 times.
✓ Branch 1 taken 3684916 times.
|
36849160 | for (i = 0; i < 4; i++) { |
159 | 29479328 | TR_4x4_LUMA(src, src, 4, SCALE); | |
160 | 29479328 | src++; | |
161 | } | ||
162 | |||
163 | 7369832 | shift = 20 - BIT_DEPTH; | |
164 | 7369832 | add = 1 << (shift - 1); | |
165 |
2/2✓ Branch 0 taken 14739664 times.
✓ Branch 1 taken 3684916 times.
|
36849160 | for (i = 0; i < 4; i++) { |
166 | 29479328 | TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); | |
167 | 29479328 | coeffs += 4; | |
168 | } | ||
169 | 7369832 | } | |
170 | |||
171 | #undef TR_4x4_LUMA | ||
172 | |||
173 | #define TR_4(dst, src, dstep, sstep, assign, end) \ | ||
174 | do { \ | ||
175 | const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ | ||
176 | const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ | ||
177 | const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ | ||
178 | const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ | ||
179 | \ | ||
180 | assign(dst[0 * dstep], e0 + o0); \ | ||
181 | assign(dst[1 * dstep], e1 + o1); \ | ||
182 | assign(dst[2 * dstep], e1 - o1); \ | ||
183 | assign(dst[3 * dstep], e0 - o0); \ | ||
184 | } while (0) | ||
185 | |||
186 | #define TR_8(dst, src, dstep, sstep, assign, end) \ | ||
187 | do { \ | ||
188 | int i, j; \ | ||
189 | int e_8[4]; \ | ||
190 | int o_8[4] = { 0 }; \ | ||
191 | for (i = 0; i < 4; i++) \ | ||
192 | for (j = 1; j < end; j += 2) \ | ||
193 | o_8[i] += transform[4 * j][i] * src[j * sstep]; \ | ||
194 | TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ | ||
195 | \ | ||
196 | for (i = 0; i < 4; i++) { \ | ||
197 | assign(dst[i * dstep], e_8[i] + o_8[i]); \ | ||
198 | assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ | ||
199 | } \ | ||
200 | } while (0) | ||
201 | |||
202 | #define TR_16(dst, src, dstep, sstep, assign, end) \ | ||
203 | do { \ | ||
204 | int i, j; \ | ||
205 | int e_16[8]; \ | ||
206 | int o_16[8] = { 0 }; \ | ||
207 | for (i = 0; i < 8; i++) \ | ||
208 | for (j = 1; j < end; j += 2) \ | ||
209 | o_16[i] += transform[2 * j][i] * src[j * sstep]; \ | ||
210 | TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ | ||
211 | \ | ||
212 | for (i = 0; i < 8; i++) { \ | ||
213 | assign(dst[i * dstep], e_16[i] + o_16[i]); \ | ||
214 | assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ | ||
215 | } \ | ||
216 | } while (0) | ||
217 | |||
218 | #define TR_32(dst, src, dstep, sstep, assign, end) \ | ||
219 | do { \ | ||
220 | int i, j; \ | ||
221 | int e_32[16]; \ | ||
222 | int o_32[16] = { 0 }; \ | ||
223 | for (i = 0; i < 16; i++) \ | ||
224 | for (j = 1; j < end; j += 2) \ | ||
225 | o_32[i] += transform[j][i] * src[j * sstep]; \ | ||
226 | TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \ | ||
227 | \ | ||
228 | for (i = 0; i < 16; i++) { \ | ||
229 | assign(dst[i * dstep], e_32[i] + o_32[i]); \ | ||
230 | assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ | ||
231 | } \ | ||
232 | } while (0) | ||
233 | |||
234 | #define IDCT_VAR4(H) \ | ||
235 | int limit2 = FFMIN(col_limit + 4, H) | ||
236 | #define IDCT_VAR8(H) \ | ||
237 | int limit = FFMIN(col_limit, H); \ | ||
238 | int limit2 = FFMIN(col_limit + 4, H) | ||
239 | #define IDCT_VAR16(H) IDCT_VAR8(H) | ||
240 | #define IDCT_VAR32(H) IDCT_VAR8(H) | ||
241 | |||
242 | #define IDCT(H) \ | ||
243 | static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ | ||
244 | int col_limit) \ | ||
245 | { \ | ||
246 | int i; \ | ||
247 | int shift = 7; \ | ||
248 | int add = 1 << (shift - 1); \ | ||
249 | int16_t *src = coeffs; \ | ||
250 | IDCT_VAR ## H(H); \ | ||
251 | \ | ||
252 | for (i = 0; i < H; i++) { \ | ||
253 | TR_ ## H(src, src, H, H, SCALE, limit2); \ | ||
254 | if (limit2 < H && i%4 == 0 && !!i) \ | ||
255 | limit2 -= 4; \ | ||
256 | src++; \ | ||
257 | } \ | ||
258 | \ | ||
259 | shift = 20 - BIT_DEPTH; \ | ||
260 | add = 1 << (shift - 1); \ | ||
261 | for (i = 0; i < H; i++) { \ | ||
262 | TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ | ||
263 | coeffs += H; \ | ||
264 | } \ | ||
265 | } | ||
266 | |||
267 | #define IDCT_DC(H) \ | ||
268 | static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \ | ||
269 | { \ | ||
270 | int i, j; \ | ||
271 | int shift = 14 - BIT_DEPTH; \ | ||
272 | int add = 1 << (shift - 1); \ | ||
273 | int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ | ||
274 | \ | ||
275 | for (j = 0; j < H; j++) { \ | ||
276 | for (i = 0; i < H; i++) { \ | ||
277 | coeffs[i + j * H] = coeff; \ | ||
278 | } \ | ||
279 | } \ | ||
280 | } | ||
281 | |||
282 |
5/10✗ Branch 0 not taken.
✓ Branch 1 taken 10688476 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 10688476 times.
✓ Branch 7 taken 2672119 times.
✓ Branch 8 taken 10688476 times.
✓ Branch 9 taken 2672119 times.
|
48098142 | IDCT( 4) |
283 |
17/22✓ Branch 0 taken 376372352 times.
✓ Branch 1 taken 94093088 times.
✓ Branch 2 taken 94093088 times.
✓ Branch 3 taken 23523272 times.
✓ Branch 4 taken 94093088 times.
✓ Branch 5 taken 23523272 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 23523272 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 23523272 times.
✓ Branch 13 taken 2940409 times.
✓ Branch 14 taken 264298560 times.
✓ Branch 15 taken 94093088 times.
✓ Branch 16 taken 94093088 times.
✓ Branch 17 taken 23523272 times.
✓ Branch 18 taken 94093088 times.
✓ Branch 19 taken 23523272 times.
✓ Branch 20 taken 23523272 times.
✓ Branch 21 taken 2940409 times.
|
2134060434 | IDCT( 8) |
284 |
34/34✓ Branch 0 taken 707399376 times.
✓ Branch 1 taken 190189568 times.
✓ Branch 2 taken 190189568 times.
✓ Branch 3 taken 23773696 times.
✓ Branch 4 taken 380379136 times.
✓ Branch 5 taken 95094784 times.
✓ Branch 6 taken 95094784 times.
✓ Branch 7 taken 23773696 times.
✓ Branch 8 taken 95094784 times.
✓ Branch 9 taken 23773696 times.
✓ Branch 10 taken 190189568 times.
✓ Branch 11 taken 23773696 times.
✓ Branch 12 taken 17904240 times.
✓ Branch 13 taken 5869456 times.
✓ Branch 14 taken 4476060 times.
✓ Branch 15 taken 13428180 times.
✓ Branch 16 taken 3357045 times.
✓ Branch 17 taken 1119015 times.
✓ Branch 18 taken 23773696 times.
✓ Branch 19 taken 1485856 times.
✓ Branch 20 taken 745224320 times.
✓ Branch 21 taken 190189568 times.
✓ Branch 22 taken 190189568 times.
✓ Branch 23 taken 23773696 times.
✓ Branch 24 taken 380379136 times.
✓ Branch 25 taken 95094784 times.
✓ Branch 26 taken 95094784 times.
✓ Branch 27 taken 23773696 times.
✓ Branch 28 taken 95094784 times.
✓ Branch 29 taken 23773696 times.
✓ Branch 30 taken 190189568 times.
✓ Branch 31 taken 23773696 times.
✓ Branch 32 taken 23773696 times.
✓ Branch 33 taken 1485856 times.
|
6807105248 | IDCT(16) |
285 |
46/46✓ Branch 0 taken 1189891040 times.
✓ Branch 1 taken 302496256 times.
✓ Branch 2 taken 302496256 times.
✓ Branch 3 taken 18906016 times.
✓ Branch 4 taken 289395288 times.
✓ Branch 5 taken 151248128 times.
✓ Branch 6 taken 151248128 times.
✓ Branch 7 taken 18906016 times.
✓ Branch 8 taken 302496256 times.
✓ Branch 9 taken 75624064 times.
✓ Branch 10 taken 75624064 times.
✓ Branch 11 taken 18906016 times.
✓ Branch 12 taken 75624064 times.
✓ Branch 13 taken 18906016 times.
✓ Branch 14 taken 151248128 times.
✓ Branch 15 taken 18906016 times.
✓ Branch 16 taken 302496256 times.
✓ Branch 17 taken 18906016 times.
✓ Branch 18 taken 16701600 times.
✓ Branch 19 taken 2204416 times.
✓ Branch 20 taken 4175400 times.
✓ Branch 21 taken 12526200 times.
✓ Branch 22 taken 3653475 times.
✓ Branch 23 taken 521925 times.
✓ Branch 24 taken 18906016 times.
✓ Branch 25 taken 590813 times.
✓ Branch 26 taken 1790057984 times.
✓ Branch 27 taken 302496256 times.
✓ Branch 28 taken 302496256 times.
✓ Branch 29 taken 18906016 times.
✓ Branch 30 taken 436636160 times.
✓ Branch 31 taken 151248128 times.
✓ Branch 32 taken 151248128 times.
✓ Branch 33 taken 18906016 times.
✓ Branch 34 taken 302496256 times.
✓ Branch 35 taken 75624064 times.
✓ Branch 36 taken 75624064 times.
✓ Branch 37 taken 18906016 times.
✓ Branch 38 taken 75624064 times.
✓ Branch 39 taken 18906016 times.
✓ Branch 40 taken 151248128 times.
✓ Branch 41 taken 18906016 times.
✓ Branch 42 taken 302496256 times.
✓ Branch 43 taken 18906016 times.
✓ Branch 44 taken 18906016 times.
✓ Branch 45 taken 590813 times.
|
12933699242 | IDCT(32) |
286 | |||
287 |
4/4✓ Branch 0 taken 17976448 times.
✓ Branch 1 taken 4494112 times.
✓ Branch 2 taken 4494112 times.
✓ Branch 3 taken 1123528 times.
|
47188176 | IDCT_DC( 4) |
288 |
4/4✓ Branch 0 taken 50940480 times.
✓ Branch 1 taken 6367560 times.
✓ Branch 2 taken 6367560 times.
✓ Branch 3 taken 795945 times.
|
116207970 | IDCT_DC( 8) |
289 |
4/4✓ Branch 0 taken 84889600 times.
✓ Branch 1 taken 5305600 times.
✓ Branch 2 taken 5305600 times.
✓ Branch 3 taken 331600 times.
|
181053600 | IDCT_DC(16) |
290 |
4/4✓ Branch 0 taken 127001600 times.
✓ Branch 1 taken 3968800 times.
✓ Branch 2 taken 3968800 times.
✓ Branch 3 taken 124025 times.
|
262188850 | IDCT_DC(32) |
291 | |||
292 | #undef TR_4 | ||
293 | #undef TR_8 | ||
294 | #undef TR_16 | ||
295 | #undef TR_32 | ||
296 | |||
297 | #undef SET | ||
298 | #undef SCALE | ||
299 | |||
300 | //////////////////////////////////////////////////////////////////////////////// | ||
301 | // | ||
302 | //////////////////////////////////////////////////////////////////////////////// | ||
303 | #define ff_hevc_pel_filters ff_hevc_qpel_filters | ||
304 | #define DECL_HV_FILTER(f) \ | ||
305 | const int8_t *hf = ff_hevc_ ## f ## _filters[mx]; \ | ||
306 | const int8_t *vf = ff_hevc_ ## f ## _filters[my]; | ||
307 | |||
308 | #define FW_PUT(p, f, t) \ | ||
309 | static void FUNC(put_hevc_## f)(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, \ | ||
310 | intptr_t mx, intptr_t my, int width) \ | ||
311 | { \ | ||
312 | DECL_HV_FILTER(p) \ | ||
313 | FUNC(put_ ## t)(dst, src, srcstride, height, hf, vf, width); \ | ||
314 | } | ||
315 | |||
316 | #define FW_PUT_UNI(p, f, t) \ | ||
317 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
318 | ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width) \ | ||
319 | { \ | ||
320 | DECL_HV_FILTER(p) \ | ||
321 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, hf, vf, width); \ | ||
322 | } | ||
323 | |||
324 | #define FW_PUT_UNI_W(p, f, t) \ | ||
325 | static void FUNC(put_hevc_ ## f)(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, \ | ||
326 | ptrdiff_t srcstride,int height, int denom, int wx, int ox, \ | ||
327 | intptr_t mx, intptr_t my, int width) \ | ||
328 | { \ | ||
329 | DECL_HV_FILTER(p) \ | ||
330 | FUNC(put_ ## t)(dst, dststride, src, srcstride, height, denom, wx, ox, hf, vf, width); \ | ||
331 | } | ||
332 | |||
333 | #define FW_PUT_FUNCS(f, t, dir) \ | ||
334 | FW_PUT(f, f ## _ ## dir, t ## _ ## dir) \ | ||
335 | FW_PUT_UNI(f, f ## _uni_ ## dir, uni_ ## t ## _ ## dir) \ | ||
336 | FW_PUT_UNI_W(f, f ## _uni_w_ ## dir, uni_## t ## _w_ ## dir) | ||
337 | |||
338 | 4955258 | FW_PUT(pel, pel_pixels, pixels) | |
339 | 5077448 | FW_PUT_UNI(pel, pel_uni_pixels, uni_pixels) | |
340 | 145958 | FW_PUT_UNI_W(pel, pel_uni_w_pixels, uni_w_pixels) | |
341 | |||
342 | 3114704 | FW_PUT_FUNCS(qpel, luma, h ) | |
343 | 2600776 | FW_PUT_FUNCS(qpel, luma, v ) | |
344 | 8186578 | FW_PUT_FUNCS(qpel, luma, hv ) | |
345 | 5047532 | FW_PUT_FUNCS(epel, chroma, h ) | |
346 | 3811016 | FW_PUT_FUNCS(epel, chroma, v ) | |
347 | 21217332 | FW_PUT_FUNCS(epel, chroma, hv ) | |
348 | |||
349 | 5414456 | static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
350 | const int16_t *src2, | ||
351 | int height, intptr_t mx, intptr_t my, int width) | ||
352 | { | ||
353 | int x, y; | ||
354 | 5414456 | const pixel *src = (const pixel *)_src; | |
355 | 5414456 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
356 | 5414456 | pixel *dst = (pixel *)_dst; | |
357 | 5414456 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
358 | |||
359 | 5414456 | int shift = 14 + 1 - BIT_DEPTH; | |
360 | #if BIT_DEPTH < 14 | ||
361 | 5414456 | int offset = 1 << (shift - 1); | |
362 | #else | ||
363 | int offset = 0; | ||
364 | #endif | ||
365 | |||
366 |
2/2✓ Branch 0 taken 40991802 times.
✓ Branch 1 taken 2707228 times.
|
87398060 | for (y = 0; y < height; y++) { |
367 |
2/2✓ Branch 0 taken 1061369396 times.
✓ Branch 1 taken 40991802 times.
|
2204722396 | for (x = 0; x < width; x++) |
368 | 2122738792 | dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); | |
369 | 81983604 | src += srcstride; | |
370 | 81983604 | dst += dststride; | |
371 | 81983604 | src2 += MAX_PB_SIZE; | |
372 | } | ||
373 | 5414456 | } | |
374 | |||
375 | 64154 | static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
376 | const int16_t *src2, | ||
377 | int height, int denom, int wx0, int wx1, | ||
378 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
379 | { | ||
380 | int x, y; | ||
381 | 64154 | const pixel *src = (const pixel *)_src; | |
382 | 64154 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
383 | 64154 | pixel *dst = (pixel *)_dst; | |
384 | 64154 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
385 | |||
386 | 64154 | int shift = 14 + 1 - BIT_DEPTH; | |
387 | 64154 | int log2Wd = denom + shift - 1; | |
388 | |||
389 | 64154 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
390 | 64154 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
391 |
2/2✓ Branch 0 taken 671000 times.
✓ Branch 1 taken 32077 times.
|
1406154 | for (y = 0; y < height; y++) { |
392 |
2/2✓ Branch 0 taken 21890728 times.
✓ Branch 1 taken 671000 times.
|
45123456 | for (x = 0; x < width; x++) { |
393 | 43781456 | dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1)); | |
394 | } | ||
395 | 1342000 | src += srcstride; | |
396 | 1342000 | dst += dststride; | |
397 | 1342000 | src2 += MAX_PB_SIZE; | |
398 | } | ||
399 | 64154 | } | |
400 | |||
401 | //////////////////////////////////////////////////////////////////////////////// | ||
402 | // | ||
403 | //////////////////////////////////////////////////////////////////////////////// | ||
404 | #define QPEL_FILTER(src, stride) \ | ||
405 | (filter[0] * src[x - 3 * stride] + \ | ||
406 | filter[1] * src[x - 2 * stride] + \ | ||
407 | filter[2] * src[x - stride] + \ | ||
408 | filter[3] * src[x ] + \ | ||
409 | filter[4] * src[x + stride] + \ | ||
410 | filter[5] * src[x + 2 * stride] + \ | ||
411 | filter[6] * src[x + 3 * stride] + \ | ||
412 | filter[7] * src[x + 4 * stride]) | ||
413 | |||
414 | 1354728 | static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, | |
415 | const int16_t *src2, | ||
416 | int height, intptr_t mx, intptr_t my, int width) | ||
417 | { | ||
418 | int x, y; | ||
419 | 1354728 | const pixel *src = (const pixel*)_src; | |
420 | 1354728 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
421 | 1354728 | pixel *dst = (pixel *)_dst; | |
422 | 1354728 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
423 | |||
424 | 1354728 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
425 | |||
426 | 1354728 | int shift = 14 + 1 - BIT_DEPTH; | |
427 | #if BIT_DEPTH < 14 | ||
428 | 1354728 | int offset = 1 << (shift - 1); | |
429 | #else | ||
430 | int offset = 0; | ||
431 | #endif | ||
432 | |||
433 |
2/2✓ Branch 0 taken 15073720 times.
✓ Branch 1 taken 677364 times.
|
31502168 | for (y = 0; y < height; y++) { |
434 |
2/2✓ Branch 0 taken 511366472 times.
✓ Branch 1 taken 15073720 times.
|
1052880384 | for (x = 0; x < width; x++) |
435 | 1022732944 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
436 | 30147440 | src += srcstride; | |
437 | 30147440 | dst += dststride; | |
438 | 30147440 | src2 += MAX_PB_SIZE; | |
439 | } | ||
440 | 1354728 | } | |
441 | |||
442 | 1066828 | static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
443 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
444 | int height, intptr_t mx, intptr_t my, int width) | ||
445 | { | ||
446 | int x, y; | ||
447 | 1066828 | const pixel *src = (const pixel*)_src; | |
448 | 1066828 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
449 | 1066828 | pixel *dst = (pixel *)_dst; | |
450 | 1066828 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
451 | |||
452 | 1066828 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
453 | |||
454 | 1066828 | int shift = 14 + 1 - BIT_DEPTH; | |
455 | #if BIT_DEPTH < 14 | ||
456 | 1066828 | int offset = 1 << (shift - 1); | |
457 | #else | ||
458 | int offset = 0; | ||
459 | #endif | ||
460 | |||
461 |
2/2✓ Branch 0 taken 11505952 times.
✓ Branch 1 taken 533414 times.
|
24078732 | for (y = 0; y < height; y++) { |
462 |
2/2✓ Branch 0 taken 376371400 times.
✓ Branch 1 taken 11505952 times.
|
775754704 | for (x = 0; x < width; x++) |
463 | 752742800 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
464 | 23011904 | src += srcstride; | |
465 | 23011904 | dst += dststride; | |
466 | 23011904 | src2 += MAX_PB_SIZE; | |
467 | } | ||
468 | 1066828 | } | |
469 | |||
470 | 3346270 | static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
471 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
472 | int height, intptr_t mx, intptr_t my, int width) | ||
473 | { | ||
474 | int x, y; | ||
475 | const int8_t *filter; | ||
476 | 3346270 | const pixel *src = (const pixel*)_src; | |
477 | 3346270 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
478 | 3346270 | pixel *dst = (pixel *)_dst; | |
479 | 3346270 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
480 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
481 | 3346270 | int16_t *tmp = tmp_array; | |
482 | 3346270 | int shift = 14 + 1 - BIT_DEPTH; | |
483 | #if BIT_DEPTH < 14 | ||
484 | 3346270 | int offset = 1 << (shift - 1); | |
485 | #else | ||
486 | int offset = 0; | ||
487 | #endif | ||
488 | |||
489 | 3346270 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
490 | 3346270 | filter = ff_hevc_qpel_filters[mx]; | |
491 |
2/2✓ Branch 0 taken 47805493 times.
✓ Branch 1 taken 1673135 times.
|
98957256 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
492 |
2/2✓ Branch 0 taken 1420553512 times.
✓ Branch 1 taken 47805493 times.
|
2936718010 | for (x = 0; x < width; x++) |
493 | 2841107024 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
494 | 95610986 | src += srcstride; | |
495 | 95610986 | tmp += MAX_PB_SIZE; | |
496 | } | ||
497 | |||
498 | 3346270 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
499 | 3346270 | filter = ff_hevc_qpel_filters[my]; | |
500 | |||
501 |
2/2✓ Branch 0 taken 36093548 times.
✓ Branch 1 taken 1673135 times.
|
75533366 | for (y = 0; y < height; y++) { |
502 |
2/2✓ Branch 0 taken 1170958152 times.
✓ Branch 1 taken 36093548 times.
|
2414103400 | for (x = 0; x < width; x++) |
503 | 2341916304 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
504 | 72187096 | tmp += MAX_PB_SIZE; | |
505 | 72187096 | dst += dststride; | |
506 | 72187096 | src2 += MAX_PB_SIZE; | |
507 | } | ||
508 | 3346270 | } | |
509 | |||
510 | 22810 | static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
511 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
512 | int height, int denom, int wx0, int wx1, | ||
513 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
514 | { | ||
515 | int x, y; | ||
516 | 22810 | const pixel *src = (const pixel*)_src; | |
517 | 22810 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
518 | 22810 | pixel *dst = (pixel *)_dst; | |
519 | 22810 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
520 | |||
521 | 22810 | const int8_t *filter = ff_hevc_qpel_filters[mx]; | |
522 | |||
523 | 22810 | int shift = 14 + 1 - BIT_DEPTH; | |
524 | 22810 | int log2Wd = denom + shift - 1; | |
525 | |||
526 | 22810 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
527 | 22810 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
528 |
2/2✓ Branch 0 taken 278600 times.
✓ Branch 1 taken 11405 times.
|
580010 | for (y = 0; y < height; y++) { |
529 |
2/2✓ Branch 0 taken 9869200 times.
✓ Branch 1 taken 278600 times.
|
20295600 | for (x = 0; x < width; x++) |
530 | 19738400 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
531 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
532 | 557200 | src += srcstride; | |
533 | 557200 | dst += dststride; | |
534 | 557200 | src2 += MAX_PB_SIZE; | |
535 | } | ||
536 | 22810 | } | |
537 | |||
538 | 20448 | static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
539 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
540 | int height, int denom, int wx0, int wx1, | ||
541 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
542 | { | ||
543 | int x, y; | ||
544 | 20448 | const pixel *src = (const pixel*)_src; | |
545 | 20448 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
546 | 20448 | pixel *dst = (pixel *)_dst; | |
547 | 20448 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
548 | |||
549 | 20448 | const int8_t *filter = ff_hevc_qpel_filters[my]; | |
550 | |||
551 | 20448 | int shift = 14 + 1 - BIT_DEPTH; | |
552 | 20448 | int log2Wd = denom + shift - 1; | |
553 | |||
554 | 20448 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
555 | 20448 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
556 |
2/2✓ Branch 0 taken 236324 times.
✓ Branch 1 taken 10224 times.
|
493096 | for (y = 0; y < height; y++) { |
557 |
2/2✓ Branch 0 taken 7942032 times.
✓ Branch 1 taken 236324 times.
|
16356712 | for (x = 0; x < width; x++) |
558 | 15884064 | dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
559 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
560 | 472648 | src += srcstride; | |
561 | 472648 | dst += dststride; | |
562 | 472648 | src2 += MAX_PB_SIZE; | |
563 | } | ||
564 | 20448 | } | |
565 | |||
566 | 71042 | static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
567 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
568 | int height, int denom, int wx0, int wx1, | ||
569 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
570 | { | ||
571 | int x, y; | ||
572 | const int8_t *filter; | ||
573 | 71042 | const pixel *src = (const pixel*)_src; | |
574 | 71042 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
575 | 71042 | pixel *dst = (pixel *)_dst; | |
576 | 71042 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
577 | int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; | ||
578 | 71042 | int16_t *tmp = tmp_array; | |
579 | 71042 | int shift = 14 + 1 - BIT_DEPTH; | |
580 | 71042 | int log2Wd = denom + shift - 1; | |
581 | |||
582 | 71042 | src -= QPEL_EXTRA_BEFORE * srcstride; | |
583 | 71042 | filter = ff_hevc_qpel_filters[mx]; | |
584 |
2/2✓ Branch 0 taken 1131207 times.
✓ Branch 1 taken 35521 times.
|
2333456 | for (y = 0; y < height + QPEL_EXTRA; y++) { |
585 |
2/2✓ Branch 0 taken 36956072 times.
✓ Branch 1 taken 1131207 times.
|
76174558 | for (x = 0; x < width; x++) |
586 | 73912144 | tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
587 | 2262414 | src += srcstride; | |
588 | 2262414 | tmp += MAX_PB_SIZE; | |
589 | } | ||
590 | |||
591 | 71042 | tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
592 | 71042 | filter = ff_hevc_qpel_filters[my]; | |
593 | |||
594 | 71042 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
595 | 71042 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
596 |
2/2✓ Branch 0 taken 882560 times.
✓ Branch 1 taken 35521 times.
|
1836162 | for (y = 0; y < height; y++) { |
597 |
2/2✓ Branch 0 taken 30984848 times.
✓ Branch 1 taken 882560 times.
|
63734816 | for (x = 0; x < width; x++) |
598 | 61969696 | dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
599 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
600 | 1765120 | tmp += MAX_PB_SIZE; | |
601 | 1765120 | dst += dststride; | |
602 | 1765120 | src2 += MAX_PB_SIZE; | |
603 | } | ||
604 | 71042 | } | |
605 | |||
606 | //////////////////////////////////////////////////////////////////////////////// | ||
607 | // | ||
608 | //////////////////////////////////////////////////////////////////////////////// | ||
609 | #define EPEL_FILTER(src, stride) \ | ||
610 | (filter[0] * src[x - stride] + \ | ||
611 | filter[1] * src[x] + \ | ||
612 | filter[2] * src[x + stride] + \ | ||
613 | filter[3] * src[x + 2 * stride]) | ||
614 | |||
615 | 2154950 | static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
616 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
617 | int height, intptr_t mx, intptr_t my, int width) | ||
618 | { | ||
619 | int x, y; | ||
620 | 2154950 | const pixel *src = (const pixel *)_src; | |
621 | 2154950 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
622 | 2154950 | pixel *dst = (pixel *)_dst; | |
623 | 2154950 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
624 | 2154950 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
625 | 2154950 | int shift = 14 + 1 - BIT_DEPTH; | |
626 | #if BIT_DEPTH < 14 | ||
627 | 2154950 | int offset = 1 << (shift - 1); | |
628 | #else | ||
629 | int offset = 0; | ||
630 | #endif | ||
631 | |||
632 |
2/2✓ Branch 0 taken 12739858 times.
✓ Branch 1 taken 1077475 times.
|
27634666 | for (y = 0; y < height; y++) { |
633 |
2/2✓ Branch 0 taken 216603220 times.
✓ Branch 1 taken 12739858 times.
|
458686156 | for (x = 0; x < width; x++) { |
634 | 433206440 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
635 | } | ||
636 | 25479716 | dst += dststride; | |
637 | 25479716 | src += srcstride; | |
638 | 25479716 | src2 += MAX_PB_SIZE; | |
639 | } | ||
640 | 2154950 | } | |
641 | |||
642 | 1497610 | static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
643 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
644 | int height, intptr_t mx, intptr_t my, int width) | ||
645 | { | ||
646 | int x, y; | ||
647 | 1497610 | const pixel *src = (const pixel *)_src; | |
648 | 1497610 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
649 | 1497610 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
650 | 1497610 | pixel *dst = (pixel *)_dst; | |
651 | 1497610 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
652 | 1497610 | int shift = 14 + 1 - BIT_DEPTH; | |
653 | #if BIT_DEPTH < 14 | ||
654 | 1497610 | int offset = 1 << (shift - 1); | |
655 | #else | ||
656 | int offset = 0; | ||
657 | #endif | ||
658 | |||
659 |
2/2✓ Branch 0 taken 9046810 times.
✓ Branch 1 taken 748805 times.
|
19591230 | for (y = 0; y < height; y++) { |
660 |
2/2✓ Branch 0 taken 148837300 times.
✓ Branch 1 taken 9046810 times.
|
315768220 | for (x = 0; x < width; x++) |
661 | 297674600 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); | |
662 | 18093620 | dst += dststride; | |
663 | 18093620 | src += srcstride; | |
664 | 18093620 | src2 += MAX_PB_SIZE; | |
665 | } | ||
666 | 1497610 | } | |
667 | |||
668 | 8511098 | static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
669 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
670 | int height, intptr_t mx, intptr_t my, int width) | ||
671 | { | ||
672 | int x, y; | ||
673 | 8511098 | const pixel *src = (const pixel *)_src; | |
674 | 8511098 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
675 | 8511098 | pixel *dst = (pixel *)_dst; | |
676 | 8511098 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
677 | 8511098 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
678 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
679 | 8511098 | int16_t *tmp = tmp_array; | |
680 | 8511098 | int shift = 14 + 1 - BIT_DEPTH; | |
681 | #if BIT_DEPTH < 14 | ||
682 | 8511098 | int offset = 1 << (shift - 1); | |
683 | #else | ||
684 | int offset = 0; | ||
685 | #endif | ||
686 | |||
687 | 8511098 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
688 | |||
689 |
2/2✓ Branch 0 taken 61618213 times.
✓ Branch 1 taken 4255549 times.
|
131747524 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
690 |
2/2✓ Branch 0 taken 917947938 times.
✓ Branch 1 taken 61618213 times.
|
1959132302 | for (x = 0; x < width; x++) |
691 | 1835895876 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
692 | 123236426 | src += srcstride; | |
693 | 123236426 | tmp += MAX_PB_SIZE; | |
694 | } | ||
695 | |||
696 | 8511098 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
697 | 8511098 | filter = ff_hevc_epel_filters[my]; | |
698 | |||
699 |
2/2✓ Branch 0 taken 48851566 times.
✓ Branch 1 taken 4255549 times.
|
106214230 | for (y = 0; y < height; y++) { |
700 |
2/2✓ Branch 0 taken 781027188 times.
✓ Branch 1 taken 48851566 times.
|
1659757508 | for (x = 0; x < width; x++) |
701 | 1562054376 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); | |
702 | 97703132 | tmp += MAX_PB_SIZE; | |
703 | 97703132 | dst += dststride; | |
704 | 97703132 | src2 += MAX_PB_SIZE; | |
705 | } | ||
706 | 8511098 | } | |
707 | |||
708 | 33080 | static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, | |
709 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
710 | int height, int denom, int wx0, int wx1, | ||
711 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
712 | { | ||
713 | int x, y; | ||
714 | 33080 | const pixel *src = (const pixel *)_src; | |
715 | 33080 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
716 | 33080 | pixel *dst = (pixel *)_dst; | |
717 | 33080 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
718 | 33080 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
719 | 33080 | int shift = 14 + 1 - BIT_DEPTH; | |
720 | 33080 | int log2Wd = denom + shift - 1; | |
721 | |||
722 | 33080 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
723 | 33080 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
724 |
2/2✓ Branch 0 taken 233792 times.
✓ Branch 1 taken 16540 times.
|
500664 | for (y = 0; y < height; y++) { |
725 |
2/2✓ Branch 0 taken 5306152 times.
✓ Branch 1 taken 233792 times.
|
11079888 | for (x = 0; x < width; x++) |
726 | 10612304 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
727 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
728 | 467584 | src += srcstride; | |
729 | 467584 | dst += dststride; | |
730 | 467584 | src2 += MAX_PB_SIZE; | |
731 | } | ||
732 | 33080 | } | |
733 | |||
734 | 24552 | static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, | |
735 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
736 | int height, int denom, int wx0, int wx1, | ||
737 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
738 | { | ||
739 | int x, y; | ||
740 | 24552 | const pixel *src = (const pixel *)_src; | |
741 | 24552 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
742 | 24552 | const int8_t *filter = ff_hevc_epel_filters[my]; | |
743 | 24552 | pixel *dst = (pixel *)_dst; | |
744 | 24552 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
745 | 24552 | int shift = 14 + 1 - BIT_DEPTH; | |
746 | 24552 | int log2Wd = denom + shift - 1; | |
747 | |||
748 | 24552 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
749 | 24552 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
750 |
2/2✓ Branch 0 taken 167956 times.
✓ Branch 1 taken 12276 times.
|
360464 | for (y = 0; y < height; y++) { |
751 |
2/2✓ Branch 0 taken 3922696 times.
✓ Branch 1 taken 167956 times.
|
8181304 | for (x = 0; x < width; x++) |
752 | 7845392 | dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + | |
753 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
754 | 335912 | src += srcstride; | |
755 | 335912 | dst += dststride; | |
756 | 335912 | src2 += MAX_PB_SIZE; | |
757 | } | ||
758 | 24552 | } | |
759 | |||
760 | 166916 | static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, | |
761 | const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, | ||
762 | int height, int denom, int wx0, int wx1, | ||
763 | int ox0, int ox1, intptr_t mx, intptr_t my, int width) | ||
764 | { | ||
765 | int x, y; | ||
766 | 166916 | const pixel *src = (const pixel *)_src; | |
767 | 166916 | ptrdiff_t srcstride = _srcstride / sizeof(pixel); | |
768 | 166916 | pixel *dst = (pixel *)_dst; | |
769 | 166916 | ptrdiff_t dststride = _dststride / sizeof(pixel); | |
770 | 166916 | const int8_t *filter = ff_hevc_epel_filters[mx]; | |
771 | int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; | ||
772 | 166916 | int16_t *tmp = tmp_array; | |
773 | 166916 | int shift = 14 + 1 - BIT_DEPTH; | |
774 | 166916 | int log2Wd = denom + shift - 1; | |
775 | |||
776 | 166916 | src -= EPEL_EXTRA_BEFORE * srcstride; | |
777 | |||
778 |
2/2✓ Branch 0 taken 1288910 times.
✓ Branch 1 taken 83458 times.
|
2744736 | for (y = 0; y < height + EPEL_EXTRA; y++) { |
779 |
2/2✓ Branch 0 taken 21782220 times.
✓ Branch 1 taken 1288910 times.
|
46142260 | for (x = 0; x < width; x++) |
780 | 43564440 | tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
781 | 2577820 | src += srcstride; | |
782 | 2577820 | tmp += MAX_PB_SIZE; | |
783 | } | ||
784 | |||
785 | 166916 | tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; | |
786 | 166916 | filter = ff_hevc_epel_filters[my]; | |
787 | |||
788 | 166916 | ox0 = ox0 * (1 << (BIT_DEPTH - 8)); | |
789 | 166916 | ox1 = ox1 * (1 << (BIT_DEPTH - 8)); | |
790 |
2/2✓ Branch 0 taken 1038536 times.
✓ Branch 1 taken 83458 times.
|
2243988 | for (y = 0; y < height; y++) { |
791 |
2/2✓ Branch 0 taken 18780552 times.
✓ Branch 1 taken 1038536 times.
|
39638176 | for (x = 0; x < width; x++) |
792 | 37561104 | dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + | |
793 | ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); | ||
794 | 2077072 | tmp += MAX_PB_SIZE; | |
795 | 2077072 | dst += dststride; | |
796 | 2077072 | src2 += MAX_PB_SIZE; | |
797 | } | ||
798 | 166916 | } | |
799 | |||
// line zero
// Sample aliases relative to `pix`, stepping by `xstride`: P0..P3 are the
// samples at negative offsets (P0 adjacent to pix), Q0..Q3 start at pix itself.
// Each expansion requires `pix` and `xstride` to be in scope.
#define P3 pix[-4 * xstride]
#define P2 pix[-3 * xstride]
#define P1 pix[-2 * xstride]
#define P0 pix[-1 * xstride]
#define Q0 pix[0 * xstride]
#define Q1 pix[1 * xstride]
#define Q2 pix[2 * xstride]
#define Q3 pix[3 * xstride]

// line three. used only for deblocking decision
// Same positions as above, displaced by 3 * ystride; these additionally
// require `ystride` to be in scope.
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-1 * xstride + 3 * ystride]
#define TQ0 pix[0 * xstride + 3 * ystride]
#define TQ1 pix[1 * xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
819 | |||
820 | #include "h26x/h2656_deblock_template.c" | ||
821 | |||
822 | 75367524 | static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, | |
823 | ptrdiff_t _xstride, ptrdiff_t _ystride, | ||
824 | int beta, const int *_tc, | ||
825 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
826 | { | ||
827 | 75367524 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
828 | 75367524 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
829 | |||
830 | 75367524 | beta <<= BIT_DEPTH - 8; | |
831 | |||
832 |
2/2✓ Branch 0 taken 75367524 times.
✓ Branch 1 taken 37683762 times.
|
226102572 | for (int j = 0; j < 2; j++) { |
833 | 150735048 | pixel* pix = (pixel*)_pix + j * 4 * ystride; | |
834 | 150735048 | const int dp0 = abs(P2 - 2 * P1 + P0); | |
835 | 150735048 | const int dq0 = abs(Q2 - 2 * Q1 + Q0); | |
836 | 150735048 | const int dp3 = abs(TP2 - 2 * TP1 + TP0); | |
837 | 150735048 | const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0); | |
838 | 150735048 | const int d0 = dp0 + dq0; | |
839 | 150735048 | const int d3 = dp3 + dq3; | |
840 | 150735048 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
841 | 150735048 | const int no_p = _no_p[j]; | |
842 | 150735048 | const int no_q = _no_q[j]; | |
843 | |||
844 |
2/2✓ Branch 0 taken 56237427 times.
✓ Branch 1 taken 19130097 times.
|
150735048 | if (d0 + d3 < beta) { |
845 | 112474854 | const int beta_3 = beta >> 3; | |
846 | 112474854 | const int beta_2 = beta >> 2; | |
847 | 112474854 | const int tc25 = ((tc * 5 + 1) >> 1); | |
848 | |||
849 |
4/4✓ Branch 0 taken 16482694 times.
✓ Branch 1 taken 39754733 times.
✓ Branch 2 taken 16017745 times.
✓ Branch 3 taken 464949 times.
|
112474854 | if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && |
850 |
4/4✓ Branch 0 taken 12173674 times.
✓ Branch 1 taken 3844071 times.
✓ Branch 2 taken 12077143 times.
✓ Branch 3 taken 96531 times.
|
32035490 | abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && |
851 |
4/4✓ Branch 0 taken 11664460 times.
✓ Branch 1 taken 412683 times.
✓ Branch 2 taken 11410554 times.
✓ Branch 3 taken 253906 times.
|
46975394 | (d0 << 1) < beta_2 && (d3 << 1) < beta_2) { |
852 | 22821108 | const int tc2 = tc << 1; | |
853 | 22821108 | FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc2, tc2, tc2, no_p, no_q); | |
854 | } else { | ||
855 | 89653746 | int nd_p = 1; | |
856 | 89653746 | int nd_q = 1; | |
857 |
2/2✓ Branch 0 taken 31748090 times.
✓ Branch 1 taken 13078783 times.
|
89653746 | if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3)) |
858 | 63496180 | nd_p = 2; | |
859 |
2/2✓ Branch 0 taken 30914522 times.
✓ Branch 1 taken 13912351 times.
|
89653746 | if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3)) |
860 | 61829044 | nd_q = 2; | |
861 | 89653746 | FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q); | |
862 | } | ||
863 | } | ||
864 | } | ||
865 | 75367524 | } | |
866 | |||
867 | 23893564 | static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride, | |
868 | ptrdiff_t _ystride, const int *_tc, | ||
869 | const uint8_t *_no_p, const uint8_t *_no_q) | ||
870 | { | ||
871 | int no_p, no_q; | ||
872 | 23893564 | ptrdiff_t xstride = _xstride / sizeof(pixel); | |
873 | 23893564 | ptrdiff_t ystride = _ystride / sizeof(pixel); | |
874 | 23893564 | const int size = 4; | |
875 | |||
876 |
2/2✓ Branch 0 taken 23893564 times.
✓ Branch 1 taken 11946782 times.
|
71680692 | for (int j = 0; j < 2; j++) { |
877 | 47787128 | pixel *pix = (pixel *)_pix + j * size * ystride; | |
878 | 47787128 | const int tc = _tc[j] << (BIT_DEPTH - 8); | |
879 |
2/2✓ Branch 0 taken 21484855 times.
✓ Branch 1 taken 2408709 times.
|
47787128 | if (tc > 0) { |
880 | 42969710 | no_p = _no_p[j]; | |
881 | 42969710 | no_q = _no_q[j]; | |
882 | |||
883 | 42969710 | FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q); | |
884 | } | ||
885 | } | ||
886 | 23893564 | } | |
887 | |||
888 | 11654090 | static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
889 | const int32_t *tc, const uint8_t *no_p, | ||
890 | const uint8_t *no_q) | ||
891 | { | ||
892 | 11654090 | FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); | |
893 | 11654090 | } | |
894 | |||
895 | 12239474 | static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, | |
896 | const int32_t *tc, const uint8_t *no_p, | ||
897 | const uint8_t *no_q) | ||
898 | { | ||
899 | 12239474 | FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); | |
900 | 12239474 | } | |
901 | |||
902 | 38025412 | static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
903 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
904 | const uint8_t *no_q) | ||
905 | { | ||
906 | 38025412 | FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), | |
907 | beta, tc, no_p, no_q); | ||
908 | 38025412 | } | |
909 | |||
910 | 37342112 | static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, | |
911 | int beta, const int32_t *tc, const uint8_t *no_p, | ||
912 | const uint8_t *no_q) | ||
913 | { | ||
914 | 37342112 | FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, | |
915 | beta, tc, no_p, no_q); | ||
916 | 37342112 | } | |
917 | |||
918 | #undef P3 | ||
919 | #undef P2 | ||
920 | #undef P1 | ||
921 | #undef P0 | ||
922 | #undef Q0 | ||
923 | #undef Q1 | ||
924 | #undef Q2 | ||
925 | #undef Q3 | ||
926 | |||
927 | #undef TP3 | ||
928 | #undef TP2 | ||
929 | #undef TP1 | ||
930 | #undef TP0 | ||
931 | #undef TQ0 | ||
932 | #undef TQ1 | ||
933 | #undef TQ2 | ||
934 | #undef TQ3 | ||
935 |