GCC Code Coverage Report | |||||||||||||||||||||
|
|||||||||||||||||||||
Line | Branch | Exec | Source |
1 |
/* |
||
2 |
* HEVC video decoder |
||
3 |
* |
||
4 |
* Copyright (C) 2012 - 2013 Guillaume Martres |
||
5 |
* |
||
6 |
* This file is part of FFmpeg. |
||
7 |
* |
||
8 |
* FFmpeg is free software; you can redistribute it and/or |
||
9 |
* modify it under the terms of the GNU Lesser General Public |
||
10 |
* License as published by the Free Software Foundation; either |
||
11 |
* version 2.1 of the License, or (at your option) any later version. |
||
12 |
* |
||
13 |
* FFmpeg is distributed in the hope that it will be useful, |
||
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 |
* Lesser General Public License for more details. |
||
17 |
* |
||
18 |
* You should have received a copy of the GNU Lesser General Public |
||
19 |
* License along with FFmpeg; if not, write to the Free Software |
||
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 |
*/ |
||
22 |
|||
23 |
#include "get_bits.h" |
||
24 |
#include "hevcdec.h" |
||
25 |
|||
26 |
#include "bit_depth_template.c" |
||
27 |
#include "hevcdsp.h" |
||
28 |
|||
29 |
74598 |
static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height, |
|
30 |
GetBitContext *gb, int pcm_bit_depth) |
||
31 |
{ |
||
32 |
int x, y; |
||
33 |
74598 |
pixel *dst = (pixel *)_dst; |
|
34 |
|||
35 |
74598 |
stride /= sizeof(pixel); |
|
36 |
|||
37 |
✓✓ | 729446 |
for (y = 0; y < height; y++) { |
38 |
✓✓ | 9419008 |
for (x = 0; x < width; x++) |
39 |
8764160 |
dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth); |
|
40 |
654848 |
dst += stride; |
|
41 |
} |
||
42 |
} |
||
43 |
|||
44 |
27910328 |
static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res, |
|
45 |
ptrdiff_t stride, int size) |
||
46 |
{ |
||
47 |
int x, y; |
||
48 |
27910328 |
pixel *dst = (pixel *)_dst; |
|
49 |
|||
50 |
27910328 |
stride /= sizeof(pixel); |
|
51 |
|||
52 |
✓✓ | 240607600 |
for (y = 0; y < size; y++) { |
53 |
✓✓ | 2963853016 |
for (x = 0; x < size; x++) { |
54 |
2751155744 |
dst[x] = av_clip_pixel(dst[x] + *res); |
|
55 |
2751155744 |
res++; |
|
56 |
} |
||
57 |
212697272 |
dst += stride; |
|
58 |
} |
||
59 |
27910328 |
} |
|
60 |
|||
61 |
16173114 |
static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res, |
|
62 |
ptrdiff_t stride) |
||
63 |
{ |
||
64 |
16173114 |
FUNC(add_residual)(_dst, res, stride, 4); |
|
65 |
16173114 |
} |
|
66 |
|||
67 |
7279210 |
static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res, |
|
68 |
ptrdiff_t stride) |
||
69 |
{ |
||
70 |
7279210 |
FUNC(add_residual)(_dst, res, stride, 8); |
|
71 |
7279210 |
} |
|
72 |
|||
73 |
3305312 |
static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res, |
|
74 |
ptrdiff_t stride) |
||
75 |
{ |
||
76 |
3305312 |
FUNC(add_residual)(_dst, res, stride, 16); |
|
77 |
3305312 |
} |
|
78 |
|||
79 |
1152692 |
static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res, |
|
80 |
ptrdiff_t stride) |
||
81 |
{ |
||
82 |
1152692 |
FUNC(add_residual)(_dst, res, stride, 32); |
|
83 |
1152692 |
} |
|
84 |
|||
85 |
44988 |
static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode) |
|
86 |
{ |
||
87 |
44988 |
int16_t *coeffs = (int16_t *) _coeffs; |
|
88 |
int x, y; |
||
89 |
44988 |
int size = 1 << log2_size; |
|
90 |
|||
91 |
✓✓ | 44988 |
if (mode) { |
92 |
15868 |
coeffs += size; |
|
93 |
✓✓ | 116168 |
for (y = 0; y < size - 1; y++) { |
94 |
✓✓ | 1097316 |
for (x = 0; x < size; x++) |
95 |
997016 |
coeffs[x] += coeffs[x - size]; |
|
96 |
100300 |
coeffs += size; |
|
97 |
} |
||
98 |
} else { |
||
99 |
✓✓ | 257712 |
for (y = 0; y < size; y++) { |
100 |
✓✓ | 2266880 |
for (x = 1; x < size; x++) |
101 |
2038288 |
coeffs[x] += coeffs[x - 1]; |
|
102 |
228592 |
coeffs += size; |
|
103 |
} |
||
104 |
} |
||
105 |
} |
||
106 |
|||
107 |
839954 |
static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size) |
|
108 |
{ |
||
109 |
839954 |
int shift = 15 - BIT_DEPTH - log2_size; |
|
110 |
int x, y; |
||
111 |
839954 |
int size = 1 << log2_size; |
|
112 |
|||
113 |
✓✓ | 839954 |
if (shift > 0) { |
114 |
821382 |
int offset = 1 << (shift - 1); |
|
115 |
✓✓ | 4254334 |
for (y = 0; y < size; y++) { |
116 |
✓✓ | 19210648 |
for (x = 0; x < size; x++) { |
117 |
15777696 |
*coeffs = (*coeffs + offset) >> shift; |
|
118 |
15777696 |
coeffs++; |
|
119 |
} |
||
120 |
} |
||
121 |
} else { |
||
122 |
✓✓ | 168556 |
for (y = 0; y < size; y++) { |
123 |
✓✓ | 1376992 |
for (x = 0; x < size; x++) { |
124 |
1227008 |
*coeffs = *(uint16_t*)coeffs << -shift; |
|
125 |
1227008 |
coeffs++; |
|
126 |
} |
||
127 |
} |
||
128 |
} |
||
129 |
} |
||
130 |
|||
131 |
#define SET(dst, x) (dst) = (x) |
||
132 |
#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) |
||
133 |
|||
134 |
#define TR_4x4_LUMA(dst, src, step, assign) \ |
||
135 |
do { \ |
||
136 |
int c0 = src[0 * step] + src[2 * step]; \ |
||
137 |
int c1 = src[2 * step] + src[3 * step]; \ |
||
138 |
int c2 = src[0 * step] - src[3 * step]; \ |
||
139 |
int c3 = 74 * src[1 * step]; \ |
||
140 |
\ |
||
141 |
assign(dst[2 * step], 74 * (src[0 * step] - \ |
||
142 |
src[2 * step] + \ |
||
143 |
src[3 * step])); \ |
||
144 |
assign(dst[0 * step], 29 * c0 + 55 * c1 + c3); \ |
||
145 |
assign(dst[1 * step], 55 * c2 - 29 * c1 + c3); \ |
||
146 |
assign(dst[3 * step], 55 * c0 + 29 * c2 - c3); \ |
||
147 |
} while (0) |
||
148 |
|||
149 |
7096862 |
static void FUNC(transform_4x4_luma)(int16_t *coeffs) |
|
150 |
{ |
||
151 |
int i; |
||
152 |
7096862 |
int shift = 7; |
|
153 |
7096862 |
int add = 1 << (shift - 1); |
|
154 |
7096862 |
int16_t *src = coeffs; |
|
155 |
|||
156 |
✓✓ | 35484310 |
for (i = 0; i < 4; i++) { |
157 |
28387448 |
TR_4x4_LUMA(src, src, 4, SCALE); |
|
158 |
28387448 |
src++; |
|
159 |
} |
||
160 |
|||
161 |
7096862 |
shift = 20 - BIT_DEPTH; |
|
162 |
7096862 |
add = 1 << (shift - 1); |
|
163 |
✓✓ | 35484310 |
for (i = 0; i < 4; i++) { |
164 |
28387448 |
TR_4x4_LUMA(coeffs, coeffs, 1, SCALE); |
|
165 |
28387448 |
coeffs += 4; |
|
166 |
} |
||
167 |
} |
||
168 |
|||
169 |
#undef TR_4x4_LUMA |
||
170 |
|||
171 |
#define TR_4(dst, src, dstep, sstep, assign, end) \ |
||
172 |
do { \ |
||
173 |
const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \ |
||
174 |
const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \ |
||
175 |
const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \ |
||
176 |
const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \ |
||
177 |
\ |
||
178 |
assign(dst[0 * dstep], e0 + o0); \ |
||
179 |
assign(dst[1 * dstep], e1 + o1); \ |
||
180 |
assign(dst[2 * dstep], e1 - o1); \ |
||
181 |
assign(dst[3 * dstep], e0 - o0); \ |
||
182 |
} while (0) |
||
183 |
|||
184 |
#define TR_8(dst, src, dstep, sstep, assign, end) \ |
||
185 |
do { \ |
||
186 |
int i, j; \ |
||
187 |
int e_8[4]; \ |
||
188 |
int o_8[4] = { 0 }; \ |
||
189 |
for (i = 0; i < 4; i++) \ |
||
190 |
for (j = 1; j < end; j += 2) \ |
||
191 |
o_8[i] += transform[4 * j][i] * src[j * sstep]; \ |
||
192 |
TR_4(e_8, src, 1, 2 * sstep, SET, 4); \ |
||
193 |
\ |
||
194 |
for (i = 0; i < 4; i++) { \ |
||
195 |
assign(dst[i * dstep], e_8[i] + o_8[i]); \ |
||
196 |
assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]); \ |
||
197 |
} \ |
||
198 |
} while (0) |
||
199 |
|||
200 |
#define TR_16(dst, src, dstep, sstep, assign, end) \ |
||
201 |
do { \ |
||
202 |
int i, j; \ |
||
203 |
int e_16[8]; \ |
||
204 |
int o_16[8] = { 0 }; \ |
||
205 |
for (i = 0; i < 8; i++) \ |
||
206 |
for (j = 1; j < end; j += 2) \ |
||
207 |
o_16[i] += transform[2 * j][i] * src[j * sstep]; \ |
||
208 |
TR_8(e_16, src, 1, 2 * sstep, SET, 8); \ |
||
209 |
\ |
||
210 |
for (i = 0; i < 8; i++) { \ |
||
211 |
assign(dst[i * dstep], e_16[i] + o_16[i]); \ |
||
212 |
assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]); \ |
||
213 |
} \ |
||
214 |
} while (0) |
||
215 |
|||
216 |
#define TR_32(dst, src, dstep, sstep, assign, end) \ |
||
217 |
do { \ |
||
218 |
int i, j; \ |
||
219 |
int e_32[16]; \ |
||
220 |
int o_32[16] = { 0 }; \ |
||
221 |
for (i = 0; i < 16; i++) \ |
||
222 |
for (j = 1; j < end; j += 2) \ |
||
223 |
o_32[i] += transform[j][i] * src[j * sstep]; \ |
||
224 |
TR_16(e_32, src, 1, 2 * sstep, SET, end / 2); \ |
||
225 |
\ |
||
226 |
for (i = 0; i < 16; i++) { \ |
||
227 |
assign(dst[i * dstep], e_32[i] + o_32[i]); \ |
||
228 |
assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]); \ |
||
229 |
} \ |
||
230 |
} while (0) |
||
231 |
|||
232 |
#define IDCT_VAR4(H) \ |
||
233 |
int limit2 = FFMIN(col_limit + 4, H) |
||
234 |
#define IDCT_VAR8(H) \ |
||
235 |
int limit = FFMIN(col_limit, H); \ |
||
236 |
int limit2 = FFMIN(col_limit + 4, H) |
||
237 |
#define IDCT_VAR16(H) IDCT_VAR8(H) |
||
238 |
#define IDCT_VAR32(H) IDCT_VAR8(H) |
||
239 |
|||
240 |
#define IDCT(H) \ |
||
241 |
static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs, \ |
||
242 |
int col_limit) \ |
||
243 |
{ \ |
||
244 |
int i; \ |
||
245 |
int shift = 7; \ |
||
246 |
int add = 1 << (shift - 1); \ |
||
247 |
int16_t *src = coeffs; \ |
||
248 |
IDCT_VAR ## H(H); \ |
||
249 |
\ |
||
250 |
for (i = 0; i < H; i++) { \ |
||
251 |
TR_ ## H(src, src, H, H, SCALE, limit2); \ |
||
252 |
if (limit2 < H && i%4 == 0 && !!i) \ |
||
253 |
limit2 -= 4; \ |
||
254 |
src++; \ |
||
255 |
} \ |
||
256 |
\ |
||
257 |
shift = 20 - BIT_DEPTH; \ |
||
258 |
add = 1 << (shift - 1); \ |
||
259 |
for (i = 0; i < H; i++) { \ |
||
260 |
TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit); \ |
||
261 |
coeffs += H; \ |
||
262 |
} \ |
||
263 |
} |
||
264 |
|||
265 |
#define IDCT_DC(H) \ |
||
266 |
static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs) \ |
||
267 |
{ \ |
||
268 |
int i, j; \ |
||
269 |
int shift = 14 - BIT_DEPTH; \ |
||
270 |
int add = 1 << (shift - 1); \ |
||
271 |
int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift; \ |
||
272 |
\ |
||
273 |
for (j = 0; j < H; j++) { \ |
||
274 |
for (i = 0; i < H; i++) { \ |
||
275 |
coeffs[i + j * H] = coeff; \ |
||
276 |
} \ |
||
277 |
} \ |
||
278 |
} |
||
279 |
|||
280 |
✗✓✗✗ ✗✗✓✓ ✓✓ |
47279016 |
IDCT( 4) |
281 |
✓✓✓✓ ✓✓✗✓ ✗✗✗✗ ✓✓✓✓ ✓✓✓✓ ✓✓ |
1031684809 |
IDCT( 8) |
282 |
✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓ |
3201471182 |
IDCT(16) |
283 |
✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓✓✓ ✓✓ |
5741044080 |
IDCT(32) |
284 |
|||
285 |
✓✓✓✓ |
45543708 |
IDCT_DC( 4) |
286 |
✓✓✓✓ |
53429795 |
IDCT_DC( 8) |
287 |
✓✓✓✓ |
69366570 |
IDCT_DC(16) |
288 |
✓✓✓✓ |
81981977 |
IDCT_DC(32) |
289 |
|||
290 |
#undef TR_4 |
||
291 |
#undef TR_8 |
||
292 |
#undef TR_16 |
||
293 |
#undef TR_32 |
||
294 |
|||
295 |
#undef SET |
||
296 |
#undef SCALE |
||
297 |
|||
298 |
221694 |
static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src, |
|
299 |
ptrdiff_t stride_dst, ptrdiff_t stride_src, |
||
300 |
int16_t *sao_offset_val, int sao_left_class, |
||
301 |
int width, int height) |
||
302 |
{ |
||
303 |
221694 |
pixel *dst = (pixel *)_dst; |
|
304 |
221694 |
pixel *src = (pixel *)_src; |
|
305 |
221694 |
int offset_table[32] = { 0 }; |
|
306 |
int k, y, x; |
||
307 |
221694 |
int shift = BIT_DEPTH - 5; |
|
308 |
|||
309 |
221694 |
stride_dst /= sizeof(pixel); |
|
310 |
221694 |
stride_src /= sizeof(pixel); |
|
311 |
|||
312 |
✓✓ | 1108470 |
for (k = 0; k < 4; k++) |
313 |
886776 |
offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; |
|
314 |
✓✓ | 8551886 |
for (y = 0; y < height; y++) { |
315 |
✓✓ | 416079440 |
for (x = 0; x < width; x++) |
316 |
407749248 |
dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); |
|
317 |
8330192 |
dst += stride_dst; |
|
318 |
8330192 |
src += stride_src; |
|
319 |
} |
||
320 |
} |
||
321 |
|||
322 |
#define CMP(a, b) (((a) > (b)) - ((a) < (b))) |
||
323 |
|||
324 |
742604 |
static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val, |
|
325 |
int eo, int width, int height) { |
||
326 |
|||
327 |
static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; |
||
328 |
static const int8_t pos[4][2][2] = { |
||
329 |
{ { -1, 0 }, { 1, 0 } }, // horizontal |
||
330 |
{ { 0, -1 }, { 0, 1 } }, // vertical |
||
331 |
{ { -1, -1 }, { 1, 1 } }, // 45 degree |
||
332 |
{ { 1, -1 }, { -1, 1 } }, // 135 degree |
||
333 |
}; |
||
334 |
742604 |
pixel *dst = (pixel *)_dst; |
|
335 |
742604 |
pixel *src = (pixel *)_src; |
|
336 |
int a_stride, b_stride; |
||
337 |
int x, y; |
||
338 |
742604 |
ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel); |
|
339 |
742604 |
stride_dst /= sizeof(pixel); |
|
340 |
|||
341 |
742604 |
a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; |
|
342 |
742604 |
b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; |
|
343 |
✓✓ | 39034556 |
for (y = 0; y < height; y++) { |
344 |
✓✓ | 2148834160 |
for (x = 0; x < width; x++) { |
345 |
2110542208 |
int diff0 = CMP(src[x], src[x + a_stride]); |
|
346 |
2110542208 |
int diff1 = CMP(src[x], src[x + b_stride]); |
|
347 |
2110542208 |
int offset_val = edge_idx[2 + diff0 + diff1]; |
|
348 |
2110542208 |
dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); |
|
349 |
} |
||
350 |
38291952 |
src += stride_src; |
|
351 |
38291952 |
dst += stride_dst; |
|
352 |
} |
||
353 |
} |
||
354 |
|||
355 |
710080 |
static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src, |
|
356 |
ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, |
||
357 |
int *borders, int _width, int _height, |
||
358 |
int c_idx, uint8_t *vert_edge, |
||
359 |
uint8_t *horiz_edge, uint8_t *diag_edge) |
||
360 |
{ |
||
361 |
int x, y; |
||
362 |
710080 |
pixel *dst = (pixel *)_dst; |
|
363 |
710080 |
pixel *src = (pixel *)_src; |
|
364 |
710080 |
int16_t *sao_offset_val = sao->offset_val[c_idx]; |
|
365 |
710080 |
int sao_eo_class = sao->eo_class[c_idx]; |
|
366 |
710080 |
int init_x = 0, width = _width, height = _height; |
|
367 |
|||
368 |
710080 |
stride_dst /= sizeof(pixel); |
|
369 |
710080 |
stride_src /= sizeof(pixel); |
|
370 |
|||
371 |
✓✓ | 710080 |
if (sao_eo_class != SAO_EO_VERT) { |
372 |
✓✓ | 556646 |
if (borders[0]) { |
373 |
26254 |
int offset_val = sao_offset_val[0]; |
|
374 |
✓✓ | 1327870 |
for (y = 0; y < height; y++) { |
375 |
1301616 |
dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); |
|
376 |
} |
||
377 |
26254 |
init_x = 1; |
|
378 |
} |
||
379 |
✓✓ | 556646 |
if (borders[2]) { |
380 |
26836 |
int offset_val = sao_offset_val[0]; |
|
381 |
26836 |
int offset = width - 1; |
|
382 |
✓✓ | 1332740 |
for (x = 0; x < height; x++) { |
383 |
1305904 |
dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); |
|
384 |
} |
||
385 |
26836 |
width--; |
|
386 |
} |
||
387 |
} |
||
388 |
✓✓ | 710080 |
if (sao_eo_class != SAO_EO_HORIZ) { |
389 |
✓✓ | 586416 |
if (borders[1]) { |
390 |
36540 |
int offset_val = sao_offset_val[0]; |
|
391 |
✓✓ | 1835782 |
for (x = init_x; x < width; x++) |
392 |
1799242 |
dst[x] = av_clip_pixel(src[x] + offset_val); |
|
393 |
} |
||
394 |
✓✓ | 586416 |
if (borders[3]) { |
395 |
46416 |
int offset_val = sao_offset_val[0]; |
|
396 |
46416 |
ptrdiff_t y_stride_dst = stride_dst * (height - 1); |
|
397 |
46416 |
ptrdiff_t y_stride_src = stride_src * (height - 1); |
|
398 |
✓✓ | 2260510 |
for (x = init_x; x < width; x++) |
399 |
2214094 |
dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); |
|
400 |
46416 |
height--; |
|
401 |
} |
||
402 |
} |
||
403 |
} |
||
404 |
|||
405 |
37316 |
static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src, |
|
406 |
ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, |
||
407 |
int *borders, int _width, int _height, |
||
408 |
int c_idx, uint8_t *vert_edge, |
||
409 |
uint8_t *horiz_edge, uint8_t *diag_edge) |
||
410 |
{ |
||
411 |
int x, y; |
||
412 |
37316 |
pixel *dst = (pixel *)_dst; |
|
413 |
37316 |
pixel *src = (pixel *)_src; |
|
414 |
37316 |
int16_t *sao_offset_val = sao->offset_val[c_idx]; |
|
415 |
37316 |
int sao_eo_class = sao->eo_class[c_idx]; |
|
416 |
37316 |
int init_x = 0, init_y = 0, width = _width, height = _height; |
|
417 |
|||
418 |
37316 |
stride_dst /= sizeof(pixel); |
|
419 |
37316 |
stride_src /= sizeof(pixel); |
|
420 |
|||
421 |
✓✓ | 37316 |
if (sao_eo_class != SAO_EO_VERT) { |
422 |
✓✓ | 26094 |
if (borders[0]) { |
423 |
2504 |
int offset_val = sao_offset_val[0]; |
|
424 |
✓✓ | 125448 |
for (y = 0; y < height; y++) { |
425 |
122944 |
dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); |
|
426 |
} |
||
427 |
2504 |
init_x = 1; |
|
428 |
} |
||
429 |
✓✓ | 26094 |
if (borders[2]) { |
430 |
1590 |
int offset_val = sao_offset_val[0]; |
|
431 |
1590 |
int offset = width - 1; |
|
432 |
✓✓ | 76198 |
for (x = 0; x < height; x++) { |
433 |
74608 |
dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); |
|
434 |
} |
||
435 |
1590 |
width--; |
|
436 |
} |
||
437 |
} |
||
438 |
✓✓ | 37316 |
if (sao_eo_class != SAO_EO_HORIZ) { |
439 |
✓✓ | 26678 |
if (borders[1]) { |
440 |
3926 |
int offset_val = sao_offset_val[0]; |
|
441 |
✓✓ | 192650 |
for (x = init_x; x < width; x++) |
442 |
188724 |
dst[x] = av_clip_pixel(src[x] + offset_val); |
|
443 |
3926 |
init_y = 1; |
|
444 |
} |
||
445 |
✓✓ | 26678 |
if (borders[3]) { |
446 |
3168 |
int offset_val = sao_offset_val[0]; |
|
447 |
3168 |
ptrdiff_t y_stride_dst = stride_dst * (height - 1); |
|
448 |
3168 |
ptrdiff_t y_stride_src = stride_src * (height - 1); |
|
449 |
✓✓ | 150026 |
for (x = init_x; x < width; x++) |
450 |
146858 |
dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val); |
|
451 |
3168 |
height--; |
|
452 |
} |
||
453 |
} |
||
454 |
|||
455 |
{ |
||
456 |
✓✓✓✓ ✓✓✓✓ |
37316 |
int save_upper_left = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; |
457 |
✓✓✓✓ ✓✓✓✓ |
37316 |
int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D && !borders[1] && !borders[2]; |
458 |
✓✓✓✓ ✓✓✓✓ |
37316 |
int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3]; |
459 |
✓✓✓✓ ✓✓✓✓ |
37316 |
int save_lower_left = !diag_edge[3] && sao_eo_class == SAO_EO_45D && !borders[0] && !borders[3]; |
460 |
|||
461 |
// Restore pixels that can't be modified |
||
462 |
✓✓✓✓ |
37316 |
if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) { |
463 |
✓✓ | 2508 |
for(y = init_y+save_upper_left; y< height-save_lower_left; y++) |
464 |
2432 |
dst[y*stride_dst] = src[y*stride_src]; |
|
465 |
} |
||
466 |
✓✓✓✗ |
37316 |
if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) { |
467 |
✓✓ | 5808 |
for(y = init_y+save_upper_right; y< height-save_lower_right; y++) |
468 |
5624 |
dst[y*stride_dst+width-1] = src[y*stride_src+width-1]; |
|
469 |
} |
||
470 |
|||
471 |
✓✓✓✓ |
37316 |
if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) { |
472 |
✓✓ | 7632 |
for(x = init_x+save_upper_left; x < width-save_upper_right; x++) |
473 |
7400 |
dst[x] = src[x]; |
|
474 |
} |
||
475 |
✓✓✓✓ |
37316 |
if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) { |
476 |
✓✓ | 11844 |
for(x = init_x+save_lower_left; x < width-save_lower_right; x++) |
477 |
11484 |
dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x]; |
|
478 |
} |
||
479 |
✓✓✓✓ |
37316 |
if(diag_edge[0] && sao_eo_class == SAO_EO_135D) |
480 |
112 |
dst[0] = src[0]; |
|
481 |
✓✓✓✓ |
37316 |
if(diag_edge[1] && sao_eo_class == SAO_EO_45D) |
482 |
212 |
dst[width-1] = src[width-1]; |
|
483 |
✓✓✓✓ |
37316 |
if(diag_edge[2] && sao_eo_class == SAO_EO_135D) |
484 |
216 |
dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1]; |
|
485 |
✓✓✓✓ |
37316 |
if(diag_edge[3] && sao_eo_class == SAO_EO_45D) |
486 |
168 |
dst[stride_dst*(height-1)] = src[stride_src*(height-1)]; |
|
487 |
|||
488 |
} |
||
489 |
} |
||
490 |
|||
491 |
#undef CMP |
||
492 |
|||
493 |
//////////////////////////////////////////////////////////////////////////////// |
||
494 |
// |
||
495 |
//////////////////////////////////////////////////////////////////////////////// |
||
496 |
4751256 |
static void FUNC(put_hevc_pel_pixels)(int16_t *dst, |
|
497 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
498 |
int height, intptr_t mx, intptr_t my, int width) |
||
499 |
{ |
||
500 |
int x, y; |
||
501 |
4751256 |
pixel *src = (pixel *)_src; |
|
502 |
4751256 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
503 |
|||
504 |
✓✓ | 75375560 |
for (y = 0; y < height; y++) { |
505 |
✓✓ | 1867228720 |
for (x = 0; x < width; x++) |
506 |
1796604416 |
dst[x] = src[x] << (14 - BIT_DEPTH); |
|
507 |
70624304 |
src += srcstride; |
|
508 |
70624304 |
dst += MAX_PB_SIZE; |
|
509 |
} |
||
510 |
} |
||
511 |
|||
512 |
4773528 |
static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
513 |
int height, intptr_t mx, intptr_t my, int width) |
||
514 |
{ |
||
515 |
int y; |
||
516 |
4773528 |
pixel *src = (pixel *)_src; |
|
517 |
4773528 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
518 |
4773528 |
pixel *dst = (pixel *)_dst; |
|
519 |
4773528 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
520 |
|||
521 |
✓✓ | 55217536 |
for (y = 0; y < height; y++) { |
522 |
50444008 |
memcpy(dst, src, width * sizeof(pixel)); |
|
523 |
50444008 |
src += srcstride; |
|
524 |
50444008 |
dst += dststride; |
|
525 |
} |
||
526 |
} |
||
527 |
|||
528 |
5198024 |
static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
529 |
int16_t *src2, |
||
530 |
int height, intptr_t mx, intptr_t my, int width) |
||
531 |
{ |
||
532 |
int x, y; |
||
533 |
5198024 |
pixel *src = (pixel *)_src; |
|
534 |
5198024 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
535 |
5198024 |
pixel *dst = (pixel *)_dst; |
|
536 |
5198024 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
537 |
|||
538 |
5198024 |
int shift = 14 + 1 - BIT_DEPTH; |
|
539 |
#if BIT_DEPTH < 14 |
||
540 |
5198024 |
int offset = 1 << (shift - 1); |
|
541 |
#else |
||
542 |
int offset = 0; |
||
543 |
#endif |
||
544 |
|||
545 |
✓✓ | 81995176 |
for (y = 0; y < height; y++) { |
546 |
✓✓ | 2011815904 |
for (x = 0; x < width; x++) |
547 |
1935018752 |
dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); |
|
548 |
76797152 |
src += srcstride; |
|
549 |
76797152 |
dst += dststride; |
|
550 |
76797152 |
src2 += MAX_PB_SIZE; |
|
551 |
} |
||
552 |
} |
||
553 |
|||
554 |
141742 |
static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
555 |
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
||
556 |
{ |
||
557 |
int x, y; |
||
558 |
141742 |
pixel *src = (pixel *)_src; |
|
559 |
141742 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
560 |
141742 |
pixel *dst = (pixel *)_dst; |
|
561 |
141742 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
562 |
141742 |
int shift = denom + 14 - BIT_DEPTH; |
|
563 |
#if BIT_DEPTH < 14 |
||
564 |
141742 |
int offset = 1 << (shift - 1); |
|
565 |
#else |
||
566 |
int offset = 0; |
||
567 |
#endif |
||
568 |
|||
569 |
141742 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
570 |
✓✓ | 3322246 |
for (y = 0; y < height; y++) { |
571 |
✓✓ | 111009656 |
for (x = 0; x < width; x++) |
572 |
107829152 |
dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox); |
|
573 |
3180504 |
src += srcstride; |
|
574 |
3180504 |
dst += dststride; |
|
575 |
} |
||
576 |
} |
||
577 |
|||
578 |
59942 |
static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
579 |
int16_t *src2, |
||
580 |
int height, int denom, int wx0, int wx1, |
||
581 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
582 |
{ |
||
583 |
int x, y; |
||
584 |
59942 |
pixel *src = (pixel *)_src; |
|
585 |
59942 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
586 |
59942 |
pixel *dst = (pixel *)_dst; |
|
587 |
59942 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
588 |
|||
589 |
59942 |
int shift = 14 + 1 - BIT_DEPTH; |
|
590 |
59942 |
int log2Wd = denom + shift - 1; |
|
591 |
|||
592 |
59942 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
593 |
59942 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
594 |
✓✓ | 1301790 |
for (y = 0; y < height; y++) { |
595 |
✓✓ | 41037816 |
for (x = 0; x < width; x++) { |
596 |
39795968 |
dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1)); |
|
597 |
} |
||
598 |
1241848 |
src += srcstride; |
|
599 |
1241848 |
dst += dststride; |
|
600 |
1241848 |
src2 += MAX_PB_SIZE; |
|
601 |
} |
||
602 |
} |
||
603 |
|||
604 |
//////////////////////////////////////////////////////////////////////////////// |
||
605 |
// |
||
606 |
//////////////////////////////////////////////////////////////////////////////// |
||
607 |
#define QPEL_FILTER(src, stride) \ |
||
608 |
(filter[0] * src[x - 3 * stride] + \ |
||
609 |
filter[1] * src[x - 2 * stride] + \ |
||
610 |
filter[2] * src[x - stride] + \ |
||
611 |
filter[3] * src[x ] + \ |
||
612 |
filter[4] * src[x + stride] + \ |
||
613 |
filter[5] * src[x + 2 * stride] + \ |
||
614 |
filter[6] * src[x + 3 * stride] + \ |
||
615 |
filter[7] * src[x + 4 * stride]) |
||
616 |
|||
617 |
1366966 |
static void FUNC(put_hevc_qpel_h)(int16_t *dst, |
|
618 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
619 |
int height, intptr_t mx, intptr_t my, int width) |
||
620 |
{ |
||
621 |
int x, y; |
||
622 |
1366966 |
pixel *src = (pixel*)_src; |
|
623 |
1366966 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
624 |
1366966 |
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
|
625 |
✓✓ | 30947430 |
for (y = 0; y < height; y++) { |
626 |
✓✓ | 1003044528 |
for (x = 0; x < width; x++) |
627 |
973464064 |
dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
628 |
29580464 |
src += srcstride; |
|
629 |
29580464 |
dst += MAX_PB_SIZE; |
|
630 |
} |
||
631 |
} |
||
632 |
|||
633 |
1070684 |
static void FUNC(put_hevc_qpel_v)(int16_t *dst, |
|
634 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
635 |
int height, intptr_t mx, intptr_t my, int width) |
||
636 |
{ |
||
637 |
int x, y; |
||
638 |
1070684 |
pixel *src = (pixel*)_src; |
|
639 |
1070684 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
640 |
1070684 |
const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
|
641 |
✓✓ | 24166420 |
for (y = 0; y < height; y++) { |
642 |
✓✓ | 781574328 |
for (x = 0; x < width; x++) |
643 |
758478592 |
dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); |
|
644 |
23095736 |
src += srcstride; |
|
645 |
23095736 |
dst += MAX_PB_SIZE; |
|
646 |
} |
||
647 |
} |
||
648 |
|||
649 |
3342762 |
static void FUNC(put_hevc_qpel_hv)(int16_t *dst, |
|
650 |
uint8_t *_src, |
||
651 |
ptrdiff_t _srcstride, |
||
652 |
int height, intptr_t mx, |
||
653 |
intptr_t my, int width) |
||
654 |
{ |
||
655 |
int x, y; |
||
656 |
const int8_t *filter; |
||
657 |
3342762 |
pixel *src = (pixel*)_src; |
|
658 |
3342762 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
659 |
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
||
660 |
3342762 |
int16_t *tmp = tmp_array; |
|
661 |
|||
662 |
3342762 |
src -= QPEL_EXTRA_BEFORE * srcstride; |
|
663 |
3342762 |
filter = ff_hevc_qpel_filters[mx - 1]; |
|
664 |
✓✓ | 97108712 |
for (y = 0; y < height + QPEL_EXTRA; y++) { |
665 |
✓✓ | 2791874046 |
for (x = 0; x < width; x++) |
666 |
2698108096 |
tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
667 |
93765950 |
src += srcstride; |
|
668 |
93765950 |
tmp += MAX_PB_SIZE; |
|
669 |
} |
||
670 |
|||
671 |
3342762 |
tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
672 |
3342762 |
filter = ff_hevc_qpel_filters[my - 1]; |
|
673 |
✓✓ | 73709378 |
for (y = 0; y < height; y++) { |
674 |
✓✓ | 2281752728 |
for (x = 0; x < width; x++) |
675 |
2211386112 |
dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; |
|
676 |
70366616 |
tmp += MAX_PB_SIZE; |
|
677 |
70366616 |
dst += MAX_PB_SIZE; |
|
678 |
} |
||
679 |
} |
||
680 |
|||
681 |
1569058 |
static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, |
|
682 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
683 |
int height, intptr_t mx, intptr_t my, int width) |
||
684 |
{ |
||
685 |
int x, y; |
||
686 |
1569058 |
pixel *src = (pixel*)_src; |
|
687 |
1569058 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
688 |
1569058 |
pixel *dst = (pixel *)_dst; |
|
689 |
1569058 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
690 |
1569058 |
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
|
691 |
1569058 |
int shift = 14 - BIT_DEPTH; |
|
692 |
|||
693 |
#if BIT_DEPTH < 14 |
||
694 |
1569058 |
int offset = 1 << (shift - 1); |
|
695 |
#else |
||
696 |
int offset = 0; |
||
697 |
#endif |
||
698 |
|||
699 |
✓✓ | 24275466 |
for (y = 0; y < height; y++) { |
700 |
✓✓ | 516845160 |
for (x = 0; x < width; x++) |
701 |
494138752 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); |
|
702 |
22706408 |
src += srcstride; |
|
703 |
22706408 |
dst += dststride; |
|
704 |
} |
||
705 |
} |
||
706 |
|||
707 |
1287120 |
static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
708 |
int16_t *src2, |
||
709 |
int height, intptr_t mx, intptr_t my, int width) |
||
710 |
{ |
||
711 |
int x, y; |
||
712 |
1287120 |
pixel *src = (pixel*)_src; |
|
713 |
1287120 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
714 |
1287120 |
pixel *dst = (pixel *)_dst; |
|
715 |
1287120 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
716 |
|||
717 |
1287120 |
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
|
718 |
|||
719 |
1287120 |
int shift = 14 + 1 - BIT_DEPTH; |
|
720 |
#if BIT_DEPTH < 14 |
||
721 |
1287120 |
int offset = 1 << (shift - 1); |
|
722 |
#else |
||
723 |
int offset = 0; |
||
724 |
#endif |
||
725 |
|||
726 |
✓✓ | 29154688 |
for (y = 0; y < height; y++) { |
727 |
✓✓ | 943104560 |
for (x = 0; x < width; x++) |
728 |
915236992 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
|
729 |
27867568 |
src += srcstride; |
|
730 |
27867568 |
dst += dststride; |
|
731 |
27867568 |
src2 += MAX_PB_SIZE; |
|
732 |
} |
||
733 |
} |
||
734 |
|||
735 |
1370060 |
static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, |
|
736 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
737 |
int height, intptr_t mx, intptr_t my, int width) |
||
738 |
{ |
||
739 |
int x, y; |
||
740 |
1370060 |
pixel *src = (pixel*)_src; |
|
741 |
1370060 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
742 |
1370060 |
pixel *dst = (pixel *)_dst; |
|
743 |
1370060 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
744 |
1370060 |
const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
|
745 |
1370060 |
int shift = 14 - BIT_DEPTH; |
|
746 |
|||
747 |
#if BIT_DEPTH < 14 |
||
748 |
1370060 |
int offset = 1 << (shift - 1); |
|
749 |
#else |
||
750 |
int offset = 0; |
||
751 |
#endif |
||
752 |
|||
753 |
✓✓ | 21748060 |
for (y = 0; y < height; y++) { |
754 |
✓✓ | 494138576 |
for (x = 0; x < width; x++) |
755 |
473760576 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); |
|
756 |
20378000 |
src += srcstride; |
|
757 |
20378000 |
dst += dststride; |
|
758 |
} |
||
759 |
} |
||
760 |
|||
761 |
|||
762 |
1024644 |
static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
763 |
int16_t *src2, |
||
764 |
int height, intptr_t mx, intptr_t my, int width) |
||
765 |
{ |
||
766 |
int x, y; |
||
767 |
1024644 |
pixel *src = (pixel*)_src; |
|
768 |
1024644 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
769 |
1024644 |
pixel *dst = (pixel *)_dst; |
|
770 |
1024644 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
771 |
|||
772 |
1024644 |
const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
|
773 |
|||
774 |
1024644 |
int shift = 14 + 1 - BIT_DEPTH; |
|
775 |
#if BIT_DEPTH < 14 |
||
776 |
1024644 |
int offset = 1 << (shift - 1); |
|
777 |
#else |
||
778 |
int offset = 0; |
||
779 |
#endif |
||
780 |
|||
781 |
✓✓ | 22748020 |
for (y = 0; y < height; y++) { |
782 |
✓✓ | 719076080 |
for (x = 0; x < width; x++) |
783 |
697352704 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
|
784 |
21723376 |
src += srcstride; |
|
785 |
21723376 |
dst += dststride; |
|
786 |
21723376 |
src2 += MAX_PB_SIZE; |
|
787 |
} |
||
788 |
} |
||
789 |
|||
790 |
4325664 |
static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, |
|
791 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
792 |
int height, intptr_t mx, intptr_t my, int width) |
||
793 |
{ |
||
794 |
int x, y; |
||
795 |
const int8_t *filter; |
||
796 |
4325664 |
pixel *src = (pixel*)_src; |
|
797 |
4325664 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
798 |
4325664 |
pixel *dst = (pixel *)_dst; |
|
799 |
4325664 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
800 |
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
||
801 |
4325664 |
int16_t *tmp = tmp_array; |
|
802 |
4325664 |
int shift = 14 - BIT_DEPTH; |
|
803 |
|||
804 |
#if BIT_DEPTH < 14 |
||
805 |
4325664 |
int offset = 1 << (shift - 1); |
|
806 |
#else |
||
807 |
int offset = 0; |
||
808 |
#endif |
||
809 |
|||
810 |
4325664 |
src -= QPEL_EXTRA_BEFORE * srcstride; |
|
811 |
4325664 |
filter = ff_hevc_qpel_filters[mx - 1]; |
|
812 |
✓✓ | 97006464 |
for (y = 0; y < height + QPEL_EXTRA; y++) { |
813 |
✓✓ | 1834736080 |
for (x = 0; x < width; x++) |
814 |
1742055280 |
tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
815 |
92680800 |
src += srcstride; |
|
816 |
92680800 |
tmp += MAX_PB_SIZE; |
|
817 |
} |
||
818 |
|||
819 |
4325664 |
tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
820 |
4325664 |
filter = ff_hevc_qpel_filters[my - 1]; |
|
821 |
|||
822 |
✓✓ | 66726816 |
for (y = 0; y < height; y++) { |
823 |
✓✓ | 1377873856 |
for (x = 0; x < width; x++) |
824 |
1315472704 |
dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); |
|
825 |
62401152 |
tmp += MAX_PB_SIZE; |
|
826 |
62401152 |
dst += dststride; |
|
827 |
} |
||
828 |
} |
||
829 |
|||
830 |
3195454 |
static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
831 |
int16_t *src2, |
||
832 |
int height, intptr_t mx, intptr_t my, int width) |
||
833 |
{ |
||
834 |
int x, y; |
||
835 |
const int8_t *filter; |
||
836 |
3195454 |
pixel *src = (pixel*)_src; |
|
837 |
3195454 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
838 |
3195454 |
pixel *dst = (pixel *)_dst; |
|
839 |
3195454 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
840 |
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
||
841 |
3195454 |
int16_t *tmp = tmp_array; |
|
842 |
3195454 |
int shift = 14 + 1 - BIT_DEPTH; |
|
843 |
#if BIT_DEPTH < 14 |
||
844 |
3195454 |
int offset = 1 << (shift - 1); |
|
845 |
#else |
||
846 |
int offset = 0; |
||
847 |
#endif |
||
848 |
|||
849 |
3195454 |
src -= QPEL_EXTRA_BEFORE * srcstride; |
|
850 |
3195454 |
filter = ff_hevc_qpel_filters[mx - 1]; |
|
851 |
✓✓ | 93088504 |
for (y = 0; y < height + QPEL_EXTRA; y++) { |
852 |
✓✓ | 2698511922 |
for (x = 0; x < width; x++) |
853 |
2608618872 |
tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
854 |
89893050 |
src += srcstride; |
|
855 |
89893050 |
tmp += MAX_PB_SIZE; |
|
856 |
} |
||
857 |
|||
858 |
3195454 |
tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
859 |
3195454 |
filter = ff_hevc_qpel_filters[my - 1]; |
|
860 |
|||
861 |
✓✓ | 70720326 |
for (y = 0; y < height; y++) { |
862 |
✓✓ | 2208754696 |
for (x = 0; x < width; x++) |
863 |
2141229824 |
dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); |
|
864 |
67524872 |
tmp += MAX_PB_SIZE; |
|
865 |
67524872 |
dst += dststride; |
|
866 |
67524872 |
src2 += MAX_PB_SIZE; |
|
867 |
} |
||
868 |
} |
||
869 |
|||
870 |
32126 |
static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, |
|
871 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
872 |
int height, int denom, int wx, int ox, |
||
873 |
intptr_t mx, intptr_t my, int width) |
||
874 |
{ |
||
875 |
int x, y; |
||
876 |
32126 |
pixel *src = (pixel*)_src; |
|
877 |
32126 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
878 |
32126 |
pixel *dst = (pixel *)_dst; |
|
879 |
32126 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
880 |
32126 |
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
|
881 |
32126 |
int shift = denom + 14 - BIT_DEPTH; |
|
882 |
#if BIT_DEPTH < 14 |
||
883 |
32126 |
int offset = 1 << (shift - 1); |
|
884 |
#else |
||
885 |
int offset = 0; |
||
886 |
#endif |
||
887 |
|||
888 |
32126 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
889 |
✓✓ | 695854 |
for (y = 0; y < height; y++) { |
890 |
✓✓ | 21578160 |
for (x = 0; x < width; x++) |
891 |
20914432 |
dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
|
892 |
663728 |
src += srcstride; |
|
893 |
663728 |
dst += dststride; |
|
894 |
} |
||
895 |
} |
||
896 |
|||
897 |
18706 |
static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
898 |
int16_t *src2, |
||
899 |
int height, int denom, int wx0, int wx1, |
||
900 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
901 |
{ |
||
902 |
int x, y; |
||
903 |
18706 |
pixel *src = (pixel*)_src; |
|
904 |
18706 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
905 |
18706 |
pixel *dst = (pixel *)_dst; |
|
906 |
18706 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
907 |
|||
908 |
18706 |
const int8_t *filter = ff_hevc_qpel_filters[mx - 1]; |
|
909 |
|||
910 |
18706 |
int shift = 14 + 1 - BIT_DEPTH; |
|
911 |
18706 |
int log2Wd = denom + shift - 1; |
|
912 |
|||
913 |
18706 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
914 |
18706 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
915 |
✓✓ | 476402 |
for (y = 0; y < height; y++) { |
916 |
✓✓ | 16214496 |
for (x = 0; x < width; x++) |
917 |
15756800 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
|
918 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
919 |
457696 |
src += srcstride; |
|
920 |
457696 |
dst += dststride; |
|
921 |
457696 |
src2 += MAX_PB_SIZE; |
|
922 |
} |
||
923 |
} |
||
924 |
|||
925 |
28846 |
static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, |
|
926 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
927 |
int height, int denom, int wx, int ox, |
||
928 |
intptr_t mx, intptr_t my, int width) |
||
929 |
{ |
||
930 |
int x, y; |
||
931 |
28846 |
pixel *src = (pixel*)_src; |
|
932 |
28846 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
933 |
28846 |
pixel *dst = (pixel *)_dst; |
|
934 |
28846 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
935 |
28846 |
const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
|
936 |
28846 |
int shift = denom + 14 - BIT_DEPTH; |
|
937 |
#if BIT_DEPTH < 14 |
||
938 |
28846 |
int offset = 1 << (shift - 1); |
|
939 |
#else |
||
940 |
int offset = 0; |
||
941 |
#endif |
||
942 |
|||
943 |
28846 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
944 |
✓✓ | 567110 |
for (y = 0; y < height; y++) { |
945 |
✓✓ | 15491160 |
for (x = 0; x < width; x++) |
946 |
14952896 |
dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
|
947 |
538264 |
src += srcstride; |
|
948 |
538264 |
dst += dststride; |
|
949 |
} |
||
950 |
} |
||
951 |
|||
952 |
16344 |
static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
953 |
int16_t *src2, |
||
954 |
int height, int denom, int wx0, int wx1, |
||
955 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
956 |
{ |
||
957 |
int x, y; |
||
958 |
16344 |
pixel *src = (pixel*)_src; |
|
959 |
16344 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
960 |
16344 |
pixel *dst = (pixel *)_dst; |
|
961 |
16344 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
962 |
|||
963 |
16344 |
const int8_t *filter = ff_hevc_qpel_filters[my - 1]; |
|
964 |
|||
965 |
16344 |
int shift = 14 + 1 - BIT_DEPTH; |
|
966 |
16344 |
int log2Wd = denom + shift - 1; |
|
967 |
|||
968 |
16344 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
969 |
16344 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
970 |
✓✓ | 389488 |
for (y = 0; y < height; y++) { |
971 |
✓✓ | 12275608 |
for (x = 0; x < width; x++) |
972 |
11902464 |
dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
|
973 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
974 |
373144 |
src += srcstride; |
|
975 |
373144 |
dst += dststride; |
|
976 |
373144 |
src2 += MAX_PB_SIZE; |
|
977 |
} |
||
978 |
} |
||
979 |
|||
980 |
98774 |
static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, |
|
981 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
982 |
int height, int denom, int wx, int ox, |
||
983 |
intptr_t mx, intptr_t my, int width) |
||
984 |
{ |
||
985 |
int x, y; |
||
986 |
const int8_t *filter; |
||
987 |
98774 |
pixel *src = (pixel*)_src; |
|
988 |
98774 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
989 |
98774 |
pixel *dst = (pixel *)_dst; |
|
990 |
98774 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
991 |
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
||
992 |
98774 |
int16_t *tmp = tmp_array; |
|
993 |
98774 |
int shift = denom + 14 - BIT_DEPTH; |
|
994 |
#if BIT_DEPTH < 14 |
||
995 |
98774 |
int offset = 1 << (shift - 1); |
|
996 |
#else |
||
997 |
int offset = 0; |
||
998 |
#endif |
||
999 |
|||
1000 |
98774 |
src -= QPEL_EXTRA_BEFORE * srcstride; |
|
1001 |
98774 |
filter = ff_hevc_qpel_filters[mx - 1]; |
|
1002 |
✓✓ | 2715608 |
for (y = 0; y < height + QPEL_EXTRA; y++) { |
1003 |
✓✓ | 72914394 |
for (x = 0; x < width; x++) |
1004 |
70297560 |
tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1005 |
2616834 |
src += srcstride; |
|
1006 |
2616834 |
tmp += MAX_PB_SIZE; |
|
1007 |
} |
||
1008 |
|||
1009 |
98774 |
tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1010 |
98774 |
filter = ff_hevc_qpel_filters[my - 1]; |
|
1011 |
|||
1012 |
98774 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
1013 |
✓✓ | 2024190 |
for (y = 0; y < height; y++) { |
1014 |
✓✓ | 59379880 |
for (x = 0; x < width; x++) |
1015 |
57454464 |
dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
|
1016 |
1925416 |
tmp += MAX_PB_SIZE; |
|
1017 |
1925416 |
dst += dststride; |
|
1018 |
} |
||
1019 |
} |
||
1020 |
|||
1021 |
66938 |
static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1022 |
int16_t *src2, |
||
1023 |
int height, int denom, int wx0, int wx1, |
||
1024 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
1025 |
{ |
||
1026 |
int x, y; |
||
1027 |
const int8_t *filter; |
||
1028 |
66938 |
pixel *src = (pixel*)_src; |
|
1029 |
66938 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1030 |
66938 |
pixel *dst = (pixel *)_dst; |
|
1031 |
66938 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1032 |
int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; |
||
1033 |
66938 |
int16_t *tmp = tmp_array; |
|
1034 |
66938 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1035 |
66938 |
int log2Wd = denom + shift - 1; |
|
1036 |
|||
1037 |
66938 |
src -= QPEL_EXTRA_BEFORE * srcstride; |
|
1038 |
66938 |
filter = ff_hevc_qpel_filters[mx - 1]; |
|
1039 |
✓✓ | 2201120 |
for (y = 0; y < height + QPEL_EXTRA; y++) { |
1040 |
✓✓ | 71368198 |
for (x = 0; x < width; x++) |
1041 |
69234016 |
tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1042 |
2134182 |
src += srcstride; |
|
1043 |
2134182 |
tmp += MAX_PB_SIZE; |
|
1044 |
} |
||
1045 |
|||
1046 |
66938 |
tmp = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1047 |
66938 |
filter = ff_hevc_qpel_filters[my - 1]; |
|
1048 |
|||
1049 |
66938 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
1050 |
66938 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
1051 |
✓✓ | 1732554 |
for (y = 0; y < height; y++) { |
1052 |
✓✓ | 59653712 |
for (x = 0; x < width; x++) |
1053 |
57988096 |
dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + |
|
1054 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
1055 |
1665616 |
tmp += MAX_PB_SIZE; |
|
1056 |
1665616 |
dst += dststride; |
|
1057 |
1665616 |
src2 += MAX_PB_SIZE; |
|
1058 |
} |
||
1059 |
} |
||
1060 |
|||
1061 |
/*
 * EPEL_FILTER(src, stride): 4-tap weighted sum centred on src[x].
 *
 * Evaluates
 *   filter[0] * src[x - stride] + filter[1] * src[x] +
 *   filter[2] * src[x + stride] + filter[3] * src[x + 2 * stride]
 *
 * The macro reads the identifiers `filter` (coefficient array) and `x`
 * (current sample index) from the enclosing scope, so both must be visible
 * under exactly those names at the point of use.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. `buf + 1` or `a + b`) index the intended taps instead of
 * being re-associated by operator precedence.
 */
#define EPEL_FILTER(src, stride)                                               \
    (filter[0] * (src)[x - (stride)] +                                         \
     filter[1] * (src)[x] +                                                    \
     filter[2] * (src)[x + (stride)] +                                         \
     filter[3] * (src)[x + 2 * (stride)])
||
1069 |
|||
1070 |
2148368 |
static void FUNC(put_hevc_epel_h)(int16_t *dst, |
|
1071 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
1072 |
int height, intptr_t mx, intptr_t my, int width) |
||
1073 |
{ |
||
1074 |
int x, y; |
||
1075 |
2148368 |
pixel *src = (pixel *)_src; |
|
1076 |
2148368 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1077 |
2148368 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1078 |
✓✓ | 27059384 |
for (y = 0; y < height; y++) { |
1079 |
✓✓ | 427002216 |
for (x = 0; x < width; x++) |
1080 |
402091200 |
dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1081 |
24911016 |
src += srcstride; |
|
1082 |
24911016 |
dst += MAX_PB_SIZE; |
|
1083 |
} |
||
1084 |
} |
||
1085 |
|||
1086 |
1491732 |
static void FUNC(put_hevc_epel_v)(int16_t *dst, |
|
1087 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
1088 |
int height, intptr_t mx, intptr_t my, int width) |
||
1089 |
{ |
||
1090 |
int x, y; |
||
1091 |
1491732 |
pixel *src = (pixel *)_src; |
|
1092 |
1491732 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1093 |
1491732 |
const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
|
1094 |
|||
1095 |
✓✓ | 19681652 |
for (y = 0; y < height; y++) { |
1096 |
✓✓ | 326176096 |
for (x = 0; x < width; x++) |
1097 |
307986176 |
dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); |
|
1098 |
18189920 |
src += srcstride; |
|
1099 |
18189920 |
dst += MAX_PB_SIZE; |
|
1100 |
} |
||
1101 |
} |
||
1102 |
|||
1103 |
8511196 |
static void FUNC(put_hevc_epel_hv)(int16_t *dst, |
|
1104 |
uint8_t *_src, ptrdiff_t _srcstride, |
||
1105 |
int height, intptr_t mx, intptr_t my, int width) |
||
1106 |
{ |
||
1107 |
int x, y; |
||
1108 |
8511196 |
pixel *src = (pixel *)_src; |
|
1109 |
8511196 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1110 |
8511196 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1111 |
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
||
1112 |
8511196 |
int16_t *tmp = tmp_array; |
|
1113 |
|||
1114 |
8511196 |
src -= EPEL_EXTRA_BEFORE * srcstride; |
|
1115 |
|||
1116 |
✓✓ | 129868592 |
for (y = 0; y < height + EPEL_EXTRA; y++) { |
1117 |
✓✓ | 1893303044 |
for (x = 0; x < width; x++) |
1118 |
1771945648 |
tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1119 |
121357396 |
src += srcstride; |
|
1120 |
121357396 |
tmp += MAX_PB_SIZE; |
|
1121 |
} |
||
1122 |
|||
1123 |
8511196 |
tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1124 |
8511196 |
filter = ff_hevc_epel_filters[my - 1]; |
|
1125 |
|||
1126 |
✓✓ | 104335004 |
for (y = 0; y < height; y++) { |
1127 |
✓✓ | 1598736128 |
for (x = 0; x < width; x++) |
1128 |
1502912320 |
dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; |
|
1129 |
95823808 |
tmp += MAX_PB_SIZE; |
|
1130 |
95823808 |
dst += MAX_PB_SIZE; |
|
1131 |
} |
||
1132 |
} |
||
1133 |
|||
1134 |
2560764 |
static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1135 |
int height, intptr_t mx, intptr_t my, int width) |
||
1136 |
{ |
||
1137 |
int x, y; |
||
1138 |
2560764 |
pixel *src = (pixel *)_src; |
|
1139 |
2560764 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1140 |
2560764 |
pixel *dst = (pixel *)_dst; |
|
1141 |
2560764 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1142 |
2560764 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1143 |
2560764 |
int shift = 14 - BIT_DEPTH; |
|
1144 |
#if BIT_DEPTH < 14 |
||
1145 |
2560764 |
int offset = 1 << (shift - 1); |
|
1146 |
#else |
||
1147 |
int offset = 0; |
||
1148 |
#endif |
||
1149 |
|||
1150 |
✓✓ | 20986716 |
for (y = 0; y < height; y++) { |
1151 |
✓✓ | 213641216 |
for (x = 0; x < width; x++) |
1152 |
195215264 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); |
|
1153 |
18425952 |
src += srcstride; |
|
1154 |
18425952 |
dst += dststride; |
|
1155 |
} |
||
1156 |
} |
||
1157 |
|||
1158 |
2020436 |
static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1159 |
int16_t *src2, |
||
1160 |
int height, intptr_t mx, intptr_t my, int width) |
||
1161 |
{ |
||
1162 |
int x, y; |
||
1163 |
2020436 |
pixel *src = (pixel *)_src; |
|
1164 |
2020436 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1165 |
2020436 |
pixel *dst = (pixel *)_dst; |
|
1166 |
2020436 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1167 |
2020436 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1168 |
2020436 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1169 |
#if BIT_DEPTH < 14 |
||
1170 |
2020436 |
int offset = 1 << (shift - 1); |
|
1171 |
#else |
||
1172 |
int offset = 0; |
||
1173 |
#endif |
||
1174 |
|||
1175 |
✓✓ | 25062708 |
for (y = 0; y < height; y++) { |
1176 |
✓✓ | 396179552 |
for (x = 0; x < width; x++) { |
1177 |
373137280 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
|
1178 |
} |
||
1179 |
23042272 |
dst += dststride; |
|
1180 |
23042272 |
src += srcstride; |
|
1181 |
23042272 |
src2 += MAX_PB_SIZE; |
|
1182 |
} |
||
1183 |
} |
||
1184 |
|||
1185 |
2090324 |
static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1186 |
int height, intptr_t mx, intptr_t my, int width) |
||
1187 |
{ |
||
1188 |
int x, y; |
||
1189 |
2090324 |
pixel *src = (pixel *)_src; |
|
1190 |
2090324 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1191 |
2090324 |
pixel *dst = (pixel *)_dst; |
|
1192 |
2090324 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1193 |
2090324 |
const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
|
1194 |
2090324 |
int shift = 14 - BIT_DEPTH; |
|
1195 |
#if BIT_DEPTH < 14 |
||
1196 |
2090324 |
int offset = 1 << (shift - 1); |
|
1197 |
#else |
||
1198 |
int offset = 0; |
||
1199 |
#endif |
||
1200 |
|||
1201 |
✓✓ | 17676964 |
for (y = 0; y < height; y++) { |
1202 |
✓✓ | 193634128 |
for (x = 0; x < width; x++) |
1203 |
178047488 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); |
|
1204 |
15586640 |
src += srcstride; |
|
1205 |
15586640 |
dst += dststride; |
|
1206 |
} |
||
1207 |
} |
||
1208 |
|||
1209 |
1447836 |
static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1210 |
int16_t *src2, |
||
1211 |
int height, intptr_t mx, intptr_t my, int width) |
||
1212 |
{ |
||
1213 |
int x, y; |
||
1214 |
1447836 |
pixel *src = (pixel *)_src; |
|
1215 |
1447836 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1216 |
1447836 |
const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
|
1217 |
1447836 |
pixel *dst = (pixel *)_dst; |
|
1218 |
1447836 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1219 |
1447836 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1220 |
#if BIT_DEPTH < 14 |
||
1221 |
1447836 |
int offset = 1 << (shift - 1); |
|
1222 |
#else |
||
1223 |
int offset = 0; |
||
1224 |
#endif |
||
1225 |
|||
1226 |
✓✓ | 18809476 |
for (y = 0; y < height; y++) { |
1227 |
✓✓ | 299869352 |
for (x = 0; x < width; x++) |
1228 |
282507712 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); |
|
1229 |
17361640 |
dst += dststride; |
|
1230 |
17361640 |
src += srcstride; |
|
1231 |
17361640 |
src2 += MAX_PB_SIZE; |
|
1232 |
} |
||
1233 |
} |
||
1234 |
|||
1235 |
11433556 |
static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1236 |
int height, intptr_t mx, intptr_t my, int width) |
||
1237 |
{ |
||
1238 |
int x, y; |
||
1239 |
11433556 |
pixel *src = (pixel *)_src; |
|
1240 |
11433556 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1241 |
11433556 |
pixel *dst = (pixel *)_dst; |
|
1242 |
11433556 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1243 |
11433556 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1244 |
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
||
1245 |
11433556 |
int16_t *tmp = tmp_array; |
|
1246 |
11433556 |
int shift = 14 - BIT_DEPTH; |
|
1247 |
#if BIT_DEPTH < 14 |
||
1248 |
11433556 |
int offset = 1 << (shift - 1); |
|
1249 |
#else |
||
1250 |
int offset = 0; |
||
1251 |
#endif |
||
1252 |
|||
1253 |
11433556 |
src -= EPEL_EXTRA_BEFORE * srcstride; |
|
1254 |
|||
1255 |
✓✓ | 128971784 |
for (y = 0; y < height + EPEL_EXTRA; y++) { |
1256 |
✓✓ | 1244313364 |
for (x = 0; x < width; x++) |
1257 |
1126775136 |
tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1258 |
117538228 |
src += srcstride; |
|
1259 |
117538228 |
tmp += MAX_PB_SIZE; |
|
1260 |
} |
||
1261 |
|||
1262 |
11433556 |
tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1263 |
11433556 |
filter = ff_hevc_epel_filters[my - 1]; |
|
1264 |
|||
1265 |
✓✓ | 94671116 |
for (y = 0; y < height; y++) { |
1266 |
✓✓ | 967936792 |
for (x = 0; x < width; x++) |
1267 |
884699232 |
dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); |
|
1268 |
83237560 |
tmp += MAX_PB_SIZE; |
|
1269 |
83237560 |
dst += dststride; |
|
1270 |
} |
||
1271 |
} |
||
1272 |
|||
1273 |
8135608 |
static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1274 |
int16_t *src2, |
||
1275 |
int height, intptr_t mx, intptr_t my, int width) |
||
1276 |
{ |
||
1277 |
int x, y; |
||
1278 |
8135608 |
pixel *src = (pixel *)_src; |
|
1279 |
8135608 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1280 |
8135608 |
pixel *dst = (pixel *)_dst; |
|
1281 |
8135608 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1282 |
8135608 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1283 |
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
||
1284 |
8135608 |
int16_t *tmp = tmp_array; |
|
1285 |
8135608 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1286 |
#if BIT_DEPTH < 14 |
||
1287 |
8135608 |
int offset = 1 << (shift - 1); |
|
1288 |
#else |
||
1289 |
int offset = 0; |
||
1290 |
#endif |
||
1291 |
|||
1292 |
8135608 |
src -= EPEL_EXTRA_BEFORE * srcstride; |
|
1293 |
|||
1294 |
✓✓ | 124499000 |
for (y = 0; y < height + EPEL_EXTRA; y++) { |
1295 |
✓✓ | 1812337320 |
for (x = 0; x < width; x++) |
1296 |
1695973928 |
tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1297 |
116363392 |
src += srcstride; |
|
1298 |
116363392 |
tmp += MAX_PB_SIZE; |
|
1299 |
} |
||
1300 |
|||
1301 |
8135608 |
tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1302 |
8135608 |
filter = ff_hevc_epel_filters[my - 1]; |
|
1303 |
|||
1304 |
✓✓ | 100092176 |
for (y = 0; y < height; y++) { |
1305 |
✓✓ | 1530890520 |
for (x = 0; x < width; x++) |
1306 |
1438933952 |
dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); |
|
1307 |
91956568 |
tmp += MAX_PB_SIZE; |
|
1308 |
91956568 |
dst += dststride; |
|
1309 |
91956568 |
src2 += MAX_PB_SIZE; |
|
1310 |
} |
||
1311 |
} |
||
1312 |
|||
1313 |
49364 |
static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1314 |
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
||
1315 |
{ |
||
1316 |
int x, y; |
||
1317 |
49364 |
pixel *src = (pixel *)_src; |
|
1318 |
49364 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1319 |
49364 |
pixel *dst = (pixel *)_dst; |
|
1320 |
49364 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1321 |
49364 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1322 |
49364 |
int shift = denom + 14 - BIT_DEPTH; |
|
1323 |
#if BIT_DEPTH < 14 |
||
1324 |
49364 |
int offset = 1 << (shift - 1); |
|
1325 |
#else |
||
1326 |
int offset = 0; |
||
1327 |
#endif |
||
1328 |
|||
1329 |
49364 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
1330 |
✓✓ | 596628 |
for (y = 0; y < height; y++) { |
1331 |
✓✓ | 9689504 |
for (x = 0; x < width; x++) { |
1332 |
9142240 |
dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
|
1333 |
} |
||
1334 |
547264 |
dst += dststride; |
|
1335 |
547264 |
src += srcstride; |
|
1336 |
} |
||
1337 |
} |
||
1338 |
|||
1339 |
28868 |
static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1340 |
int16_t *src2, |
||
1341 |
int height, int denom, int wx0, int wx1, |
||
1342 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
1343 |
{ |
||
1344 |
int x, y; |
||
1345 |
28868 |
pixel *src = (pixel *)_src; |
|
1346 |
28868 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1347 |
28868 |
pixel *dst = (pixel *)_dst; |
|
1348 |
28868 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1349 |
28868 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1350 |
28868 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1351 |
28868 |
int log2Wd = denom + shift - 1; |
|
1352 |
|||
1353 |
28868 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
1354 |
28868 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
1355 |
✓✓ | 396300 |
for (y = 0; y < height; y++) { |
1356 |
✓✓ | 6994248 |
for (x = 0; x < width; x++) |
1357 |
6626816 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
|
1358 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
1359 |
367432 |
src += srcstride; |
|
1360 |
367432 |
dst += dststride; |
|
1361 |
367432 |
src2 += MAX_PB_SIZE; |
|
1362 |
} |
||
1363 |
} |
||
1364 |
|||
1365 |
37788 |
static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1366 |
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
||
1367 |
{ |
||
1368 |
int x, y; |
||
1369 |
37788 |
pixel *src = (pixel *)_src; |
|
1370 |
37788 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1371 |
37788 |
pixel *dst = (pixel *)_dst; |
|
1372 |
37788 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1373 |
37788 |
const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
|
1374 |
37788 |
int shift = denom + 14 - BIT_DEPTH; |
|
1375 |
#if BIT_DEPTH < 14 |
||
1376 |
37788 |
int offset = 1 << (shift - 1); |
|
1377 |
#else |
||
1378 |
int offset = 0; |
||
1379 |
#endif |
||
1380 |
|||
1381 |
37788 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
1382 |
✓✓ | 411260 |
for (y = 0; y < height; y++) { |
1383 |
✓✓ | 5916800 |
for (x = 0; x < width; x++) { |
1384 |
5543328 |
dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); |
|
1385 |
} |
||
1386 |
373472 |
dst += dststride; |
|
1387 |
373472 |
src += srcstride; |
|
1388 |
} |
||
1389 |
} |
||
1390 |
|||
1391 |
20340 |
static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1392 |
int16_t *src2, |
||
1393 |
int height, int denom, int wx0, int wx1, |
||
1394 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
1395 |
{ |
||
1396 |
int x, y; |
||
1397 |
20340 |
pixel *src = (pixel *)_src; |
|
1398 |
20340 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1399 |
20340 |
const int8_t *filter = ff_hevc_epel_filters[my - 1]; |
|
1400 |
20340 |
pixel *dst = (pixel *)_dst; |
|
1401 |
20340 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1402 |
20340 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1403 |
20340 |
int log2Wd = denom + shift - 1; |
|
1404 |
|||
1405 |
20340 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
1406 |
20340 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
1407 |
✓✓ | 256100 |
for (y = 0; y < height; y++) { |
1408 |
✓✓ | 4095664 |
for (x = 0; x < width; x++) |
1409 |
3859904 |
dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + |
|
1410 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
1411 |
235760 |
src += srcstride; |
|
1412 |
235760 |
dst += dststride; |
|
1413 |
235760 |
src2 += MAX_PB_SIZE; |
|
1414 |
} |
||
1415 |
} |
||
1416 |
|||
1417 |
246988 |
static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1418 |
int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) |
||
1419 |
{ |
||
1420 |
int x, y; |
||
1421 |
246988 |
pixel *src = (pixel *)_src; |
|
1422 |
246988 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1423 |
246988 |
pixel *dst = (pixel *)_dst; |
|
1424 |
246988 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1425 |
246988 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1426 |
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
||
1427 |
246988 |
int16_t *tmp = tmp_array; |
|
1428 |
246988 |
int shift = denom + 14 - BIT_DEPTH; |
|
1429 |
#if BIT_DEPTH < 14 |
||
1430 |
246988 |
int offset = 1 << (shift - 1); |
|
1431 |
#else |
||
1432 |
int offset = 0; |
||
1433 |
#endif |
||
1434 |
|||
1435 |
246988 |
src -= EPEL_EXTRA_BEFORE * srcstride; |
|
1436 |
|||
1437 |
✓✓ | 3328600 |
for (y = 0; y < height + EPEL_EXTRA; y++) { |
1438 |
✓✓ | 43597044 |
for (x = 0; x < width; x++) |
1439 |
40515432 |
tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1440 |
3081612 |
src += srcstride; |
|
1441 |
3081612 |
tmp += MAX_PB_SIZE; |
|
1442 |
} |
||
1443 |
|||
1444 |
246988 |
tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1445 |
246988 |
filter = ff_hevc_epel_filters[my - 1]; |
|
1446 |
|||
1447 |
246988 |
ox = ox * (1 << (BIT_DEPTH - 8)); |
|
1448 |
✓✓ | 2587636 |
for (y = 0; y < height; y++) { |
1449 |
✓✓ | 36176712 |
for (x = 0; x < width; x++) |
1450 |
33836064 |
dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); |
|
1451 |
2340648 |
tmp += MAX_PB_SIZE; |
|
1452 |
2340648 |
dst += dststride; |
|
1453 |
} |
||
1454 |
} |
||
1455 |
|||
1456 |
162704 |
static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, |
|
1457 |
int16_t *src2, |
||
1458 |
int height, int denom, int wx0, int wx1, |
||
1459 |
int ox0, int ox1, intptr_t mx, intptr_t my, int width) |
||
1460 |
{ |
||
1461 |
int x, y; |
||
1462 |
162704 |
pixel *src = (pixel *)_src; |
|
1463 |
162704 |
ptrdiff_t srcstride = _srcstride / sizeof(pixel); |
|
1464 |
162704 |
pixel *dst = (pixel *)_dst; |
|
1465 |
162704 |
ptrdiff_t dststride = _dststride / sizeof(pixel); |
|
1466 |
162704 |
const int8_t *filter = ff_hevc_epel_filters[mx - 1]; |
|
1467 |
int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; |
||
1468 |
162704 |
int16_t *tmp = tmp_array; |
|
1469 |
162704 |
int shift = 14 + 1 - BIT_DEPTH; |
|
1470 |
162704 |
int log2Wd = denom + shift - 1; |
|
1471 |
|||
1472 |
162704 |
src -= EPEL_EXTRA_BEFORE * srcstride; |
|
1473 |
|||
1474 |
✓✓ | 2627736 |
for (y = 0; y < height + EPEL_EXTRA; y++) { |
1475 |
✓✓ | 41743528 |
for (x = 0; x < width; x++) |
1476 |
39278496 |
tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); |
|
1477 |
2465032 |
src += srcstride; |
|
1478 |
2465032 |
tmp += MAX_PB_SIZE; |
|
1479 |
} |
||
1480 |
|||
1481 |
162704 |
tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; |
|
1482 |
162704 |
filter = ff_hevc_epel_filters[my - 1]; |
|
1483 |
|||
1484 |
162704 |
ox0 = ox0 * (1 << (BIT_DEPTH - 8)); |
|
1485 |
162704 |
ox1 = ox1 * (1 << (BIT_DEPTH - 8)); |
|
1486 |
✓✓ | 2139624 |
for (y = 0; y < height; y++) { |
1487 |
✓✓ | 35552536 |
for (x = 0; x < width; x++) |
1488 |
33575616 |
dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + |
|
1489 |
((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); |
||
1490 |
1976920 |
tmp += MAX_PB_SIZE; |
|
1491 |
1976920 |
dst += dststride; |
|
1492 |
1976920 |
src2 += MAX_PB_SIZE; |
|
1493 |
} |
||
1494 |
} |
||
1495 |
|||
1496 |
/*
 * Sample accessors shared by the deblocking filters below. Every macro
 * expands to an element of `pix`, so a `pix` pointer and the `xstride` /
 * `ystride` element strides must be in scope wherever one is used.
 *
 * Line zero: P0..P3 sit at -1..-4 steps of xstride from pix, Q0..Q3 at
 * 0..+3 steps.
 */
#define P3  pix[-4 * xstride]
#define P2  pix[-3 * xstride]
#define P1  pix[-2 * xstride]
#define P0  pix[-1 * xstride]
#define Q0  pix[0 * xstride]
#define Q1  pix[1 * xstride]
#define Q2  pix[2 * xstride]
#define Q3  pix[3 * xstride]

/*
 * Line three: the same positions, three steps of ystride further on.
 * Used only for the deblocking decision.
 */
#define TP3 pix[-4 * xstride + 3 * ystride]
#define TP2 pix[-3 * xstride + 3 * ystride]
#define TP1 pix[-2 * xstride + 3 * ystride]
#define TP0 pix[-1 * xstride + 3 * ystride]
#define TQ0 pix[0 * xstride + 3 * ystride]
#define TQ1 pix[1 * xstride + 3 * ystride]
#define TQ2 pix[2 * xstride + 3 * ystride]
#define TQ3 pix[3 * xstride + 3 * ystride]
||
1515 |
|||
1516 |
69248556 |
static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix, |
|
1517 |
ptrdiff_t _xstride, ptrdiff_t _ystride, |
||
1518 |
int beta, int *_tc, |
||
1519 |
uint8_t *_no_p, uint8_t *_no_q) |
||
1520 |
{ |
||
1521 |
int d, j; |
||
1522 |
69248556 |
pixel *pix = (pixel *)_pix; |
|
1523 |
69248556 |
ptrdiff_t xstride = _xstride / sizeof(pixel); |
|
1524 |
69248556 |
ptrdiff_t ystride = _ystride / sizeof(pixel); |
|
1525 |
|||
1526 |
69248556 |
beta <<= BIT_DEPTH - 8; |
|
1527 |
|||
1528 |
✓✓ | 207745668 |
for (j = 0; j < 2; j++) { |
1529 |
138497112 |
const int dp0 = abs(P2 - 2 * P1 + P0); |
|
1530 |
138497112 |
const int dq0 = abs(Q2 - 2 * Q1 + Q0); |
|
1531 |
138497112 |
const int dp3 = abs(TP2 - 2 * TP1 + TP0); |
|
1532 |
138497112 |
const int dq3 = abs(TQ2 - 2 * TQ1 + TQ0); |
|
1533 |
138497112 |
const int d0 = dp0 + dq0; |
|
1534 |
138497112 |
const int d3 = dp3 + dq3; |
|
1535 |
138497112 |
const int tc = _tc[j] << (BIT_DEPTH - 8); |
|
1536 |
138497112 |
const int no_p = _no_p[j]; |
|
1537 |
138497112 |
const int no_q = _no_q[j]; |
|
1538 |
|||
1539 |
✓✓ | 138497112 |
if (d0 + d3 >= beta) { |
1540 |
35885660 |
pix += 4 * ystride; |
|
1541 |
35885660 |
continue; |
|
1542 |
} else { |
||
1543 |
102611452 |
const int beta_3 = beta >> 3; |
|
1544 |
102611452 |
const int beta_2 = beta >> 2; |
|
1545 |
102611452 |
const int tc25 = ((tc * 5 + 1) >> 1); |
|
1546 |
|||
1547 |
✓✓✓✓ |
102611452 |
if (abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 && |
1548 |
✓✓✓✓ |
28056778 |
abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 && |
1549 |
✓✓✓✓ |
40476538 |
(d0 << 1) < beta_2 && (d3 << 1) < beta_2) { |
1550 |
// strong filtering |
||
1551 |
19666922 |
const int tc2 = tc << 1; |
|
1552 |
✓✓ | 98334610 |
for (d = 0; d < 4; d++) { |
1553 |
78667688 |
const int p3 = P3; |
|
1554 |
78667688 |
const int p2 = P2; |
|
1555 |
78667688 |
const int p1 = P1; |
|
1556 |
78667688 |
const int p0 = P0; |
|
1557 |
78667688 |
const int q0 = Q0; |
|
1558 |
78667688 |
const int q1 = Q1; |
|
1559 |
78667688 |
const int q2 = Q2; |
|
1560 |
78667688 |
const int q3 = Q3; |
|
1561 |
✓✓ | 78667688 |
if (!no_p) { |
1562 |
78656600 |
P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2); |
|
1563 |
78656600 |
P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2); |
|
1564 |
78656600 |
P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2); |
|
1565 |
} |
||
1566 |
✓✓ | 78667688 |
if (!no_q) { |
1567 |
78656344 |
Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2); |
|
1568 |
78656344 |
Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2); |
|
1569 |
78656344 |
Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2); |
|
1570 |
} |
||
1571 |
78667688 |
pix += ystride; |
|
1572 |
} |
||
1573 |
} else { // normal filtering |
||
1574 |
82944530 |
int nd_p = 1; |
|
1575 |
82944530 |
int nd_q = 1; |
|
1576 |
82944530 |
const int tc_2 = tc >> 1; |
|
1577 |
✓✓ | 82944530 |
if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3)) |
1578 |
58298980 |
nd_p = 2; |
|
1579 |
✓✓ | 82944530 |
if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3)) |
1580 |
56755326 |
nd_q = 2; |
|
1581 |
|||
1582 |
✓✓ | 414722650 |
for (d = 0; d < 4; d++) { |
1583 |
331778120 |
const int p2 = P2; |
|
1584 |
331778120 |
const int p1 = P1; |
|
1585 |
331778120 |
const int p0 = P0; |
|
1586 |
331778120 |
const int q0 = Q0; |
|
1587 |
331778120 |
const int q1 = Q1; |
|
1588 |
331778120 |
const int q2 = Q2; |
|
1589 |
331778120 |
int delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4; |
|
1590 |
✓✓ | 331778120 |
if (abs(delta0) < 10 * tc) { |
1591 |
328105740 |
delta0 = av_clip(delta0, -tc, tc); |
|
1592 |
✓✓ | 328105740 |
if (!no_p) |
1593 |
327475106 |
P0 = av_clip_pixel(p0 + delta0); |
|
1594 |
✓✓ | 328105740 |
if (!no_q) |
1595 |
327471244 |
Q0 = av_clip_pixel(q0 - delta0); |
|
1596 |
✓✓✓✓ |
328105740 |
if (!no_p && nd_p > 1) { |
1597 |
230806682 |
const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2); |
|
1598 |
230806682 |
P1 = av_clip_pixel(p1 + deltap1); |
|
1599 |
} |
||
1600 |
✓✓✓✓ |
328105740 |
if (!no_q && nd_q > 1) { |
1601 |
224675056 |
const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2); |
|
1602 |
224675056 |
Q1 = av_clip_pixel(q1 + deltaq1); |
|
1603 |
} |
||
1604 |
} |
||
1605 |
331778120 |
pix += ystride; |
|
1606 |
} |
||
1607 |
} |
||
1608 |
} |
||
1609 |
} |
||
1610 |
} |
||
1611 |
|||
1612 |
21716640 |
static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride, |
|
1613 |
ptrdiff_t _ystride, int *_tc, |
||
1614 |
uint8_t *_no_p, uint8_t *_no_q) |
||
1615 |
{ |
||
1616 |
int d, j, no_p, no_q; |
||
1617 |
21716640 |
pixel *pix = (pixel *)_pix; |
|
1618 |
21716640 |
ptrdiff_t xstride = _xstride / sizeof(pixel); |
|
1619 |
21716640 |
ptrdiff_t ystride = _ystride / sizeof(pixel); |
|
1620 |
|||
1621 |
✓✓ | 65149920 |
for (j = 0; j < 2; j++) { |
1622 |
43433280 |
const int tc = _tc[j] << (BIT_DEPTH - 8); |
|
1623 |
✓✓ | 43433280 |
if (tc <= 0) { |
1624 |
4548810 |
pix += 4 * ystride; |
|
1625 |
4548810 |
continue; |
|
1626 |
} |
||
1627 |
38884470 |
no_p = _no_p[j]; |
|
1628 |
38884470 |
no_q = _no_q[j]; |
|
1629 |
|||
1630 |
✓✓ | 194422350 |
for (d = 0; d < 4; d++) { |
1631 |
int delta0; |
||
1632 |
155537880 |
const int p1 = P1; |
|
1633 |
155537880 |
const int p0 = P0; |
|
1634 |
155537880 |
const int q0 = Q0; |
|
1635 |
155537880 |
const int q1 = Q1; |
|
1636 |
155537880 |
delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc); |
|
1637 |
✓✓ | 155537880 |
if (!no_p) |
1638 |
155012696 |
P0 = av_clip_pixel(p0 + delta0); |
|
1639 |
✓✓ | 155537880 |
if (!no_q) |
1640 |
155004984 |
Q0 = av_clip_pixel(q0 - delta0); |
|
1641 |
155537880 |
pix += ystride; |
|
1642 |
} |
||
1643 |
} |
||
1644 |
} |
||
1645 |
|||
1646 |
10573420 |
static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, |
|
1647 |
int32_t *tc, uint8_t *no_p, |
||
1648 |
uint8_t *no_q) |
||
1649 |
{ |
||
1650 |
10573420 |
FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q); |
|
1651 |
} |
||
1652 |
|||
1653 |
11143220 |
static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, |
|
1654 |
int32_t *tc, uint8_t *no_p, |
||
1655 |
uint8_t *no_q) |
||
1656 |
{ |
||
1657 |
11143220 |
FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q); |
|
1658 |
} |
||
1659 |
|||
1660 |
34958030 |
static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, |
|
1661 |
int beta, int32_t *tc, uint8_t *no_p, |
||
1662 |
uint8_t *no_q) |
||
1663 |
{ |
||
1664 |
34958030 |
FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel), |
|
1665 |
beta, tc, no_p, no_q); |
||
1666 |
} |
||
1667 |
|||
1668 |
34290526 |
static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, |
|
1669 |
int beta, int32_t *tc, uint8_t *no_p, |
||
1670 |
uint8_t *no_q) |
||
1671 |
{ |
||
1672 |
34290526 |
FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride, |
|
1673 |
beta, tc, no_p, no_q); |
||
1674 |
} |
||
1675 |
|||
1676 |
/* Retire the line-zero sample accessors so the names are free again. */
#undef P3
#undef P2
#undef P1
#undef P0
#undef Q0
#undef Q1
#undef Q2
#undef Q3

/* Retire the line-three sample accessors as well. */
#undef TP3
#undef TP2
#undef TP1
#undef TP0
#undef TQ0
#undef TQ1
#undef TQ2
#undef TQ3
Generated by: GCOVR (Version 4.2) |