Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * inter prediction template for HEVC/VVC | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * Copyright (C) 2024 Wu Jianhua | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | #define CHROMA_EXTRA_BEFORE 1 | ||
25 | #define CHROMA_EXTRA 3 | ||
26 | #define LUMA_EXTRA_BEFORE 3 | ||
27 | #define LUMA_EXTRA 7 | ||
28 | |||
29 | 12962068 | static void FUNC(put_pixels)(int16_t *dst, | |
30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
32 | { | ||
33 | 12962068 | const pixel *src = (const pixel *)_src; | |
34 | 12962068 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
35 | |||
36 |
2/2✓ Branch 0 taken 82672524 times.
✓ Branch 1 taken 6481034 times.
|
178307116 | for (int y = 0; y < height; y++) { |
37 |
2/2✓ Branch 0 taken 1670069736 times.
✓ Branch 1 taken 82672524 times.
|
3505484520 | for (int x = 0; x < width; x++) |
38 | 3340139472 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
39 | 165345048 | src += src_stride; | |
40 | 165345048 | dst += MAX_PB_SIZE; | |
41 | } | ||
42 | 12962068 | } | |
43 | |||
44 | 7319326 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
46 | const int8_t *hf, const int8_t *vf, const int width) | ||
47 | { | ||
48 | 7319326 | const pixel *src = (const pixel *)_src; | |
49 | 7319326 | pixel *dst = (pixel *)_dst; | |
50 | 7319326 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
51 | 7319326 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
52 | |||
53 |
2/2✓ Branch 0 taken 33892578 times.
✓ Branch 1 taken 3659663 times.
|
75104482 | for (int y = 0; y < height; y++) { |
54 | 67785156 | memcpy(dst, src, width * sizeof(pixel)); | |
55 | 67785156 | src += src_stride; | |
56 | 67785156 | dst += dst_stride; | |
57 | } | ||
58 | 7319326 | } | |
59 | |||
60 | 149460 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
63 | const int width) | ||
64 | { | ||
65 | 149460 | const pixel *src = (const pixel *)_src; | |
66 | 149460 | pixel *dst = (pixel *)_dst; | |
67 | 149460 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
68 | 149460 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
69 | 149460 | const int shift = denom + 14 - BIT_DEPTH; | |
70 | #if BIT_DEPTH < 14 | ||
71 | 149460 | const int offset = 1 << (shift - 1); | |
72 | #else | ||
73 | const int offset = 0; | ||
74 | #endif | ||
75 | 149460 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
76 | |||
77 |
2/2✓ Branch 0 taken 1660344 times.
✓ Branch 1 taken 74730 times.
|
3470148 | for (int y = 0; y < height; y++) { |
78 |
2/2✓ Branch 0 taken 56107752 times.
✓ Branch 1 taken 1660344 times.
|
115536192 | for (int x = 0; x < width; x++) { |
79 | 112215504 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
80 | 112215504 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
81 | } | ||
82 | 3320688 | src += src_stride; | |
83 | 3320688 | dst += dst_stride; | |
84 | } | ||
85 | 149460 | } | |
86 | |||
87 | #define LUMA_FILTER(src, stride) \ | ||
88 | (filter[0] * src[x - 3 * stride] + \ | ||
89 | filter[1] * src[x - 2 * stride] + \ | ||
90 | filter[2] * src[x - stride] + \ | ||
91 | filter[3] * src[x ] + \ | ||
92 | filter[4] * src[x + stride] + \ | ||
93 | filter[5] * src[x + 2 * stride] + \ | ||
94 | filter[6] * src[x + 3 * stride] + \ | ||
95 | filter[7] * src[x + 4 * stride]) | ||
96 | |||
97 | 2464916 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
99 | { | ||
100 | 2464916 | const pixel *src = (const pixel*)_src; | |
101 | 2464916 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
102 | 2464916 | const int8_t *filter = hf; | |
103 | |||
104 |
2/2✓ Branch 0 taken 19706604 times.
✓ Branch 1 taken 1232458 times.
|
41878124 | for (int y = 0; y < height; y++) { |
105 |
2/2✓ Branch 0 taken 562931848 times.
✓ Branch 1 taken 19706604 times.
|
1165276904 | for (int x = 0; x < width; x++) |
106 | 1125863696 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
107 | 39413208 | src += src_stride; | |
108 | 39413208 | dst += MAX_PB_SIZE; | |
109 | } | ||
110 | 2464916 | } | |
111 | |||
112 | 2441116 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
114 | { | ||
115 | 2441116 | const pixel *src = (pixel*)_src; | |
116 | 2441116 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
117 | 2441116 | const int8_t *filter = vf; | |
118 | |||
119 |
2/2✓ Branch 0 taken 17862932 times.
✓ Branch 1 taken 1220558 times.
|
38166980 | for (int y = 0; y < height; y++) { |
120 |
2/2✓ Branch 0 taken 475306216 times.
✓ Branch 1 taken 17862932 times.
|
986338296 | for (int x = 0; x < width; x++) |
121 | 950612432 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
122 | 35725864 | src += src_stride; | |
123 | 35725864 | dst += MAX_PB_SIZE; | |
124 | } | ||
125 | 2441116 | } | |
126 | |||
127 | 11530228 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
129 | { | ||
130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
131 | 11530228 | int16_t *tmp = tmp_array; | |
132 | 11530228 | const pixel *src = (const pixel*)_src; | |
133 | 11530228 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
134 | 11530228 | const int8_t *filter = hf; | |
135 | |||
136 | 11530228 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
137 |
2/2✓ Branch 0 taken 103597410 times.
✓ Branch 1 taken 5765114 times.
|
218725048 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 |
2/2✓ Branch 0 taken 1857180064 times.
✓ Branch 1 taken 103597410 times.
|
3921554948 | for (int x = 0; x < width; x++) |
139 | 3714360128 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
140 | 207194820 | src += src_stride; | |
141 | 207194820 | tmp += MAX_PB_SIZE; | |
142 | } | ||
143 | |||
144 | 11530228 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
145 | 11530228 | filter = vf; | |
146 |
2/2✓ Branch 0 taken 63241612 times.
✓ Branch 1 taken 5765114 times.
|
138013452 | for (int y = 0; y < height; y++) { |
147 |
2/2✓ Branch 0 taken 1418355832 times.
✓ Branch 1 taken 63241612 times.
|
2963194888 | for (int x = 0; x < width; x++) |
148 | 2836711664 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
149 | 126483224 | tmp += MAX_PB_SIZE; | |
150 | 126483224 | dst += MAX_PB_SIZE; | |
151 | } | ||
152 | 11530228 | } | |
153 | |||
154 | 1668234 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
157 | { | ||
158 | 1668234 | const pixel *src = (const pixel*)_src; | |
159 | 1668234 | pixel *dst = (pixel *)_dst; | |
160 | 1668234 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
161 | 1668234 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
162 | 1668234 | const int8_t *filter = hf; | |
163 | 1668234 | const int shift = 14 - BIT_DEPTH; | |
164 | #if BIT_DEPTH < 14 | ||
165 | 1668234 | const int offset = 1 << (shift - 1); | |
166 | #else | ||
167 | const int offset = 0; | ||
168 | #endif | ||
169 | |||
170 |
2/2✓ Branch 0 taken 11928208 times.
✓ Branch 1 taken 834117 times.
|
25524650 | for (int y = 0; y < height; y++) { |
171 |
2/2✓ Branch 0 taken 259479928 times.
✓ Branch 1 taken 11928208 times.
|
542816272 | for (int x = 0; x < width; x++) { |
172 | 518959856 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
173 | 518959856 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
174 | } | ||
175 | 23856416 | src += src_stride; | |
176 | 23856416 | dst += dst_stride; | |
177 | } | ||
178 | 1668234 | } | |
179 | |||
180 | 1489564 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
183 | { | ||
184 | |||
185 | 1489564 | const pixel *src = (const pixel*)_src; | |
186 | 1489564 | pixel *dst = (pixel *)_dst; | |
187 | 1489564 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
188 | 1489564 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
189 | 1489564 | const int8_t *filter = vf; | |
190 | 1489564 | const int shift = 14 - BIT_DEPTH; | |
191 | #if BIT_DEPTH < 14 | ||
192 | 1489564 | const int offset = 1 << (shift - 1); | |
193 | #else | ||
194 | const int offset = 0; | ||
195 | #endif | ||
196 | |||
197 |
2/2✓ Branch 0 taken 10851004 times.
✓ Branch 1 taken 744782 times.
|
23191572 | for (int y = 0; y < height; y++) { |
198 |
2/2✓ Branch 0 taken 250826648 times.
✓ Branch 1 taken 10851004 times.
|
523355304 | for (int x = 0; x < width; x++) { |
199 | 501653296 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
200 | 501653296 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
201 | } | ||
202 | 21702008 | src += src_stride; | |
203 | 21702008 | dst += dst_stride; | |
204 | } | ||
205 | 1489564 | } | |
206 | |||
207 | 4694436 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
210 | { | ||
211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
212 | 4694436 | int16_t *tmp = tmp_array; | |
213 | 4694436 | const pixel *src = (const pixel*)_src; | |
214 | 4694436 | pixel *dst = (pixel *)_dst; | |
215 | 4694436 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
216 | 4694436 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
217 | 4694436 | const int8_t *filter = hf; | |
218 | 4694436 | const int shift = 14 - BIT_DEPTH; | |
219 | #if BIT_DEPTH < 14 | ||
220 | 4694436 | const int offset = 1 << (shift - 1); | |
221 | #else | ||
222 | const int offset = 0; | ||
223 | #endif | ||
224 | |||
225 | 4694436 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
226 |
2/2✓ Branch 0 taken 49650974 times.
✓ Branch 1 taken 2347218 times.
|
103996384 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 |
2/2✓ Branch 0 taken 919926312 times.
✓ Branch 1 taken 49650974 times.
|
1939154572 | for (int x = 0; x < width; x++) |
228 | 1839852624 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
229 | 99301948 | src += src_stride; | |
230 | 99301948 | tmp += MAX_PB_SIZE; | |
231 | } | ||
232 | |||
233 | 4694436 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
234 | 4694436 | filter = vf; | |
235 | |||
236 |
2/2✓ Branch 0 taken 33220448 times.
✓ Branch 1 taken 2347218 times.
|
71135332 | for (int y = 0; y < height; y++) { |
237 |
2/2✓ Branch 0 taken 693101784 times.
✓ Branch 1 taken 33220448 times.
|
1452644464 | for (int x = 0; x < width; x++) { |
238 | 1386203568 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
239 | 1386203568 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
240 | } | ||
241 | 66440896 | tmp += MAX_PB_SIZE; | |
242 | 66440896 | dst += dst_stride; | |
243 | } | ||
244 | |||
245 | 4694436 | } | |
246 | |||
247 | 37764 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
250 | const int width) | ||
251 | { | ||
252 | 37764 | const pixel *src = (const pixel*)_src; | |
253 | 37764 | pixel *dst = (pixel *)_dst; | |
254 | 37764 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
255 | 37764 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
256 | 37764 | const int8_t *filter = hf; | |
257 | 37764 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
258 | 37764 | const int shift = denom + 14 - BIT_DEPTH; | |
259 | #if BIT_DEPTH < 14 | ||
260 | 37764 | const int offset = 1 << (shift - 1); | |
261 | #else | ||
262 | const int offset = 0; | ||
263 | #endif | ||
264 | |||
265 |
2/2✓ Branch 0 taken 390972 times.
✓ Branch 1 taken 18882 times.
|
819708 | for (int y = 0; y < height; y++) { |
266 |
2/2✓ Branch 0 taken 12549360 times.
✓ Branch 1 taken 390972 times.
|
25880664 | for (int x = 0; x < width; x++) |
267 | 25098720 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
268 | 781944 | src += src_stride; | |
269 | 781944 | dst += dst_stride; | |
270 | } | ||
271 | 37764 | } | |
272 | |||
273 | 34350 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
276 | const int width) | ||
277 | { | ||
278 | 34350 | const pixel *src = (const pixel*)_src; | |
279 | 34350 | pixel *dst = (pixel *)_dst; | |
280 | 34350 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
281 | 34350 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
282 | 34350 | const int8_t *filter = vf; | |
283 | 34350 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
284 | 34350 | const int shift = denom + 14 - BIT_DEPTH; | |
285 | #if BIT_DEPTH < 14 | ||
286 | 34350 | const int offset = 1 << (shift - 1); | |
287 | #else | ||
288 | const int offset = 0; | ||
289 | #endif | ||
290 | |||
291 |
2/2✓ Branch 0 taken 327044 times.
✓ Branch 1 taken 17175 times.
|
688438 | for (int y = 0; y < height; y++) { |
292 |
2/2✓ Branch 0 taken 9553008 times.
✓ Branch 1 taken 327044 times.
|
19760104 | for (int x = 0; x < width; x++) |
293 | 19106016 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
294 | 654088 | src += src_stride; | |
295 | 654088 | dst += dst_stride; | |
296 | } | ||
297 | 34350 | } | |
298 | |||
299 | 107858 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
302 | { | ||
303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
304 | 107858 | int16_t *tmp = tmp_array; | |
305 | 107858 | const pixel *src = (const pixel*)_src; | |
306 | 107858 | pixel *dst = (pixel *)_dst; | |
307 | 107858 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
308 | 107858 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
309 | 107858 | const int8_t *filter = hf; | |
310 | 107858 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
311 | 107858 | const int shift = denom + 14 - BIT_DEPTH; | |
312 | #if BIT_DEPTH < 14 | ||
313 | 107858 | const int offset = 1 << (shift - 1); | |
314 | #else | ||
315 | const int offset = 0; | ||
316 | #endif | ||
317 | |||
318 | 107858 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
319 |
2/2✓ Branch 0 taken 1418399 times.
✓ Branch 1 taken 53929 times.
|
2944656 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 |
2/2✓ Branch 0 taken 37973996 times.
✓ Branch 1 taken 1418399 times.
|
78784790 | for (int x = 0; x < width; x++) |
321 | 75947992 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
322 | 2836798 | src += src_stride; | |
323 | 2836798 | tmp += MAX_PB_SIZE; | |
324 | } | ||
325 | |||
326 | 107858 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
327 | 107858 | filter = vf; | |
328 |
2/2✓ Branch 0 taken 1040896 times.
✓ Branch 1 taken 53929 times.
|
2189650 | for (int y = 0; y < height; y++) { |
329 |
2/2✓ Branch 0 taken 31034896 times.
✓ Branch 1 taken 1040896 times.
|
64151584 | for (int x = 0; x < width; x++) |
330 | 62069792 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
331 | 2081792 | tmp += MAX_PB_SIZE; | |
332 | 2081792 | dst += dst_stride; | |
333 | } | ||
334 | 107858 | } | |
335 | |||
336 | #define CHROMA_FILTER(src, stride) \ | ||
337 | (filter[0] * src[x - stride] + \ | ||
338 | filter[1] * src[x] + \ | ||
339 | filter[2] * src[x + stride] + \ | ||
340 | filter[3] * src[x + 2 * stride]) | ||
341 | |||
342 | 3278232 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
344 | { | ||
345 | 3278232 | const pixel *src = (const pixel *)_src; | |
346 | 3278232 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
347 | 3278232 | const int8_t *filter = hf; | |
348 | |||
349 |
2/2✓ Branch 0 taken 17236928 times.
✓ Branch 1 taken 1639116 times.
|
37752088 | for (int y = 0; y < height; y++) { |
350 |
2/2✓ Branch 0 taken 279919224 times.
✓ Branch 1 taken 17236928 times.
|
594312304 | for (int x = 0; x < width; x++) |
351 | 559838448 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
352 | 34473856 | src += src_stride; | |
353 | 34473856 | dst += MAX_PB_SIZE; | |
354 | } | ||
355 | 3278232 | } | |
356 | |||
357 | 3191148 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
359 | { | ||
360 | 3191148 | const pixel *src = (const pixel *)_src; | |
361 | 3191148 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
362 | 3191148 | const int8_t *filter = vf; | |
363 | |||
364 |
2/2✓ Branch 0 taken 15924420 times.
✓ Branch 1 taken 1595574 times.
|
35039988 | for (int y = 0; y < height; y++) { |
365 |
2/2✓ Branch 0 taken 257079896 times.
✓ Branch 1 taken 15924420 times.
|
546008632 | for (int x = 0; x < width; x++) |
366 | 514159792 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
367 | 31848840 | src += src_stride; | |
368 | 31848840 | dst += MAX_PB_SIZE; | |
369 | } | ||
370 | 3191148 | } | |
371 | |||
372 | 15766516 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
374 | { | ||
375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
376 | 15766516 | int16_t *tmp = tmp_array; | |
377 | 15766516 | const pixel *src = (const pixel *)_src; | |
378 | 15766516 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
379 | 15766516 | const int8_t *filter = hf; | |
380 | |||
381 | 15766516 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
382 | |||
383 |
2/2✓ Branch 0 taken 97009426 times.
✓ Branch 1 taken 7883258 times.
|
209785368 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 |
2/2✓ Branch 0 taken 1228229856 times.
✓ Branch 1 taken 97009426 times.
|
2650478564 | for (int x = 0; x < width; x++) |
385 | 2456459712 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
386 | 194018852 | src += src_stride; | |
387 | 194018852 | tmp += MAX_PB_SIZE; | |
388 | } | ||
389 | |||
390 | 15766516 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
391 | 15766516 | filter = vf; | |
392 | |||
393 |
2/2✓ Branch 0 taken 73359652 times.
✓ Branch 1 taken 7883258 times.
|
162485820 | for (int y = 0; y < height; y++) { |
394 |
2/2✓ Branch 0 taken 1017755544 times.
✓ Branch 1 taken 73359652 times.
|
2182230392 | for (int x = 0; x < width; x++) |
395 | 2035511088 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
396 | 146719304 | tmp += MAX_PB_SIZE; | |
397 | 146719304 | dst += MAX_PB_SIZE; | |
398 | } | ||
399 | 15766516 | } | |
400 | |||
401 | 2974122 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
404 | { | ||
405 | 2974122 | const pixel *src = (const pixel *)_src; | |
406 | 2974122 | pixel *dst = (pixel *)_dst; | |
407 | 2974122 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
408 | 2974122 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
409 | 2974122 | const int8_t *filter = hf; | |
410 | 2974122 | const int shift = 14 - BIT_DEPTH; | |
411 | #if BIT_DEPTH < 14 | ||
412 | 2974122 | const int offset = 1 << (shift - 1); | |
413 | #else | ||
414 | const int offset = 0; | ||
415 | #endif | ||
416 | |||
417 |
2/2✓ Branch 0 taken 10327506 times.
✓ Branch 1 taken 1487061 times.
|
23629134 | for (int y = 0; y < height; y++) { |
418 |
2/2✓ Branch 0 taken 111382516 times.
✓ Branch 1 taken 10327506 times.
|
243420044 | for (int x = 0; x < width; x++) |
419 | 222765032 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
420 | 20655012 | src += src_stride; | |
421 | 20655012 | dst += dst_stride; | |
422 | } | ||
423 | 2974122 | } | |
424 | |||
425 | 2500162 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
428 | { | ||
429 | 2500162 | const pixel *src = (const pixel *)_src; | |
430 | 2500162 | pixel *dst = (pixel *)_dst; | |
431 | 2500162 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
432 | 2500162 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
433 | 2500162 | const int8_t *filter = vf; | |
434 | 2500162 | const int shift = 14 - BIT_DEPTH; | |
435 | #if BIT_DEPTH < 14 | ||
436 | 2500162 | const int offset = 1 << (shift - 1); | |
437 | #else | ||
438 | const int offset = 0; | ||
439 | #endif | ||
440 | |||
441 |
2/2✓ Branch 0 taken 8909882 times.
✓ Branch 1 taken 1250081 times.
|
20319926 | for (int y = 0; y < height; y++) { |
442 |
2/2✓ Branch 0 taken 103158404 times.
✓ Branch 1 taken 8909882 times.
|
224136572 | for (int x = 0; x < width; x++) |
443 | 206316808 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
444 | 17819764 | src += src_stride; | |
445 | 17819764 | dst += dst_stride; | |
446 | } | ||
447 | 2500162 | } | |
448 | |||
449 | 14453034 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
452 | { | ||
453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
454 | 14453034 | int16_t *tmp = tmp_array; | |
455 | 14453034 | const pixel *src = (const pixel *)_src; | |
456 | 14453034 | pixel *dst = (pixel *)_dst; | |
457 | 14453034 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
458 | 14453034 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
459 | 14453034 | const int8_t *filter = hf; | |
460 | 14453034 | const int shift = 14 - BIT_DEPTH; | |
461 | #if BIT_DEPTH < 14 | ||
462 | 14453034 | const int offset = 1 << (shift - 1); | |
463 | #else | ||
464 | const int offset = 0; | ||
465 | #endif | ||
466 | |||
467 | 14453034 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
468 | |||
469 |
2/2✓ Branch 0 taken 70251325 times.
✓ Branch 1 taken 7226517 times.
|
154955684 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 |
2/2✓ Branch 0 taken 634129614 times.
✓ Branch 1 taken 70251325 times.
|
1408761878 | for (int x = 0; x < width; x++) |
471 | 1268259228 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
472 | 140502650 | src += src_stride; | |
473 | 140502650 | tmp += MAX_PB_SIZE; | |
474 | } | ||
475 | |||
476 | 14453034 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
477 | 14453034 | filter = vf; | |
478 | |||
479 |
2/2✓ Branch 0 taken 48571774 times.
✓ Branch 1 taken 7226517 times.
|
111596582 | for (int y = 0; y < height; y++) { |
480 |
2/2✓ Branch 0 taken 492487956 times.
✓ Branch 1 taken 48571774 times.
|
1082119460 | for (int x = 0; x < width; x++) |
481 | 984975912 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
482 | 97143548 | tmp += MAX_PB_SIZE; | |
483 | 97143548 | dst += dst_stride; | |
484 | } | ||
485 | 14453034 | } | |
486 | |||
487 | 56740 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
489 | const int8_t *hf, const int8_t *vf, int width) | ||
490 | { | ||
491 | 56740 | const pixel *src = (const pixel *)_src; | |
492 | 56740 | pixel *dst = (pixel *)_dst; | |
493 | 56740 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
494 | 56740 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
495 | 56740 | const int8_t *filter = hf; | |
496 | 56740 | const int shift = denom + 14 - BIT_DEPTH; | |
497 | #if BIT_DEPTH < 14 | ||
498 | 56740 | const int offset = 1 << (shift - 1); | |
499 | #else | ||
500 | const int offset = 0; | ||
501 | #endif | ||
502 | |||
503 | 56740 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
504 |
2/2✓ Branch 0 taken 333452 times.
✓ Branch 1 taken 28370 times.
|
723644 | for (int y = 0; y < height; y++) { |
505 |
2/2✓ Branch 0 taken 6618968 times.
✓ Branch 1 taken 333452 times.
|
13904840 | for (int x = 0; x < width; x++) { |
506 | 13237936 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
507 | } | ||
508 | 666904 | dst += dst_stride; | |
509 | 666904 | src += src_stride; | |
510 | } | ||
511 | 56740 | } | |
512 | |||
513 | 44236 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
516 | const int width) | ||
517 | { | ||
518 | 44236 | const pixel *src = (const pixel *)_src; | |
519 | 44236 | pixel *dst = (pixel *)_dst; | |
520 | 44236 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
521 | 44236 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
522 | 44236 | const int8_t *filter = vf; | |
523 | 44236 | const int shift = denom + 14 - BIT_DEPTH; | |
524 | 44236 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
525 | #if BIT_DEPTH < 14 | ||
526 | 44236 | int offset = 1 << (shift - 1); | |
527 | #else | ||
528 | int offset = 0; | ||
529 | #endif | ||
530 | |||
531 |
2/2✓ Branch 0 taken 244340 times.
✓ Branch 1 taken 22118 times.
|
532916 | for (int y = 0; y < height; y++) { |
532 |
2/2✓ Branch 0 taken 4803880 times.
✓ Branch 1 taken 244340 times.
|
10096440 | for (int x = 0; x < width; x++) { |
533 | 9607760 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
534 | } | ||
535 | 488680 | dst += dst_stride; | |
536 | 488680 | src += src_stride; | |
537 | } | ||
538 | 44236 | } | |
539 | |||
540 | 269156 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
542 | const int8_t *hf, const int8_t *vf, int width) | ||
543 | { | ||
544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
545 | 269156 | int16_t *tmp = tmp_array; | |
546 | 269156 | const pixel *src = (const pixel *)_src; | |
547 | 269156 | pixel *dst = (pixel *)_dst; | |
548 | 269156 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
549 | 269156 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
550 | 269156 | const int8_t *filter = hf; | |
551 | 269156 | const int shift = denom + 14 - BIT_DEPTH; | |
552 | #if BIT_DEPTH < 14 | ||
553 | 269156 | const int offset = 1 << (shift - 1); | |
554 | #else | ||
555 | const int offset = 0; | ||
556 | #endif | ||
557 | |||
558 | 269156 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
559 | |||
560 |
2/2✓ Branch 0 taken 1673334 times.
✓ Branch 1 taken 134578 times.
|
3615824 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 |
2/2✓ Branch 0 taken 22787316 times.
✓ Branch 1 taken 1673334 times.
|
48921300 | for (int x = 0; x < width; x++) |
562 | 45574632 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
563 | 3346668 | src += src_stride; | |
564 | 3346668 | tmp += MAX_PB_SIZE; | |
565 | } | ||
566 | |||
567 | 269156 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
568 | 269156 | filter = vf; | |
569 | |||
570 | 269156 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
571 |
2/2✓ Branch 0 taken 1269600 times.
✓ Branch 1 taken 134578 times.
|
2808356 | for (int y = 0; y < height; y++) { |
572 |
2/2✓ Branch 0 taken 19169592 times.
✓ Branch 1 taken 1269600 times.
|
40878384 | for (int x = 0; x < width; x++) |
573 | 38339184 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
574 | 2539200 | tmp += MAX_PB_SIZE; | |
575 | 2539200 | dst += dst_stride; | |
576 | } | ||
577 | 269156 | } | |
578 |