Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * inter prediction template for HEVC/VVC | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * Copyright (C) 2024 Wu Jianhua | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | #define CHROMA_EXTRA_BEFORE 1 | ||
25 | #define CHROMA_EXTRA 3 | ||
26 | #define LUMA_EXTRA_BEFORE 3 | ||
27 | #define LUMA_EXTRA 7 | ||
28 | |||
29 | 8217242 | static void FUNC(put_pixels)(int16_t *dst, | |
30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
32 | { | ||
33 | 8217242 | const pixel *src = (const pixel *)_src; | |
34 | 8217242 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
35 | |||
36 |
2/2✓ Branch 0 taken 47387950 times.
✓ Branch 1 taken 4108621 times.
|
102993142 | for (int y = 0; y < height; y++) { |
37 |
2/2✓ Branch 0 taken 772193012 times.
✓ Branch 1 taken 47387950 times.
|
1639161924 | for (int x = 0; x < width; x++) |
38 | 1544386024 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
39 | 94775900 | src += src_stride; | |
40 | 94775900 | dst += MAX_PB_SIZE; | |
41 | } | ||
42 | 8217242 | } | |
43 | |||
44 | 2520752 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
46 | const int8_t *hf, const int8_t *vf, const int width) | ||
47 | { | ||
48 | 2520752 | const pixel *src = (const pixel *)_src; | |
49 | 2520752 | pixel *dst = (pixel *)_dst; | |
50 | 2520752 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
51 | 2520752 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
52 | |||
53 |
2/2✓ Branch 0 taken 8512756 times.
✓ Branch 1 taken 1260376 times.
|
19546264 | for (int y = 0; y < height; y++) { |
54 | 17025512 | memcpy(dst, src, width * sizeof(pixel)); | |
55 | 17025512 | src += src_stride; | |
56 | 17025512 | dst += dst_stride; | |
57 | } | ||
58 | 2520752 | } | |
59 | |||
60 | 3502 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
63 | const int width) | ||
64 | { | ||
65 | 3502 | const pixel *src = (const pixel *)_src; | |
66 | 3502 | pixel *dst = (pixel *)_dst; | |
67 | 3502 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
68 | 3502 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
69 | 3502 | const int shift = denom + 14 - BIT_DEPTH; | |
70 | #if BIT_DEPTH < 14 | ||
71 | 3502 | const int offset = 1 << (shift - 1); | |
72 | #else | ||
73 | const int offset = 0; | ||
74 | #endif | ||
75 | 3502 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
76 | |||
77 |
2/2✓ Branch 0 taken 20008 times.
✓ Branch 1 taken 1751 times.
|
43518 | for (int y = 0; y < height; y++) { |
78 |
2/2✓ Branch 0 taken 200416 times.
✓ Branch 1 taken 20008 times.
|
440848 | for (int x = 0; x < width; x++) { |
79 | 400832 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
80 | 400832 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
81 | } | ||
82 | 40016 | src += src_stride; | |
83 | 40016 | dst += dst_stride; | |
84 | } | ||
85 | 3502 | } | |
86 | |||
87 | #define LUMA_FILTER(src, stride) \ | ||
88 | (filter[0] * src[x - 3 * stride] + \ | ||
89 | filter[1] * src[x - 2 * stride] + \ | ||
90 | filter[2] * src[x - stride] + \ | ||
91 | filter[3] * src[x ] + \ | ||
92 | filter[4] * src[x + stride] + \ | ||
93 | filter[5] * src[x + 2 * stride] + \ | ||
94 | filter[6] * src[x + 3 * stride] + \ | ||
95 | filter[7] * src[x + 4 * stride]) | ||
96 | |||
97 | 1101516 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
99 | { | ||
100 | 1101516 | const pixel *src = (const pixel*)_src; | |
101 | 1101516 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
102 | 1101516 | const int8_t *filter = hf; | |
103 | |||
104 |
2/2✓ Branch 0 taken 4926328 times.
✓ Branch 1 taken 550758 times.
|
10954172 | for (int y = 0; y < height; y++) { |
105 |
2/2✓ Branch 0 taken 76296160 times.
✓ Branch 1 taken 4926328 times.
|
162444976 | for (int x = 0; x < width; x++) |
106 | 152592320 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
107 | 9852656 | src += src_stride; | |
108 | 9852656 | dst += MAX_PB_SIZE; | |
109 | } | ||
110 | 1101516 | } | |
111 | |||
112 | 1374096 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
114 | { | ||
115 | 1374096 | const pixel *src = (pixel*)_src; | |
116 | 1374096 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
117 | 1374096 | const int8_t *filter = vf; | |
118 | |||
119 |
2/2✓ Branch 0 taken 6323084 times.
✓ Branch 1 taken 687048 times.
|
14020264 | for (int y = 0; y < height; y++) { |
120 |
2/2✓ Branch 0 taken 96067680 times.
✓ Branch 1 taken 6323084 times.
|
204781528 | for (int x = 0; x < width; x++) |
121 | 192135360 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
122 | 12646168 | src += src_stride; | |
123 | 12646168 | dst += MAX_PB_SIZE; | |
124 | } | ||
125 | 1374096 | } | |
126 | |||
127 | 8235604 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
129 | { | ||
130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
131 | 8235604 | int16_t *tmp = tmp_array; | |
132 | 8235604 | const pixel *src = (const pixel*)_src; | |
133 | 8235604 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
134 | 8235604 | const int8_t *filter = hf; | |
135 | |||
136 | 8235604 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
137 |
2/2✓ Branch 0 taken 56987154 times.
✓ Branch 1 taken 4117802 times.
|
122209912 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 |
2/2✓ Branch 0 taken 509475724 times.
✓ Branch 1 taken 56987154 times.
|
1132925756 | for (int x = 0; x < width; x++) |
139 | 1018951448 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
140 | 113974308 | src += src_stride; | |
141 | 113974308 | tmp += MAX_PB_SIZE; | |
142 | } | ||
143 | |||
144 | 8235604 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
145 | 8235604 | filter = vf; | |
146 |
2/2✓ Branch 0 taken 28162540 times.
✓ Branch 1 taken 4117802 times.
|
64560684 | for (int y = 0; y < height; y++) { |
147 |
2/2✓ Branch 0 taken 313294480 times.
✓ Branch 1 taken 28162540 times.
|
682914040 | for (int x = 0; x < width; x++) |
148 | 626588960 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
149 | 56325080 | tmp += MAX_PB_SIZE; | |
150 | 56325080 | dst += MAX_PB_SIZE; | |
151 | } | ||
152 | 8235604 | } | |
153 | |||
154 | 97472 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
157 | { | ||
158 | 97472 | const pixel *src = (const pixel*)_src; | |
159 | 97472 | pixel *dst = (pixel *)_dst; | |
160 | 97472 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
161 | 97472 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
162 | 97472 | const int8_t *filter = hf; | |
163 | 97472 | const int shift = 14 - BIT_DEPTH; | |
164 | #if BIT_DEPTH < 14 | ||
165 | 97472 | const int offset = 1 << (shift - 1); | |
166 | #else | ||
167 | const int offset = 0; | ||
168 | #endif | ||
169 | |||
170 |
2/2✓ Branch 0 taken 564472 times.
✓ Branch 1 taken 48736 times.
|
1226416 | for (int y = 0; y < height; y++) { |
171 |
2/2✓ Branch 0 taken 12205136 times.
✓ Branch 1 taken 564472 times.
|
25539216 | for (int x = 0; x < width; x++) { |
172 | 24410272 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
173 | 24410272 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
174 | } | ||
175 | 1128944 | src += src_stride; | |
176 | 1128944 | dst += dst_stride; | |
177 | } | ||
178 | 97472 | } | |
179 | |||
180 | 118312 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
183 | { | ||
184 | |||
185 | 118312 | const pixel *src = (const pixel*)_src; | |
186 | 118312 | pixel *dst = (pixel *)_dst; | |
187 | 118312 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
188 | 118312 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
189 | 118312 | const int8_t *filter = vf; | |
190 | 118312 | const int shift = 14 - BIT_DEPTH; | |
191 | #if BIT_DEPTH < 14 | ||
192 | 118312 | const int offset = 1 << (shift - 1); | |
193 | #else | ||
194 | const int offset = 0; | ||
195 | #endif | ||
196 | |||
197 |
2/2✓ Branch 0 taken 654048 times.
✓ Branch 1 taken 59156 times.
|
1426408 | for (int y = 0; y < height; y++) { |
198 |
2/2✓ Branch 0 taken 13747440 times.
✓ Branch 1 taken 654048 times.
|
28802976 | for (int x = 0; x < width; x++) { |
199 | 27494880 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
200 | 27494880 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
201 | } | ||
202 | 1308096 | src += src_stride; | |
203 | 1308096 | dst += dst_stride; | |
204 | } | ||
205 | 118312 | } | |
206 | |||
207 | 367422 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
210 | { | ||
211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
212 | 367422 | int16_t *tmp = tmp_array; | |
213 | 367422 | const pixel *src = (const pixel*)_src; | |
214 | 367422 | pixel *dst = (pixel *)_dst; | |
215 | 367422 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
216 | 367422 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
217 | 367422 | const int8_t *filter = hf; | |
218 | 367422 | const int shift = 14 - BIT_DEPTH; | |
219 | #if BIT_DEPTH < 14 | ||
220 | 367422 | const int offset = 1 << (shift - 1); | |
221 | #else | ||
222 | const int offset = 0; | ||
223 | #endif | ||
224 | |||
225 | 367422 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
226 |
2/2✓ Branch 0 taken 3303829 times.
✓ Branch 1 taken 183711 times.
|
6975080 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 |
2/2✓ Branch 0 taken 49009332 times.
✓ Branch 1 taken 3303829 times.
|
104626322 | for (int x = 0; x < width; x++) |
228 | 98018664 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
229 | 6607658 | src += src_stride; | |
230 | 6607658 | tmp += MAX_PB_SIZE; | |
231 | } | ||
232 | |||
233 | 367422 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
234 | 367422 | filter = vf; | |
235 | |||
236 |
2/2✓ Branch 0 taken 2017852 times.
✓ Branch 1 taken 183711 times.
|
4403126 | for (int y = 0; y < height; y++) { |
237 |
2/2✓ Branch 0 taken 35505520 times.
✓ Branch 1 taken 2017852 times.
|
75046744 | for (int x = 0; x < width; x++) { |
238 | 71011040 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
239 | 71011040 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
240 | } | ||
241 | 4035704 | tmp += MAX_PB_SIZE; | |
242 | 4035704 | dst += dst_stride; | |
243 | } | ||
244 | |||
245 | 367422 | } | |
246 | |||
247 | 1534 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
250 | const int width) | ||
251 | { | ||
252 | 1534 | const pixel *src = (const pixel*)_src; | |
253 | 1534 | pixel *dst = (pixel *)_dst; | |
254 | 1534 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
255 | 1534 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
256 | 1534 | const int8_t *filter = hf; | |
257 | 1534 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
258 | 1534 | const int shift = denom + 14 - BIT_DEPTH; | |
259 | #if BIT_DEPTH < 14 | ||
260 | 1534 | const int offset = 1 << (shift - 1); | |
261 | #else | ||
262 | const int offset = 0; | ||
263 | #endif | ||
264 | |||
265 |
2/2✓ Branch 0 taken 9356 times.
✓ Branch 1 taken 767 times.
|
20246 | for (int y = 0; y < height; y++) { |
266 |
2/2✓ Branch 0 taken 101344 times.
✓ Branch 1 taken 9356 times.
|
221400 | for (int x = 0; x < width; x++) |
267 | 202688 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
268 | 18712 | src += src_stride; | |
269 | 18712 | dst += dst_stride; | |
270 | } | ||
271 | 1534 | } | |
272 | |||
273 | 1400 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
276 | const int width) | ||
277 | { | ||
278 | 1400 | const pixel *src = (const pixel*)_src; | |
279 | 1400 | pixel *dst = (pixel *)_dst; | |
280 | 1400 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
281 | 1400 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
282 | 1400 | const int8_t *filter = vf; | |
283 | 1400 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
284 | 1400 | const int shift = denom + 14 - BIT_DEPTH; | |
285 | #if BIT_DEPTH < 14 | ||
286 | 1400 | const int offset = 1 << (shift - 1); | |
287 | #else | ||
288 | const int offset = 0; | ||
289 | #endif | ||
290 | |||
291 |
2/2✓ Branch 0 taken 8160 times.
✓ Branch 1 taken 700 times.
|
17720 | for (int y = 0; y < height; y++) { |
292 |
2/2✓ Branch 0 taken 85760 times.
✓ Branch 1 taken 8160 times.
|
187840 | for (int x = 0; x < width; x++) |
293 | 171520 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
294 | 16320 | src += src_stride; | |
295 | 16320 | dst += dst_stride; | |
296 | } | ||
297 | 1400 | } | |
298 | |||
299 | 4980 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
302 | { | ||
303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
304 | 4980 | int16_t *tmp = tmp_array; | |
305 | 4980 | const pixel *src = (const pixel*)_src; | |
306 | 4980 | pixel *dst = (pixel *)_dst; | |
307 | 4980 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
308 | 4980 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
309 | 4980 | const int8_t *filter = hf; | |
310 | 4980 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
311 | 4980 | const int shift = denom + 14 - BIT_DEPTH; | |
312 | #if BIT_DEPTH < 14 | ||
313 | 4980 | const int offset = 1 << (shift - 1); | |
314 | #else | ||
315 | const int offset = 0; | ||
316 | #endif | ||
317 | |||
318 | 4980 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
319 |
2/2✓ Branch 0 taken 45866 times.
✓ Branch 1 taken 2490 times.
|
96712 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 |
2/2✓ Branch 0 taken 486152 times.
✓ Branch 1 taken 45866 times.
|
1064036 | for (int x = 0; x < width; x++) |
321 | 972304 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
322 | 91732 | src += src_stride; | |
323 | 91732 | tmp += MAX_PB_SIZE; | |
324 | } | ||
325 | |||
326 | 4980 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
327 | 4980 | filter = vf; | |
328 |
2/2✓ Branch 0 taken 28436 times.
✓ Branch 1 taken 2490 times.
|
61852 | for (int y = 0; y < height; y++) { |
329 |
2/2✓ Branch 0 taken 316864 times.
✓ Branch 1 taken 28436 times.
|
690600 | for (int x = 0; x < width; x++) |
330 | 633728 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
331 | 56872 | tmp += MAX_PB_SIZE; | |
332 | 56872 | dst += dst_stride; | |
333 | } | ||
334 | 4980 | } | |
335 | |||
336 | #define CHROMA_FILTER(src, stride) \ | ||
337 | (filter[0] * src[x - stride] + \ | ||
338 | filter[1] * src[x] + \ | ||
339 | filter[2] * src[x + stride] + \ | ||
340 | filter[3] * src[x + 2 * stride]) | ||
341 | |||
342 | 1132034 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
344 | { | ||
345 | 1132034 | const pixel *src = (const pixel *)_src; | |
346 | 1132034 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
347 | 1132034 | const int8_t *filter = hf; | |
348 | |||
349 |
2/2✓ Branch 0 taken 4787682 times.
✓ Branch 1 taken 566017 times.
|
10707398 | for (int y = 0; y < height; y++) { |
350 |
2/2✓ Branch 0 taken 78857956 times.
✓ Branch 1 taken 4787682 times.
|
167291276 | for (int x = 0; x < width; x++) |
351 | 157715912 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
352 | 9575364 | src += src_stride; | |
353 | 9575364 | dst += MAX_PB_SIZE; | |
354 | } | ||
355 | 1132034 | } | |
356 | |||
357 | 1700266 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
359 | { | ||
360 | 1700266 | const pixel *src = (const pixel *)_src; | |
361 | 1700266 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
362 | 1700266 | const int8_t *filter = vf; | |
363 | |||
364 |
2/2✓ Branch 0 taken 6830698 times.
✓ Branch 1 taken 850133 times.
|
15361662 | for (int y = 0; y < height; y++) { |
365 |
2/2✓ Branch 0 taken 103018660 times.
✓ Branch 1 taken 6830698 times.
|
219698716 | for (int x = 0; x < width; x++) |
366 | 206037320 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
367 | 13661396 | src += src_stride; | |
368 | 13661396 | dst += MAX_PB_SIZE; | |
369 | } | ||
370 | 1700266 | } | |
371 | |||
372 | 7262254 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
374 | { | ||
375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
376 | 7262254 | int16_t *tmp = tmp_array; | |
377 | 7262254 | const pixel *src = (const pixel *)_src; | |
378 | 7262254 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
379 | 7262254 | const int8_t *filter = hf; | |
380 | |||
381 | 7262254 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
382 | |||
383 |
2/2✓ Branch 0 taken 36364515 times.
✓ Branch 1 taken 3631127 times.
|
79991284 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 |
2/2✓ Branch 0 taken 342504810 times.
✓ Branch 1 taken 36364515 times.
|
757738650 | for (int x = 0; x < width; x++) |
385 | 685009620 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
386 | 72729030 | src += src_stride; | |
387 | 72729030 | tmp += MAX_PB_SIZE; | |
388 | } | ||
389 | |||
390 | 7262254 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
391 | 7262254 | filter = vf; | |
392 | |||
393 |
2/2✓ Branch 0 taken 25471134 times.
✓ Branch 1 taken 3631127 times.
|
58204522 | for (int y = 0; y < height; y++) { |
394 |
2/2✓ Branch 0 taken 266484996 times.
✓ Branch 1 taken 25471134 times.
|
583912260 | for (int x = 0; x < width; x++) |
395 | 532969992 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
396 | 50942268 | tmp += MAX_PB_SIZE; | |
397 | 50942268 | dst += MAX_PB_SIZE; | |
398 | } | ||
399 | 7262254 | } | |
400 | |||
401 | 410532 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
404 | { | ||
405 | 410532 | const pixel *src = (const pixel *)_src; | |
406 | 410532 | pixel *dst = (pixel *)_dst; | |
407 | 410532 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
408 | 410532 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
409 | 410532 | const int8_t *filter = hf; | |
410 | 410532 | const int shift = 14 - BIT_DEPTH; | |
411 | #if BIT_DEPTH < 14 | ||
412 | 410532 | const int offset = 1 << (shift - 1); | |
413 | #else | ||
414 | const int offset = 0; | ||
415 | #endif | ||
416 | |||
417 |
2/2✓ Branch 0 taken 1104136 times.
✓ Branch 1 taken 205266 times.
|
2618804 | for (int y = 0; y < height; y++) { |
418 |
2/2✓ Branch 0 taken 13596000 times.
✓ Branch 1 taken 1104136 times.
|
29400272 | for (int x = 0; x < width; x++) |
419 | 27192000 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
420 | 2208272 | src += src_stride; | |
421 | 2208272 | dst += dst_stride; | |
422 | } | ||
423 | 410532 | } | |
424 | |||
425 | 408188 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
428 | { | ||
429 | 408188 | const pixel *src = (const pixel *)_src; | |
430 | 408188 | pixel *dst = (pixel *)_dst; | |
431 | 408188 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
432 | 408188 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
433 | 408188 | const int8_t *filter = vf; | |
434 | 408188 | const int shift = 14 - BIT_DEPTH; | |
435 | #if BIT_DEPTH < 14 | ||
436 | 408188 | const int offset = 1 << (shift - 1); | |
437 | #else | ||
438 | const int offset = 0; | ||
439 | #endif | ||
440 | |||
441 |
2/2✓ Branch 0 taken 1109312 times.
✓ Branch 1 taken 204094 times.
|
2626812 | for (int y = 0; y < height; y++) { |
442 |
2/2✓ Branch 0 taken 13977200 times.
✓ Branch 1 taken 1109312 times.
|
30173024 | for (int x = 0; x < width; x++) |
443 | 27954400 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
444 | 2218624 | src += src_stride; | |
445 | 2218624 | dst += dst_stride; | |
446 | } | ||
447 | 408188 | } | |
448 | |||
449 | 3036436 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
452 | { | ||
453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
454 | 3036436 | int16_t *tmp = tmp_array; | |
455 | 3036436 | const pixel *src = (const pixel *)_src; | |
456 | 3036436 | pixel *dst = (pixel *)_dst; | |
457 | 3036436 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
458 | 3036436 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
459 | 3036436 | const int8_t *filter = hf; | |
460 | 3036436 | const int shift = 14 - BIT_DEPTH; | |
461 | #if BIT_DEPTH < 14 | ||
462 | 3036436 | const int offset = 1 << (shift - 1); | |
463 | #else | ||
464 | const int offset = 0; | ||
465 | #endif | ||
466 | |||
467 | 3036436 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
468 | |||
469 |
2/2✓ Branch 0 taken 11545574 times.
✓ Branch 1 taken 1518218 times.
|
26127584 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 |
2/2✓ Branch 0 taken 71034812 times.
✓ Branch 1 taken 11545574 times.
|
165160772 | for (int x = 0; x < width; x++) |
471 | 142069624 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
472 | 23091148 | src += src_stride; | |
473 | 23091148 | tmp += MAX_PB_SIZE; | |
474 | } | ||
475 | |||
476 | 3036436 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
477 | 3036436 | filter = vf; | |
478 | |||
479 |
2/2✓ Branch 0 taken 6990920 times.
✓ Branch 1 taken 1518218 times.
|
17018276 | for (int y = 0; y < height; y++) { |
480 |
2/2✓ Branch 0 taken 50327168 times.
✓ Branch 1 taken 6990920 times.
|
114636176 | for (int x = 0; x < width; x++) |
481 | 100654336 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
482 | 13981840 | tmp += MAX_PB_SIZE; | |
483 | 13981840 | dst += dst_stride; | |
484 | } | ||
485 | 3036436 | } | |
486 | |||
487 | 3160 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
489 | const int8_t *hf, const int8_t *vf, int width) | ||
490 | { | ||
491 | 3160 | const pixel *src = (const pixel *)_src; | |
492 | 3160 | pixel *dst = (pixel *)_dst; | |
493 | 3160 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
494 | 3160 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
495 | 3160 | const int8_t *filter = hf; | |
496 | 3160 | const int shift = denom + 14 - BIT_DEPTH; | |
497 | #if BIT_DEPTH < 14 | ||
498 | 3160 | const int offset = 1 << (shift - 1); | |
499 | #else | ||
500 | const int offset = 0; | ||
501 | #endif | ||
502 | |||
503 | 3160 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
504 |
2/2✓ Branch 0 taken 9736 times.
✓ Branch 1 taken 1580 times.
|
22632 | for (int y = 0; y < height; y++) { |
505 |
2/2✓ Branch 0 taken 55088 times.
✓ Branch 1 taken 9736 times.
|
129648 | for (int x = 0; x < width; x++) { |
506 | 110176 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
507 | } | ||
508 | 19472 | dst += dst_stride; | |
509 | 19472 | src += src_stride; | |
510 | } | ||
511 | 3160 | } | |
512 | |||
513 | 2236 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
516 | const int width) | ||
517 | { | ||
518 | 2236 | const pixel *src = (const pixel *)_src; | |
519 | 2236 | pixel *dst = (pixel *)_dst; | |
520 | 2236 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
521 | 2236 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
522 | 2236 | const int8_t *filter = vf; | |
523 | 2236 | const int shift = denom + 14 - BIT_DEPTH; | |
524 | 2236 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
525 | #if BIT_DEPTH < 14 | ||
526 | 2236 | int offset = 1 << (shift - 1); | |
527 | #else | ||
528 | int offset = 0; | ||
529 | #endif | ||
530 | |||
531 |
2/2✓ Branch 0 taken 7528 times.
✓ Branch 1 taken 1118 times.
|
17292 | for (int y = 0; y < height; y++) { |
532 |
2/2✓ Branch 0 taken 39472 times.
✓ Branch 1 taken 7528 times.
|
94000 | for (int x = 0; x < width; x++) { |
533 | 78944 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
534 | } | ||
535 | 15056 | dst += dst_stride; | |
536 | 15056 | src += src_stride; | |
537 | } | ||
538 | 2236 | } | |
539 | |||
540 | 17956 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
542 | const int8_t *hf, const int8_t *vf, int width) | ||
543 | { | ||
544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
545 | 17956 | int16_t *tmp = tmp_array; | |
546 | 17956 | const pixel *src = (const pixel *)_src; | |
547 | 17956 | pixel *dst = (pixel *)_dst; | |
548 | 17956 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
549 | 17956 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
550 | 17956 | const int8_t *filter = hf; | |
551 | 17956 | const int shift = denom + 14 - BIT_DEPTH; | |
552 | #if BIT_DEPTH < 14 | ||
553 | 17956 | const int offset = 1 << (shift - 1); | |
554 | #else | ||
555 | const int offset = 0; | ||
556 | #endif | ||
557 | |||
558 | 17956 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
559 | |||
560 |
2/2✓ Branch 0 taken 76134 times.
✓ Branch 1 taken 8978 times.
|
170224 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 |
2/2✓ Branch 0 taken 386628 times.
✓ Branch 1 taken 76134 times.
|
925524 | for (int x = 0; x < width; x++) |
562 | 773256 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
563 | 152268 | src += src_stride; | |
564 | 152268 | tmp += MAX_PB_SIZE; | |
565 | } | ||
566 | |||
567 | 17956 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
568 | 17956 | filter = vf; | |
569 | |||
570 | 17956 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
571 |
2/2✓ Branch 0 taken 49200 times.
✓ Branch 1 taken 8978 times.
|
116356 | for (int y = 0; y < height; y++) { |
572 |
2/2✓ Branch 0 taken 258816 times.
✓ Branch 1 taken 49200 times.
|
616032 | for (int x = 0; x < width; x++) |
573 | 517632 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
574 | 98400 | tmp += MAX_PB_SIZE; | |
575 | 98400 | dst += dst_stride; | |
576 | } | ||
577 | 17956 | } | |
578 |