Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * inter prediction template for HEVC/VVC | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * Copyright (C) 2024 Wu Jianhua | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | #define CHROMA_EXTRA_BEFORE 1 | ||
25 | #define CHROMA_EXTRA 3 | ||
26 | #define LUMA_EXTRA_BEFORE 3 | ||
27 | #define LUMA_EXTRA 7 | ||
28 | |||
29 | 8251664 | static void FUNC(put_pixels)(int16_t *dst, | |
30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
32 | { | ||
33 | 8251664 | const pixel *src = (const pixel *)_src; | |
34 | 8251664 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
35 | |||
36 |
2/2✓ Branch 0 taken 47684590 times.
✓ Branch 1 taken 4125832 times.
|
103620844 | for (int y = 0; y < height; y++) { |
37 |
2/2✓ Branch 0 taken 781459412 times.
✓ Branch 1 taken 47684590 times.
|
1658288004 | for (int x = 0; x < width; x++) |
38 | 1562918824 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
39 | 95369180 | src += src_stride; | |
40 | 95369180 | dst += MAX_PB_SIZE; | |
41 | } | ||
42 | 8251664 | } | |
43 | |||
44 | 2527502 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
46 | const int8_t *hf, const int8_t *vf, const int width) | ||
47 | { | ||
48 | 2527502 | const pixel *src = (const pixel *)_src; | |
49 | 2527502 | pixel *dst = (pixel *)_dst; | |
50 | 2527502 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
51 | 2527502 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
52 | |||
53 |
2/2✓ Branch 0 taken 8548784 times.
✓ Branch 1 taken 1263751 times.
|
19625070 | for (int y = 0; y < height; y++) { |
54 | 17097568 | memcpy(dst, src, width * sizeof(pixel)); | |
55 | 17097568 | src += src_stride; | |
56 | 17097568 | dst += dst_stride; | |
57 | } | ||
58 | 2527502 | } | |
59 | |||
60 | 3502 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
63 | const int width) | ||
64 | { | ||
65 | 3502 | const pixel *src = (const pixel *)_src; | |
66 | 3502 | pixel *dst = (pixel *)_dst; | |
67 | 3502 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
68 | 3502 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
69 | 3502 | const int shift = denom + 14 - BIT_DEPTH; | |
70 | #if BIT_DEPTH < 14 | ||
71 | 3502 | const int offset = 1 << (shift - 1); | |
72 | #else | ||
73 | const int offset = 0; | ||
74 | #endif | ||
75 | 3502 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
76 | |||
77 |
2/2✓ Branch 0 taken 20008 times.
✓ Branch 1 taken 1751 times.
|
43518 | for (int y = 0; y < height; y++) { |
78 |
2/2✓ Branch 0 taken 200416 times.
✓ Branch 1 taken 20008 times.
|
440848 | for (int x = 0; x < width; x++) { |
79 | 400832 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
80 | 400832 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
81 | } | ||
82 | 40016 | src += src_stride; | |
83 | 40016 | dst += dst_stride; | |
84 | } | ||
85 | 3502 | } | |
86 | |||
87 | #define LUMA_FILTER(src, stride) \ | ||
88 | (filter[0] * src[x - 3 * stride] + \ | ||
89 | filter[1] * src[x - 2 * stride] + \ | ||
90 | filter[2] * src[x - stride] + \ | ||
91 | filter[3] * src[x ] + \ | ||
92 | filter[4] * src[x + stride] + \ | ||
93 | filter[5] * src[x + 2 * stride] + \ | ||
94 | filter[6] * src[x + 3 * stride] + \ | ||
95 | filter[7] * src[x + 4 * stride]) | ||
96 | |||
97 | 1103116 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
99 | { | ||
100 | 1103116 | const pixel *src = (const pixel*)_src; | |
101 | 1103116 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
102 | 1103116 | const int8_t *filter = hf; | |
103 | |||
104 |
2/2✓ Branch 0 taken 4935056 times.
✓ Branch 1 taken 551558 times.
|
10973228 | for (int y = 0; y < height; y++) { |
105 |
2/2✓ Branch 0 taken 76425408 times.
✓ Branch 1 taken 4935056 times.
|
162720928 | for (int x = 0; x < width; x++) |
106 | 152850816 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
107 | 9870112 | src += src_stride; | |
108 | 9870112 | dst += MAX_PB_SIZE; | |
109 | } | ||
110 | 1103116 | } | |
111 | |||
112 | 1375752 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
114 | { | ||
115 | 1375752 | const pixel *src = (pixel*)_src; | |
116 | 1375752 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
117 | 1375752 | const int8_t *filter = vf; | |
118 | |||
119 |
2/2✓ Branch 0 taken 6332944 times.
✓ Branch 1 taken 687876 times.
|
14041640 | for (int y = 0; y < height; y++) { |
120 |
2/2✓ Branch 0 taken 96212592 times.
✓ Branch 1 taken 6332944 times.
|
205091072 | for (int x = 0; x < width; x++) |
121 | 192425184 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
122 | 12665888 | src += src_stride; | |
123 | 12665888 | dst += MAX_PB_SIZE; | |
124 | } | ||
125 | 1375752 | } | |
126 | |||
127 | 8242634 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
129 | { | ||
130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
131 | 8242634 | int16_t *tmp = tmp_array; | |
132 | 8242634 | const pixel *src = (const pixel*)_src; | |
133 | 8242634 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
134 | 8242634 | const int8_t *filter = hf; | |
135 | |||
136 | 8242634 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
137 |
2/2✓ Branch 0 taken 57043963 times.
✓ Branch 1 taken 4121317 times.
|
122330560 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 |
2/2✓ Branch 0 taken 510120368 times.
✓ Branch 1 taken 57043963 times.
|
1134328662 | for (int x = 0; x < width; x++) |
139 | 1020240736 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
140 | 114087926 | src += src_stride; | |
141 | 114087926 | tmp += MAX_PB_SIZE; | |
142 | } | ||
143 | |||
144 | 8242634 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
145 | 8242634 | filter = vf; | |
146 |
2/2✓ Branch 0 taken 28194744 times.
✓ Branch 1 taken 4121317 times.
|
64632122 | for (int y = 0; y < height; y++) { |
147 |
2/2✓ Branch 0 taken 313713696 times.
✓ Branch 1 taken 28194744 times.
|
683816880 | for (int x = 0; x < width; x++) |
148 | 627427392 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
149 | 56389488 | tmp += MAX_PB_SIZE; | |
150 | 56389488 | dst += MAX_PB_SIZE; | |
151 | } | ||
152 | 8242634 | } | |
153 | |||
154 | 97956 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
157 | { | ||
158 | 97956 | const pixel *src = (const pixel*)_src; | |
159 | 97956 | pixel *dst = (pixel *)_dst; | |
160 | 97956 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
161 | 97956 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
162 | 97956 | const int8_t *filter = hf; | |
163 | 97956 | const int shift = 14 - BIT_DEPTH; | |
164 | #if BIT_DEPTH < 14 | ||
165 | 97956 | const int offset = 1 << (shift - 1); | |
166 | #else | ||
167 | const int offset = 0; | ||
168 | #endif | ||
169 | |||
170 |
2/2✓ Branch 0 taken 566512 times.
✓ Branch 1 taken 48978 times.
|
1230980 | for (int y = 0; y < height; y++) { |
171 |
2/2✓ Branch 0 taken 12226768 times.
✓ Branch 1 taken 566512 times.
|
25586560 | for (int x = 0; x < width; x++) { |
172 | 24453536 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
173 | 24453536 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
174 | } | ||
175 | 1133024 | src += src_stride; | |
176 | 1133024 | dst += dst_stride; | |
177 | } | ||
178 | 97956 | } | |
179 | |||
180 | 118844 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
183 | { | ||
184 | |||
185 | 118844 | const pixel *src = (const pixel*)_src; | |
186 | 118844 | pixel *dst = (pixel *)_dst; | |
187 | 118844 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
188 | 118844 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
189 | 118844 | const int8_t *filter = vf; | |
190 | 118844 | const int shift = 14 - BIT_DEPTH; | |
191 | #if BIT_DEPTH < 14 | ||
192 | 118844 | const int offset = 1 << (shift - 1); | |
193 | #else | ||
194 | const int offset = 0; | ||
195 | #endif | ||
196 | |||
197 |
2/2✓ Branch 0 taken 656348 times.
✓ Branch 1 taken 59422 times.
|
1431540 | for (int y = 0; y < height; y++) { |
198 |
2/2✓ Branch 0 taken 13769984 times.
✓ Branch 1 taken 656348 times.
|
28852664 | for (int x = 0; x < width; x++) { |
199 | 27539968 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
200 | 27539968 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
201 | } | ||
202 | 1312696 | src += src_stride; | |
203 | 1312696 | dst += dst_stride; | |
204 | } | ||
205 | 118844 | } | |
206 | |||
207 | 369286 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
210 | { | ||
211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
212 | 369286 | int16_t *tmp = tmp_array; | |
213 | 369286 | const pixel *src = (const pixel*)_src; | |
214 | 369286 | pixel *dst = (pixel *)_dst; | |
215 | 369286 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
216 | 369286 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
217 | 369286 | const int8_t *filter = hf; | |
218 | 369286 | const int shift = 14 - BIT_DEPTH; | |
219 | #if BIT_DEPTH < 14 | ||
220 | 369286 | const int offset = 1 << (shift - 1); | |
221 | #else | ||
222 | const int offset = 0; | ||
223 | #endif | ||
224 | |||
225 | 369286 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
226 |
2/2✓ Branch 0 taken 3316897 times.
✓ Branch 1 taken 184643 times.
|
7003080 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 |
2/2✓ Branch 0 taken 49113924 times.
✓ Branch 1 taken 3316897 times.
|
104861642 | for (int x = 0; x < width; x++) |
228 | 98227848 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
229 | 6633794 | src += src_stride; | |
230 | 6633794 | tmp += MAX_PB_SIZE; | |
231 | } | ||
232 | |||
233 | 369286 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
234 | 369286 | filter = vf; | |
235 | |||
236 |
2/2✓ Branch 0 taken 2024396 times.
✓ Branch 1 taken 184643 times.
|
4418078 | for (int y = 0; y < height; y++) { |
237 |
2/2✓ Branch 0 taken 35564304 times.
✓ Branch 1 taken 2024396 times.
|
75177400 | for (int x = 0; x < width; x++) { |
238 | 71128608 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
239 | 71128608 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
240 | } | ||
241 | 4048792 | tmp += MAX_PB_SIZE; | |
242 | 4048792 | dst += dst_stride; | |
243 | } | ||
244 | |||
245 | 369286 | } | |
246 | |||
247 | 1534 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
250 | const int width) | ||
251 | { | ||
252 | 1534 | const pixel *src = (const pixel*)_src; | |
253 | 1534 | pixel *dst = (pixel *)_dst; | |
254 | 1534 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
255 | 1534 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
256 | 1534 | const int8_t *filter = hf; | |
257 | 1534 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
258 | 1534 | const int shift = denom + 14 - BIT_DEPTH; | |
259 | #if BIT_DEPTH < 14 | ||
260 | 1534 | const int offset = 1 << (shift - 1); | |
261 | #else | ||
262 | const int offset = 0; | ||
263 | #endif | ||
264 | |||
265 |
2/2✓ Branch 0 taken 9356 times.
✓ Branch 1 taken 767 times.
|
20246 | for (int y = 0; y < height; y++) { |
266 |
2/2✓ Branch 0 taken 101344 times.
✓ Branch 1 taken 9356 times.
|
221400 | for (int x = 0; x < width; x++) |
267 | 202688 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
268 | 18712 | src += src_stride; | |
269 | 18712 | dst += dst_stride; | |
270 | } | ||
271 | 1534 | } | |
272 | |||
273 | 1400 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
276 | const int width) | ||
277 | { | ||
278 | 1400 | const pixel *src = (const pixel*)_src; | |
279 | 1400 | pixel *dst = (pixel *)_dst; | |
280 | 1400 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
281 | 1400 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
282 | 1400 | const int8_t *filter = vf; | |
283 | 1400 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
284 | 1400 | const int shift = denom + 14 - BIT_DEPTH; | |
285 | #if BIT_DEPTH < 14 | ||
286 | 1400 | const int offset = 1 << (shift - 1); | |
287 | #else | ||
288 | const int offset = 0; | ||
289 | #endif | ||
290 | |||
291 |
2/2✓ Branch 0 taken 8160 times.
✓ Branch 1 taken 700 times.
|
17720 | for (int y = 0; y < height; y++) { |
292 |
2/2✓ Branch 0 taken 85760 times.
✓ Branch 1 taken 8160 times.
|
187840 | for (int x = 0; x < width; x++) |
293 | 171520 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
294 | 16320 | src += src_stride; | |
295 | 16320 | dst += dst_stride; | |
296 | } | ||
297 | 1400 | } | |
298 | |||
299 | 4980 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
302 | { | ||
303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
304 | 4980 | int16_t *tmp = tmp_array; | |
305 | 4980 | const pixel *src = (const pixel*)_src; | |
306 | 4980 | pixel *dst = (pixel *)_dst; | |
307 | 4980 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
308 | 4980 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
309 | 4980 | const int8_t *filter = hf; | |
310 | 4980 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
311 | 4980 | const int shift = denom + 14 - BIT_DEPTH; | |
312 | #if BIT_DEPTH < 14 | ||
313 | 4980 | const int offset = 1 << (shift - 1); | |
314 | #else | ||
315 | const int offset = 0; | ||
316 | #endif | ||
317 | |||
318 | 4980 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
319 |
2/2✓ Branch 0 taken 45866 times.
✓ Branch 1 taken 2490 times.
|
96712 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 |
2/2✓ Branch 0 taken 486152 times.
✓ Branch 1 taken 45866 times.
|
1064036 | for (int x = 0; x < width; x++) |
321 | 972304 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
322 | 91732 | src += src_stride; | |
323 | 91732 | tmp += MAX_PB_SIZE; | |
324 | } | ||
325 | |||
326 | 4980 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
327 | 4980 | filter = vf; | |
328 |
2/2✓ Branch 0 taken 28436 times.
✓ Branch 1 taken 2490 times.
|
61852 | for (int y = 0; y < height; y++) { |
329 |
2/2✓ Branch 0 taken 316864 times.
✓ Branch 1 taken 28436 times.
|
690600 | for (int x = 0; x < width; x++) |
330 | 633728 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
331 | 56872 | tmp += MAX_PB_SIZE; | |
332 | 56872 | dst += dst_stride; | |
333 | } | ||
334 | 4980 | } | |
335 | |||
336 | #define CHROMA_FILTER(src, stride) \ | ||
337 | (filter[0] * src[x - stride] + \ | ||
338 | filter[1] * src[x] + \ | ||
339 | filter[2] * src[x + stride] + \ | ||
340 | filter[3] * src[x + 2 * stride]) | ||
341 | |||
342 | 1134294 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
344 | { | ||
345 | 1134294 | const pixel *src = (const pixel *)_src; | |
346 | 1134294 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
347 | 1134294 | const int8_t *filter = hf; | |
348 | |||
349 |
2/2✓ Branch 0 taken 4794954 times.
✓ Branch 1 taken 567147 times.
|
10724202 | for (int y = 0; y < height; y++) { |
350 |
2/2✓ Branch 0 taken 78913668 times.
✓ Branch 1 taken 4794954 times.
|
167417244 | for (int x = 0; x < width; x++) |
351 | 157827336 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
352 | 9589908 | src += src_stride; | |
353 | 9589908 | dst += MAX_PB_SIZE; | |
354 | } | ||
355 | 1134294 | } | |
356 | |||
357 | 1702846 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
359 | { | ||
360 | 1702846 | const pixel *src = (const pixel *)_src; | |
361 | 1702846 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
362 | 1702846 | const int8_t *filter = vf; | |
363 | |||
364 |
2/2✓ Branch 0 taken 6838762 times.
✓ Branch 1 taken 851423 times.
|
15380370 | for (int y = 0; y < height; y++) { |
365 |
2/2✓ Branch 0 taken 103080292 times.
✓ Branch 1 taken 6838762 times.
|
219838108 | for (int x = 0; x < width; x++) |
366 | 206160584 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
367 | 13677524 | src += src_stride; | |
368 | 13677524 | dst += MAX_PB_SIZE; | |
369 | } | ||
370 | 1702846 | } | |
371 | |||
372 | 7275310 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
374 | { | ||
375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
376 | 7275310 | int16_t *tmp = tmp_array; | |
377 | 7275310 | const pixel *src = (const pixel *)_src; | |
378 | 7275310 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
379 | 7275310 | const int8_t *filter = hf; | |
380 | |||
381 | 7275310 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
382 | |||
383 |
2/2✓ Branch 0 taken 36423747 times.
✓ Branch 1 taken 3637655 times.
|
80122804 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 |
2/2✓ Branch 0 taken 342909066 times.
✓ Branch 1 taken 36423747 times.
|
758665626 | for (int x = 0; x < width; x++) |
385 | 685818132 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
386 | 72847494 | src += src_stride; | |
387 | 72847494 | tmp += MAX_PB_SIZE; | |
388 | } | ||
389 | |||
390 | 7275310 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
391 | 7275310 | filter = vf; | |
392 | |||
393 |
2/2✓ Branch 0 taken 25510782 times.
✓ Branch 1 taken 3637655 times.
|
58296874 | for (int y = 0; y < height; y++) { |
394 |
2/2✓ Branch 0 taken 266770308 times.
✓ Branch 1 taken 25510782 times.
|
584562180 | for (int x = 0; x < width; x++) |
395 | 533540616 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
396 | 51021564 | tmp += MAX_PB_SIZE; | |
397 | 51021564 | dst += MAX_PB_SIZE; | |
398 | } | ||
399 | 7275310 | } | |
400 | |||
401 | 411272 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
404 | { | ||
405 | 411272 | const pixel *src = (const pixel *)_src; | |
406 | 411272 | pixel *dst = (pixel *)_dst; | |
407 | 411272 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
408 | 411272 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
409 | 411272 | const int8_t *filter = hf; | |
410 | 411272 | const int shift = 14 - BIT_DEPTH; | |
411 | #if BIT_DEPTH < 14 | ||
412 | 411272 | const int offset = 1 << (shift - 1); | |
413 | #else | ||
414 | const int offset = 0; | ||
415 | #endif | ||
416 | |||
417 |
2/2✓ Branch 0 taken 1106040 times.
✓ Branch 1 taken 205636 times.
|
2623352 | for (int y = 0; y < height; y++) { |
418 |
2/2✓ Branch 0 taken 13607200 times.
✓ Branch 1 taken 1106040 times.
|
29426480 | for (int x = 0; x < width; x++) |
419 | 27214400 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
420 | 2212080 | src += src_stride; | |
421 | 2212080 | dst += dst_stride; | |
422 | } | ||
423 | 411272 | } | |
424 | |||
425 | 409020 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
428 | { | ||
429 | 409020 | const pixel *src = (const pixel *)_src; | |
430 | 409020 | pixel *dst = (pixel *)_dst; | |
431 | 409020 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
432 | 409020 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
433 | 409020 | const int8_t *filter = vf; | |
434 | 409020 | const int shift = 14 - BIT_DEPTH; | |
435 | #if BIT_DEPTH < 14 | ||
436 | 409020 | const int offset = 1 << (shift - 1); | |
437 | #else | ||
438 | const int offset = 0; | ||
439 | #endif | ||
440 | |||
441 |
2/2✓ Branch 0 taken 1111240 times.
✓ Branch 1 taken 204510 times.
|
2631500 | for (int y = 0; y < height; y++) { |
442 |
2/2✓ Branch 0 taken 13987024 times.
✓ Branch 1 taken 1111240 times.
|
30196528 | for (int x = 0; x < width; x++) |
443 | 27974048 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
444 | 2222480 | src += src_stride; | |
445 | 2222480 | dst += dst_stride; | |
446 | } | ||
447 | 409020 | } | |
448 | |||
449 | 3039880 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
452 | { | ||
453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
454 | 3039880 | int16_t *tmp = tmp_array; | |
455 | 3039880 | const pixel *src = (const pixel *)_src; | |
456 | 3039880 | pixel *dst = (pixel *)_dst; | |
457 | 3039880 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
458 | 3039880 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
459 | 3039880 | const int8_t *filter = hf; | |
460 | 3039880 | const int shift = 14 - BIT_DEPTH; | |
461 | #if BIT_DEPTH < 14 | ||
462 | 3039880 | const int offset = 1 << (shift - 1); | |
463 | #else | ||
464 | const int offset = 0; | ||
465 | #endif | ||
466 | |||
467 | 3039880 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
468 | |||
469 |
2/2✓ Branch 0 taken 11558756 times.
✓ Branch 1 taken 1519940 times.
|
26157392 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 |
2/2✓ Branch 0 taken 71100140 times.
✓ Branch 1 taken 11558756 times.
|
165317792 | for (int x = 0; x < width; x++) |
471 | 142200280 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
472 | 23117512 | src += src_stride; | |
473 | 23117512 | tmp += MAX_PB_SIZE; | |
474 | } | ||
475 | |||
476 | 3039880 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
477 | 3039880 | filter = vf; | |
478 | |||
479 |
2/2✓ Branch 0 taken 6998936 times.
✓ Branch 1 taken 1519940 times.
|
17037752 | for (int y = 0; y < height; y++) { |
480 |
2/2✓ Branch 0 taken 50368448 times.
✓ Branch 1 taken 6998936 times.
|
114734768 | for (int x = 0; x < width; x++) |
481 | 100736896 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
482 | 13997872 | tmp += MAX_PB_SIZE; | |
483 | 13997872 | dst += dst_stride; | |
484 | } | ||
485 | 3039880 | } | |
486 | |||
487 | 3160 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
489 | const int8_t *hf, const int8_t *vf, int width) | ||
490 | { | ||
491 | 3160 | const pixel *src = (const pixel *)_src; | |
492 | 3160 | pixel *dst = (pixel *)_dst; | |
493 | 3160 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
494 | 3160 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
495 | 3160 | const int8_t *filter = hf; | |
496 | 3160 | const int shift = denom + 14 - BIT_DEPTH; | |
497 | #if BIT_DEPTH < 14 | ||
498 | 3160 | const int offset = 1 << (shift - 1); | |
499 | #else | ||
500 | const int offset = 0; | ||
501 | #endif | ||
502 | |||
503 | 3160 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
504 |
2/2✓ Branch 0 taken 9736 times.
✓ Branch 1 taken 1580 times.
|
22632 | for (int y = 0; y < height; y++) { |
505 |
2/2✓ Branch 0 taken 55088 times.
✓ Branch 1 taken 9736 times.
|
129648 | for (int x = 0; x < width; x++) { |
506 | 110176 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
507 | } | ||
508 | 19472 | dst += dst_stride; | |
509 | 19472 | src += src_stride; | |
510 | } | ||
511 | 3160 | } | |
512 | |||
513 | 2236 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
516 | const int width) | ||
517 | { | ||
518 | 2236 | const pixel *src = (const pixel *)_src; | |
519 | 2236 | pixel *dst = (pixel *)_dst; | |
520 | 2236 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
521 | 2236 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
522 | 2236 | const int8_t *filter = vf; | |
523 | 2236 | const int shift = denom + 14 - BIT_DEPTH; | |
524 | 2236 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
525 | #if BIT_DEPTH < 14 | ||
526 | 2236 | int offset = 1 << (shift - 1); | |
527 | #else | ||
528 | int offset = 0; | ||
529 | #endif | ||
530 | |||
531 |
2/2✓ Branch 0 taken 7528 times.
✓ Branch 1 taken 1118 times.
|
17292 | for (int y = 0; y < height; y++) { |
532 |
2/2✓ Branch 0 taken 39472 times.
✓ Branch 1 taken 7528 times.
|
94000 | for (int x = 0; x < width; x++) { |
533 | 78944 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
534 | } | ||
535 | 15056 | dst += dst_stride; | |
536 | 15056 | src += src_stride; | |
537 | } | ||
538 | 2236 | } | |
539 | |||
540 | 17956 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
542 | const int8_t *hf, const int8_t *vf, int width) | ||
543 | { | ||
544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
545 | 17956 | int16_t *tmp = tmp_array; | |
546 | 17956 | const pixel *src = (const pixel *)_src; | |
547 | 17956 | pixel *dst = (pixel *)_dst; | |
548 | 17956 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
549 | 17956 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
550 | 17956 | const int8_t *filter = hf; | |
551 | 17956 | const int shift = denom + 14 - BIT_DEPTH; | |
552 | #if BIT_DEPTH < 14 | ||
553 | 17956 | const int offset = 1 << (shift - 1); | |
554 | #else | ||
555 | const int offset = 0; | ||
556 | #endif | ||
557 | |||
558 | 17956 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
559 | |||
560 |
2/2✓ Branch 0 taken 76134 times.
✓ Branch 1 taken 8978 times.
|
170224 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 |
2/2✓ Branch 0 taken 386628 times.
✓ Branch 1 taken 76134 times.
|
925524 | for (int x = 0; x < width; x++) |
562 | 773256 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
563 | 152268 | src += src_stride; | |
564 | 152268 | tmp += MAX_PB_SIZE; | |
565 | } | ||
566 | |||
567 | 17956 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
568 | 17956 | filter = vf; | |
569 | |||
570 | 17956 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
571 |
2/2✓ Branch 0 taken 49200 times.
✓ Branch 1 taken 8978 times.
|
116356 | for (int y = 0; y < height; y++) { |
572 |
2/2✓ Branch 0 taken 258816 times.
✓ Branch 1 taken 49200 times.
|
616032 | for (int x = 0; x < width; x++) |
573 | 517632 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
574 | 98400 | tmp += MAX_PB_SIZE; | |
575 | 98400 | dst += dst_stride; | |
576 | } | ||
577 | 17956 | } | |
578 |