Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * inter prediction template for HEVC/VVC | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * Copyright (C) 2024 Wu Jianhua | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | #define CHROMA_EXTRA_BEFORE 1 | ||
25 | #define CHROMA_EXTRA 3 | ||
26 | #define LUMA_EXTRA_BEFORE 3 | ||
27 | #define LUMA_EXTRA 7 | ||
28 | |||
29 | 12915452 | static void FUNC(put_pixels)(int16_t *dst, | |
30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
32 | { | ||
33 | 12915452 | const pixel *src = (const pixel *)_src; | |
34 | 12915452 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
35 | |||
36 |
2/2✓ Branch 0 taken 70099778 times.
✓ Branch 1 taken 6457726 times.
|
153115008 | for (int y = 0; y < height; y++) { |
37 |
2/2✓ Branch 0 taken 1046359332 times.
✓ Branch 1 taken 70099778 times.
|
2232918220 | for (int x = 0; x < width; x++) |
38 | 2092718664 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
39 | 140199556 | src += src_stride; | |
40 | 140199556 | dst += MAX_PB_SIZE; | |
41 | } | ||
42 | 12915452 | } | |
43 | |||
44 | 3529456 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
46 | const int8_t *hf, const int8_t *vf, const int width) | ||
47 | { | ||
48 | 3529456 | const pixel *src = (const pixel *)_src; | |
49 | 3529456 | pixel *dst = (pixel *)_dst; | |
50 | 3529456 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
51 | 3529456 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
52 | |||
53 |
2/2✓ Branch 0 taken 13263964 times.
✓ Branch 1 taken 1764728 times.
|
30057384 | for (int y = 0; y < height; y++) { |
54 | 26527928 | memcpy(dst, src, width * sizeof(pixel)); | |
55 | 26527928 | src += src_stride; | |
56 | 26527928 | dst += dst_stride; | |
57 | } | ||
58 | 3529456 | } | |
59 | |||
60 | 256604 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
63 | const int width) | ||
64 | { | ||
65 | 256604 | const pixel *src = (const pixel *)_src; | |
66 | 256604 | pixel *dst = (pixel *)_dst; | |
67 | 256604 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
68 | 256604 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
69 | 256604 | const int shift = denom + 14 - BIT_DEPTH; | |
70 | #if BIT_DEPTH < 14 | ||
71 | 256604 | const int offset = 1 << (shift - 1); | |
72 | #else | ||
73 | const int offset = 0; | ||
74 | #endif | ||
75 | 256604 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
76 | |||
77 |
2/2✓ Branch 0 taken 637352 times.
✓ Branch 1 taken 128302 times.
|
1531308 | for (int y = 0; y < height; y++) { |
78 |
2/2✓ Branch 0 taken 5833840 times.
✓ Branch 1 taken 637352 times.
|
12942384 | for (int x = 0; x < width; x++) { |
79 | 11667680 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
80 | 11667680 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
81 | } | ||
82 | 1274704 | src += src_stride; | |
83 | 1274704 | dst += dst_stride; | |
84 | } | ||
85 | 256604 | } | |
86 | |||
87 | #define LUMA_FILTER(src, stride) \ | ||
88 | (filter[0] * src[x - 3 * stride] + \ | ||
89 | filter[1] * src[x - 2 * stride] + \ | ||
90 | filter[2] * src[x - stride] + \ | ||
91 | filter[3] * src[x ] + \ | ||
92 | filter[4] * src[x + stride] + \ | ||
93 | filter[5] * src[x + 2 * stride] + \ | ||
94 | filter[6] * src[x + 3 * stride] + \ | ||
95 | filter[7] * src[x + 4 * stride]) | ||
96 | |||
97 | 1174560 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
99 | { | ||
100 | 1174560 | const pixel *src = (const pixel*)_src; | |
101 | 1174560 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
102 | 1174560 | const int8_t *filter = hf; | |
103 | |||
104 |
2/2✓ Branch 0 taken 5187396 times.
✓ Branch 1 taken 587280 times.
|
11549352 | for (int y = 0; y < height; y++) { |
105 |
2/2✓ Branch 0 taken 79756272 times.
✓ Branch 1 taken 5187396 times.
|
169887336 | for (int x = 0; x < width; x++) |
106 | 159512544 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
107 | 10374792 | src += src_stride; | |
108 | 10374792 | dst += MAX_PB_SIZE; | |
109 | } | ||
110 | 1174560 | } | |
111 | |||
112 | 1418012 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
114 | { | ||
115 | 1418012 | const pixel *src = (pixel*)_src; | |
116 | 1418012 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
117 | 1418012 | const int8_t *filter = vf; | |
118 | |||
119 |
2/2✓ Branch 0 taken 6472472 times.
✓ Branch 1 taken 709006 times.
|
14362956 | for (int y = 0; y < height; y++) { |
120 |
2/2✓ Branch 0 taken 97837888 times.
✓ Branch 1 taken 6472472 times.
|
208620720 | for (int x = 0; x < width; x++) |
121 | 195675776 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
122 | 12944944 | src += src_stride; | |
123 | 12944944 | dst += MAX_PB_SIZE; | |
124 | } | ||
125 | 1418012 | } | |
126 | |||
127 | 8386354 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
129 | { | ||
130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
131 | 8386354 | int16_t *tmp = tmp_array; | |
132 | 8386354 | const pixel *src = (const pixel*)_src; | |
133 | 8386354 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
134 | 8386354 | const int8_t *filter = hf; | |
135 | |||
136 | 8386354 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
137 |
2/2✓ Branch 0 taken 57901563 times.
✓ Branch 1 taken 4193177 times.
|
124189480 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 |
2/2✓ Branch 0 taken 515873564 times.
✓ Branch 1 taken 57901563 times.
|
1147550254 | for (int x = 0; x < width; x++) |
139 | 1031747128 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
140 | 115803126 | src += src_stride; | |
141 | 115803126 | tmp += MAX_PB_SIZE; | |
142 | } | ||
143 | |||
144 | 8386354 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
145 | 8386354 | filter = vf; | |
146 |
2/2✓ Branch 0 taken 28549324 times.
✓ Branch 1 taken 4193177 times.
|
65485002 | for (int y = 0; y < height; y++) { |
147 |
2/2✓ Branch 0 taken 316771248 times.
✓ Branch 1 taken 28549324 times.
|
690641144 | for (int x = 0; x < width; x++) |
148 | 633542496 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
149 | 57098648 | tmp += MAX_PB_SIZE; | |
150 | 57098648 | dst += MAX_PB_SIZE; | |
151 | } | ||
152 | 8386354 | } | |
153 | |||
154 | 101504 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
157 | { | ||
158 | 101504 | const pixel *src = (const pixel*)_src; | |
159 | 101504 | pixel *dst = (pixel *)_dst; | |
160 | 101504 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
161 | 101504 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
162 | 101504 | const int8_t *filter = hf; | |
163 | 101504 | const int shift = 14 - BIT_DEPTH; | |
164 | #if BIT_DEPTH < 14 | ||
165 | 101504 | const int offset = 1 << (shift - 1); | |
166 | #else | ||
167 | const int offset = 0; | ||
168 | #endif | ||
169 | |||
170 |
2/2✓ Branch 0 taken 587332 times.
✓ Branch 1 taken 50752 times.
|
1276168 | for (int y = 0; y < height; y++) { |
171 |
2/2✓ Branch 0 taken 12638416 times.
✓ Branch 1 taken 587332 times.
|
26451496 | for (int x = 0; x < width; x++) { |
172 | 25276832 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
173 | 25276832 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
174 | } | ||
175 | 1174664 | src += src_stride; | |
176 | 1174664 | dst += dst_stride; | |
177 | } | ||
178 | 101504 | } | |
179 | |||
180 | 119322 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
183 | { | ||
184 | |||
185 | 119322 | const pixel *src = (const pixel*)_src; | |
186 | 119322 | pixel *dst = (pixel *)_dst; | |
187 | 119322 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
188 | 119322 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
189 | 119322 | const int8_t *filter = vf; | |
190 | 119322 | const int shift = 14 - BIT_DEPTH; | |
191 | #if BIT_DEPTH < 14 | ||
192 | 119322 | const int offset = 1 << (shift - 1); | |
193 | #else | ||
194 | const int offset = 0; | ||
195 | #endif | ||
196 | |||
197 |
2/2✓ Branch 0 taken 660604 times.
✓ Branch 1 taken 59661 times.
|
1440530 | for (int y = 0; y < height; y++) { |
198 |
2/2✓ Branch 0 taken 13897152 times.
✓ Branch 1 taken 660604 times.
|
29115512 | for (int x = 0; x < width; x++) { |
199 | 27794304 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
200 | 27794304 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
201 | } | ||
202 | 1321208 | src += src_stride; | |
203 | 1321208 | dst += dst_stride; | |
204 | } | ||
205 | 119322 | } | |
206 | |||
207 | 368450 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
210 | { | ||
211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
212 | 368450 | int16_t *tmp = tmp_array; | |
213 | 368450 | const pixel *src = (const pixel*)_src; | |
214 | 368450 | pixel *dst = (pixel *)_dst; | |
215 | 368450 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
216 | 368450 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
217 | 368450 | const int8_t *filter = hf; | |
218 | 368450 | const int shift = 14 - BIT_DEPTH; | |
219 | #if BIT_DEPTH < 14 | ||
220 | 368450 | const int offset = 1 << (shift - 1); | |
221 | #else | ||
222 | const int offset = 0; | ||
223 | #endif | ||
224 | |||
225 | 368450 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
226 |
2/2✓ Branch 0 taken 3311855 times.
✓ Branch 1 taken 184225 times.
|
6992160 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 |
2/2✓ Branch 0 taken 49215424 times.
✓ Branch 1 taken 3311855 times.
|
105054558 | for (int x = 0; x < width; x++) |
228 | 98430848 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
229 | 6623710 | src += src_stride; | |
230 | 6623710 | tmp += MAX_PB_SIZE; | |
231 | } | ||
232 | |||
233 | 368450 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
234 | 368450 | filter = vf; | |
235 | |||
236 |
2/2✓ Branch 0 taken 2022280 times.
✓ Branch 1 taken 184225 times.
|
4413010 | for (int y = 0; y < height; y++) { |
237 |
2/2✓ Branch 0 taken 35632400 times.
✓ Branch 1 taken 2022280 times.
|
75309360 | for (int x = 0; x < width; x++) { |
238 | 71264800 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
239 | 71264800 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
240 | } | ||
241 | 4044560 | tmp += MAX_PB_SIZE; | |
242 | 4044560 | dst += dst_stride; | |
243 | } | ||
244 | |||
245 | 368450 | } | |
246 | |||
247 | 22844 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
250 | const int width) | ||
251 | { | ||
252 | 22844 | const pixel *src = (const pixel*)_src; | |
253 | 22844 | pixel *dst = (pixel *)_dst; | |
254 | 22844 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
255 | 22844 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
256 | 22844 | const int8_t *filter = hf; | |
257 | 22844 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
258 | 22844 | const int shift = denom + 14 - BIT_DEPTH; | |
259 | #if BIT_DEPTH < 14 | ||
260 | 22844 | const int offset = 1 << (shift - 1); | |
261 | #else | ||
262 | const int offset = 0; | ||
263 | #endif | ||
264 | |||
265 |
2/2✓ Branch 0 taken 58128 times.
✓ Branch 1 taken 11422 times.
|
139100 | for (int y = 0; y < height; y++) { |
266 |
2/2✓ Branch 0 taken 527680 times.
✓ Branch 1 taken 58128 times.
|
1171616 | for (int x = 0; x < width; x++) |
267 | 1055360 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
268 | 116256 | src += src_stride; | |
269 | 116256 | dst += dst_stride; | |
270 | } | ||
271 | 22844 | } | |
272 | |||
273 | 22448 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
276 | const int width) | ||
277 | { | ||
278 | 22448 | const pixel *src = (const pixel*)_src; | |
279 | 22448 | pixel *dst = (pixel *)_dst; | |
280 | 22448 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
281 | 22448 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
282 | 22448 | const int8_t *filter = vf; | |
283 | 22448 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
284 | 22448 | const int shift = denom + 14 - BIT_DEPTH; | |
285 | #if BIT_DEPTH < 14 | ||
286 | 22448 | const int offset = 1 << (shift - 1); | |
287 | #else | ||
288 | const int offset = 0; | ||
289 | #endif | ||
290 | |||
291 |
2/2✓ Branch 0 taken 54840 times.
✓ Branch 1 taken 11224 times.
|
132128 | for (int y = 0; y < height; y++) { |
292 |
2/2✓ Branch 0 taken 440416 times.
✓ Branch 1 taken 54840 times.
|
990512 | for (int x = 0; x < width; x++) |
293 | 880832 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
294 | 109680 | src += src_stride; | |
295 | 109680 | dst += dst_stride; | |
296 | } | ||
297 | 22448 | } | |
298 | |||
299 | 76152 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
302 | { | ||
303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
304 | 76152 | int16_t *tmp = tmp_array; | |
305 | 76152 | const pixel *src = (const pixel*)_src; | |
306 | 76152 | pixel *dst = (pixel *)_dst; | |
307 | 76152 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
308 | 76152 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
309 | 76152 | const int8_t *filter = hf; | |
310 | 76152 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
311 | 76152 | const int shift = denom + 14 - BIT_DEPTH; | |
312 | #if BIT_DEPTH < 14 | ||
313 | 76152 | const int offset = 1 << (shift - 1); | |
314 | #else | ||
315 | const int offset = 0; | ||
316 | #endif | ||
317 | |||
318 | 76152 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
319 |
2/2✓ Branch 0 taken 456544 times.
✓ Branch 1 taken 38076 times.
|
989240 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 |
2/2✓ Branch 0 taken 3176504 times.
✓ Branch 1 taken 456544 times.
|
7266096 | for (int x = 0; x < width; x++) |
321 | 6353008 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
322 | 913088 | src += src_stride; | |
323 | 913088 | tmp += MAX_PB_SIZE; | |
324 | } | ||
325 | |||
326 | 76152 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
327 | 76152 | filter = vf; | |
328 |
2/2✓ Branch 0 taken 190012 times.
✓ Branch 1 taken 38076 times.
|
456176 | for (int y = 0; y < height; y++) { |
329 |
2/2✓ Branch 0 taken 1834912 times.
✓ Branch 1 taken 190012 times.
|
4049848 | for (int x = 0; x < width; x++) |
330 | 3669824 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
331 | 380024 | tmp += MAX_PB_SIZE; | |
332 | 380024 | dst += dst_stride; | |
333 | } | ||
334 | 76152 | } | |
335 | |||
336 | #define CHROMA_FILTER(src, stride) \ | ||
337 | (filter[0] * src[x - stride] + \ | ||
338 | filter[1] * src[x] + \ | ||
339 | filter[2] * src[x + stride] + \ | ||
340 | filter[3] * src[x + 2 * stride]) | ||
341 | |||
342 | 1234890 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
344 | { | ||
345 | 1234890 | const pixel *src = (const pixel *)_src; | |
346 | 1234890 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
347 | 1234890 | const int8_t *filter = hf; | |
348 | |||
349 |
2/2✓ Branch 0 taken 5194222 times.
✓ Branch 1 taken 617445 times.
|
11623334 | for (int y = 0; y < height; y++) { |
350 |
2/2✓ Branch 0 taken 82645796 times.
✓ Branch 1 taken 5194222 times.
|
175680036 | for (int x = 0; x < width; x++) |
351 | 165291592 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
352 | 10388444 | src += src_stride; | |
353 | 10388444 | dst += MAX_PB_SIZE; | |
354 | } | ||
355 | 1234890 | } | |
356 | |||
357 | 1761006 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
359 | { | ||
360 | 1761006 | const pixel *src = (const pixel *)_src; | |
361 | 1761006 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
362 | 1761006 | const int8_t *filter = vf; | |
363 | |||
364 |
2/2✓ Branch 0 taken 7083514 times.
✓ Branch 1 taken 880503 times.
|
15928034 | for (int y = 0; y < height; y++) { |
365 |
2/2✓ Branch 0 taken 106236580 times.
✓ Branch 1 taken 7083514 times.
|
226640188 | for (int x = 0; x < width; x++) |
366 | 212473160 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
367 | 14167028 | src += src_stride; | |
368 | 14167028 | dst += MAX_PB_SIZE; | |
369 | } | ||
370 | 1761006 | } | |
371 | |||
372 | 7458794 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
374 | { | ||
375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
376 | 7458794 | int16_t *tmp = tmp_array; | |
377 | 7458794 | const pixel *src = (const pixel *)_src; | |
378 | 7458794 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
379 | 7458794 | const int8_t *filter = hf; | |
380 | |||
381 | 7458794 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
382 | |||
383 |
2/2✓ Branch 0 taken 37186013 times.
✓ Branch 1 taken 3729397 times.
|
81830820 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 |
2/2✓ Branch 0 taken 348698362 times.
✓ Branch 1 taken 37186013 times.
|
771768750 | for (int x = 0; x < width; x++) |
385 | 697396724 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
386 | 74372026 | src += src_stride; | |
387 | 74372026 | tmp += MAX_PB_SIZE; | |
388 | } | ||
389 | |||
390 | 7458794 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
391 | 7458794 | filter = vf; | |
392 | |||
393 |
2/2✓ Branch 0 taken 25997822 times.
✓ Branch 1 taken 3729397 times.
|
59454438 | for (int y = 0; y < height; y++) { |
394 |
2/2✓ Branch 0 taken 270930628 times.
✓ Branch 1 taken 25997822 times.
|
593856900 | for (int x = 0; x < width; x++) |
395 | 541861256 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
396 | 51995644 | tmp += MAX_PB_SIZE; | |
397 | 51995644 | dst += MAX_PB_SIZE; | |
398 | } | ||
399 | 7458794 | } | |
400 | |||
401 | 447416 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
404 | { | ||
405 | 447416 | const pixel *src = (const pixel *)_src; | |
406 | 447416 | pixel *dst = (pixel *)_dst; | |
407 | 447416 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
408 | 447416 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
409 | 447416 | const int8_t *filter = hf; | |
410 | 447416 | const int shift = 14 - BIT_DEPTH; | |
411 | #if BIT_DEPTH < 14 | ||
412 | 447416 | const int offset = 1 << (shift - 1); | |
413 | #else | ||
414 | const int offset = 0; | ||
415 | #endif | ||
416 | |||
417 |
2/2✓ Branch 0 taken 1236452 times.
✓ Branch 1 taken 223708 times.
|
2920320 | for (int y = 0; y < height; y++) { |
418 |
2/2✓ Branch 0 taken 14458128 times.
✓ Branch 1 taken 1236452 times.
|
31389160 | for (int x = 0; x < width; x++) |
419 | 28916256 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
420 | 2472904 | src += src_stride; | |
421 | 2472904 | dst += dst_stride; | |
422 | } | ||
423 | 447416 | } | |
424 | |||
425 | 411792 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
428 | { | ||
429 | 411792 | const pixel *src = (const pixel *)_src; | |
430 | 411792 | pixel *dst = (pixel *)_dst; | |
431 | 411792 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
432 | 411792 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
433 | 411792 | const int8_t *filter = vf; | |
434 | 411792 | const int shift = 14 - BIT_DEPTH; | |
435 | #if BIT_DEPTH < 14 | ||
436 | 411792 | const int offset = 1 << (shift - 1); | |
437 | #else | ||
438 | const int offset = 0; | ||
439 | #endif | ||
440 | |||
441 |
2/2✓ Branch 0 taken 1122584 times.
✓ Branch 1 taken 205896 times.
|
2656960 | for (int y = 0; y < height; y++) { |
442 |
2/2✓ Branch 0 taken 14217200 times.
✓ Branch 1 taken 1122584 times.
|
30679568 | for (int x = 0; x < width; x++) |
443 | 28434400 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
444 | 2245168 | src += src_stride; | |
445 | 2245168 | dst += dst_stride; | |
446 | } | ||
447 | 411792 | } | |
448 | |||
449 | 3052496 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
452 | { | ||
453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
454 | 3052496 | int16_t *tmp = tmp_array; | |
455 | 3052496 | const pixel *src = (const pixel *)_src; | |
456 | 3052496 | pixel *dst = (pixel *)_dst; | |
457 | 3052496 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
458 | 3052496 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
459 | 3052496 | const int8_t *filter = hf; | |
460 | 3052496 | const int shift = 14 - BIT_DEPTH; | |
461 | #if BIT_DEPTH < 14 | ||
462 | 3052496 | const int offset = 1 << (shift - 1); | |
463 | #else | ||
464 | const int offset = 0; | ||
465 | #endif | ||
466 | |||
467 | 3052496 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
468 | |||
469 |
2/2✓ Branch 0 taken 11607740 times.
✓ Branch 1 taken 1526248 times.
|
26267976 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 |
2/2✓ Branch 0 taken 71537624 times.
✓ Branch 1 taken 11607740 times.
|
166290728 | for (int x = 0; x < width; x++) |
471 | 143075248 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
472 | 23215480 | src += src_stride; | |
473 | 23215480 | tmp += MAX_PB_SIZE; | |
474 | } | ||
475 | |||
476 | 3052496 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
477 | 3052496 | filter = vf; | |
478 | |||
479 |
2/2✓ Branch 0 taken 7028996 times.
✓ Branch 1 taken 1526248 times.
|
17110488 | for (int y = 0; y < height; y++) { |
480 |
2/2✓ Branch 0 taken 50672816 times.
✓ Branch 1 taken 7028996 times.
|
115403624 | for (int x = 0; x < width; x++) |
481 | 101345632 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
482 | 14057992 | tmp += MAX_PB_SIZE; | |
483 | 14057992 | dst += dst_stride; | |
484 | } | ||
485 | 3052496 | } | |
486 | |||
487 | 19480 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
489 | const int8_t *hf, const int8_t *vf, int width) | ||
490 | { | ||
491 | 19480 | const pixel *src = (const pixel *)_src; | |
492 | 19480 | pixel *dst = (pixel *)_dst; | |
493 | 19480 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
494 | 19480 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
495 | 19480 | const int8_t *filter = hf; | |
496 | 19480 | const int shift = denom + 14 - BIT_DEPTH; | |
497 | #if BIT_DEPTH < 14 | ||
498 | 19480 | const int offset = 1 << (shift - 1); | |
499 | #else | ||
500 | const int offset = 0; | ||
501 | #endif | ||
502 | |||
503 | 19480 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
504 |
2/2✓ Branch 0 taken 46728 times.
✓ Branch 1 taken 9740 times.
|
112936 | for (int y = 0; y < height; y++) { |
505 |
2/2✓ Branch 0 taken 292128 times.
✓ Branch 1 taken 46728 times.
|
677712 | for (int x = 0; x < width; x++) { |
506 | 584256 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
507 | } | ||
508 | 93456 | dst += dst_stride; | |
509 | 93456 | src += src_stride; | |
510 | } | ||
511 | 19480 | } | |
512 | |||
513 | 19836 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
516 | const int width) | ||
517 | { | ||
518 | 19836 | const pixel *src = (const pixel *)_src; | |
519 | 19836 | pixel *dst = (pixel *)_dst; | |
520 | 19836 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
521 | 19836 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
522 | 19836 | const int8_t *filter = vf; | |
523 | 19836 | const int shift = denom + 14 - BIT_DEPTH; | |
524 | 19836 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
525 | #if BIT_DEPTH < 14 | ||
526 | 19836 | int offset = 1 << (shift - 1); | |
527 | #else | ||
528 | int offset = 0; | ||
529 | #endif | ||
530 | |||
531 |
2/2✓ Branch 0 taken 46288 times.
✓ Branch 1 taken 9918 times.
|
112412 | for (int y = 0; y < height; y++) { |
532 |
2/2✓ Branch 0 taken 301520 times.
✓ Branch 1 taken 46288 times.
|
695616 | for (int x = 0; x < width; x++) { |
533 | 603040 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
534 | } | ||
535 | 92576 | dst += dst_stride; | |
536 | 92576 | src += src_stride; | |
537 | } | ||
538 | 19836 | } | |
539 | |||
540 | 110112 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
542 | const int8_t *hf, const int8_t *vf, int width) | ||
543 | { | ||
544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
545 | 110112 | int16_t *tmp = tmp_array; | |
546 | 110112 | const pixel *src = (const pixel *)_src; | |
547 | 110112 | pixel *dst = (pixel *)_dst; | |
548 | 110112 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
549 | 110112 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
550 | 110112 | const int8_t *filter = hf; | |
551 | 110112 | const int shift = denom + 14 - BIT_DEPTH; | |
552 | #if BIT_DEPTH < 14 | ||
553 | 110112 | const int offset = 1 << (shift - 1); | |
554 | #else | ||
555 | const int offset = 0; | ||
556 | #endif | ||
557 | |||
558 | 110112 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
559 | |||
560 |
2/2✓ Branch 0 taken 417916 times.
✓ Branch 1 taken 55056 times.
|
945944 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 |
2/2✓ Branch 0 taken 2399396 times.
✓ Branch 1 taken 417916 times.
|
5634624 | for (int x = 0; x < width; x++) |
562 | 4798792 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
563 | 835832 | src += src_stride; | |
564 | 835832 | tmp += MAX_PB_SIZE; | |
565 | } | ||
566 | |||
567 | 110112 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
568 | 110112 | filter = vf; | |
569 | |||
570 | 110112 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
571 |
2/2✓ Branch 0 taken 252748 times.
✓ Branch 1 taken 55056 times.
|
615608 | for (int y = 0; y < height; y++) { |
572 |
2/2✓ Branch 0 taken 1626656 times.
✓ Branch 1 taken 252748 times.
|
3758808 | for (int x = 0; x < width; x++) |
573 | 3253312 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
574 | 505496 | tmp += MAX_PB_SIZE; | |
575 | 505496 | dst += dst_stride; | |
576 | } | ||
577 | 110112 | } | |
578 |