Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * inter prediction template for HEVC/VVC | ||
3 | * | ||
4 | * Copyright (C) 2022 Nuo Mi | ||
5 | * Copyright (C) 2024 Wu Jianhua | ||
6 | * | ||
7 | * This file is part of FFmpeg. | ||
8 | * | ||
9 | * FFmpeg is free software; you can redistribute it and/or | ||
10 | * modify it under the terms of the GNU Lesser General Public | ||
11 | * License as published by the Free Software Foundation; either | ||
12 | * version 2.1 of the License, or (at your option) any later version. | ||
13 | * | ||
14 | * FFmpeg is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
17 | * Lesser General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU Lesser General Public | ||
20 | * License along with FFmpeg; if not, write to the Free Software | ||
21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
22 | */ | ||
23 | |||
24 | #define CHROMA_EXTRA_BEFORE 1 | ||
25 | #define CHROMA_EXTRA 3 | ||
26 | #define LUMA_EXTRA_BEFORE 3 | ||
27 | #define LUMA_EXTRA 7 | ||
28 | |||
29 | 8577778 | static void FUNC(put_pixels)(int16_t *dst, | |
30 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
31 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
32 | { | ||
33 | 8577778 | const pixel *src = (const pixel *)_src; | |
34 | 8577778 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
35 | |||
36 |
2/2✓ Branch 0 taken 48416814 times.
✓ Branch 1 taken 4288889 times.
|
105411406 | for (int y = 0; y < height; y++) { |
37 |
2/2✓ Branch 0 taken 787904596 times.
✓ Branch 1 taken 48416814 times.
|
1672642820 | for (int x = 0; x < width; x++) |
38 | 1575809192 | dst[x] = src[x] << (14 - BIT_DEPTH); | |
39 | 96833628 | src += src_stride; | |
40 | 96833628 | dst += MAX_PB_SIZE; | |
41 | } | ||
42 | 8577778 | } | |
43 | |||
44 | 2517838 | static void FUNC(put_uni_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
45 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
46 | const int8_t *hf, const int8_t *vf, const int width) | ||
47 | { | ||
48 | 2517838 | const pixel *src = (const pixel *)_src; | |
49 | 2517838 | pixel *dst = (pixel *)_dst; | |
50 | 2517838 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
51 | 2517838 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
52 | |||
53 |
2/2✓ Branch 0 taken 8516360 times.
✓ Branch 1 taken 1258919 times.
|
19550558 | for (int y = 0; y < height; y++) { |
54 | 17032720 | memcpy(dst, src, width * sizeof(pixel)); | |
55 | 17032720 | src += src_stride; | |
56 | 17032720 | dst += dst_stride; | |
57 | } | ||
58 | 2517838 | } | |
59 | |||
60 | 256604 | static void FUNC(put_uni_w_pixels)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
61 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
62 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
63 | const int width) | ||
64 | { | ||
65 | 256604 | const pixel *src = (const pixel *)_src; | |
66 | 256604 | pixel *dst = (pixel *)_dst; | |
67 | 256604 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
68 | 256604 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
69 | 256604 | const int shift = denom + 14 - BIT_DEPTH; | |
70 | #if BIT_DEPTH < 14 | ||
71 | 256604 | const int offset = 1 << (shift - 1); | |
72 | #else | ||
73 | const int offset = 0; | ||
74 | #endif | ||
75 | 256604 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
76 | |||
77 |
2/2✓ Branch 0 taken 637352 times.
✓ Branch 1 taken 128302 times.
|
1531308 | for (int y = 0; y < height; y++) { |
78 |
2/2✓ Branch 0 taken 5833840 times.
✓ Branch 1 taken 637352 times.
|
12942384 | for (int x = 0; x < width; x++) { |
79 | 11667680 | const int v = (src[x] << (14 - BIT_DEPTH)); | |
80 | 11667680 | dst[x] = av_clip_pixel(((v * wx + offset) >> shift) + ox); | |
81 | } | ||
82 | 1274704 | src += src_stride; | |
83 | 1274704 | dst += dst_stride; | |
84 | } | ||
85 | 256604 | } | |
86 | |||
87 | #define LUMA_FILTER(src, stride) \ | ||
88 | (filter[0] * src[x - 3 * stride] + \ | ||
89 | filter[1] * src[x - 2 * stride] + \ | ||
90 | filter[2] * src[x - stride] + \ | ||
91 | filter[3] * src[x ] + \ | ||
92 | filter[4] * src[x + stride] + \ | ||
93 | filter[5] * src[x + 2 * stride] + \ | ||
94 | filter[6] * src[x + 3 * stride] + \ | ||
95 | filter[7] * src[x + 4 * stride]) | ||
96 | |||
97 | 1108940 | static void FUNC(put_luma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
98 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
99 | { | ||
100 | 1108940 | const pixel *src = (const pixel*)_src; | |
101 | 1108940 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
102 | 1108940 | const int8_t *filter = hf; | |
103 | |||
104 |
2/2✓ Branch 0 taken 4941960 times.
✓ Branch 1 taken 554470 times.
|
10992860 | for (int y = 0; y < height; y++) { |
105 |
2/2✓ Branch 0 taken 76559088 times.
✓ Branch 1 taken 4941960 times.
|
163002096 | for (int x = 0; x < width; x++) |
106 | 153118176 | dst[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
107 | 9883920 | src += src_stride; | |
108 | 9883920 | dst += MAX_PB_SIZE; | |
109 | } | ||
110 | 1108940 | } | |
111 | |||
112 | 1388424 | static void FUNC(put_luma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
113 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
114 | { | ||
115 | 1388424 | const pixel *src = (pixel*)_src; | |
116 | 1388424 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
117 | 1388424 | const int8_t *filter = vf; | |
118 | |||
119 |
2/2✓ Branch 0 taken 6343752 times.
✓ Branch 1 taken 694212 times.
|
14075928 | for (int y = 0; y < height; y++) { |
120 |
2/2✓ Branch 0 taken 96133776 times.
✓ Branch 1 taken 6343752 times.
|
204955056 | for (int x = 0; x < width; x++) |
121 | 192267552 | dst[x] = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
122 | 12687504 | src += src_stride; | |
123 | 12687504 | dst += MAX_PB_SIZE; | |
124 | } | ||
125 | 1388424 | } | |
126 | |||
127 | 8287660 | static void FUNC(put_luma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
128 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
129 | { | ||
130 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
131 | 8287660 | int16_t *tmp = tmp_array; | |
132 | 8287660 | const pixel *src = (const pixel*)_src; | |
133 | 8287660 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
134 | 8287660 | const int8_t *filter = hf; | |
135 | |||
136 | 8287660 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
137 |
2/2✓ Branch 0 taken 57273658 times.
✓ Branch 1 taken 4143830 times.
|
122834976 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
138 |
2/2✓ Branch 0 taken 511105800 times.
✓ Branch 1 taken 57273658 times.
|
1136758916 | for (int x = 0; x < width; x++) |
139 | 1022211600 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
140 | 114547316 | src += src_stride; | |
141 | 114547316 | tmp += MAX_PB_SIZE; | |
142 | } | ||
143 | |||
144 | 8287660 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
145 | 8287660 | filter = vf; | |
146 |
2/2✓ Branch 0 taken 28266848 times.
✓ Branch 1 taken 4143830 times.
|
64821356 | for (int y = 0; y < height; y++) { |
147 |
2/2✓ Branch 0 taken 314176592 times.
✓ Branch 1 taken 28266848 times.
|
684886880 | for (int x = 0; x < width; x++) |
148 | 628353184 | dst[x] = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
149 | 56533696 | tmp += MAX_PB_SIZE; | |
150 | 56533696 | dst += MAX_PB_SIZE; | |
151 | } | ||
152 | 8287660 | } | |
153 | |||
154 | 97948 | static void FUNC(put_uni_luma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
155 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
156 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
157 | { | ||
158 | 97948 | const pixel *src = (const pixel*)_src; | |
159 | 97948 | pixel *dst = (pixel *)_dst; | |
160 | 97948 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
161 | 97948 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
162 | 97948 | const int8_t *filter = hf; | |
163 | 97948 | const int shift = 14 - BIT_DEPTH; | |
164 | #if BIT_DEPTH < 14 | ||
165 | 97948 | const int offset = 1 << (shift - 1); | |
166 | #else | ||
167 | const int offset = 0; | ||
168 | #endif | ||
169 | |||
170 |
2/2✓ Branch 0 taken 566336 times.
✓ Branch 1 taken 48974 times.
|
1230620 | for (int y = 0; y < height; y++) { |
171 |
2/2✓ Branch 0 taken 12220624 times.
✓ Branch 1 taken 566336 times.
|
25573920 | for (int x = 0; x < width; x++) { |
172 | 24441248 | const int val = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
173 | 24441248 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
174 | } | ||
175 | 1132672 | src += src_stride; | |
176 | 1132672 | dst += dst_stride; | |
177 | } | ||
178 | 97948 | } | |
179 | |||
180 | 118460 | static void FUNC(put_uni_luma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
181 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
182 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
183 | { | ||
184 | |||
185 | 118460 | const pixel *src = (const pixel*)_src; | |
186 | 118460 | pixel *dst = (pixel *)_dst; | |
187 | 118460 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
188 | 118460 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
189 | 118460 | const int8_t *filter = vf; | |
190 | 118460 | const int shift = 14 - BIT_DEPTH; | |
191 | #if BIT_DEPTH < 14 | ||
192 | 118460 | const int offset = 1 << (shift - 1); | |
193 | #else | ||
194 | const int offset = 0; | ||
195 | #endif | ||
196 | |||
197 |
2/2✓ Branch 0 taken 654812 times.
✓ Branch 1 taken 59230 times.
|
1428084 | for (int y = 0; y < height; y++) { |
198 |
2/2✓ Branch 0 taken 13757696 times.
✓ Branch 1 taken 654812 times.
|
28825016 | for (int x = 0; x < width; x++) { |
199 | 27515392 | const int val = LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
200 | 27515392 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
201 | } | ||
202 | 1309624 | src += src_stride; | |
203 | 1309624 | dst += dst_stride; | |
204 | } | ||
205 | 118460 | } | |
206 | |||
207 | 365926 | static void FUNC(put_uni_luma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
208 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
209 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
210 | { | ||
211 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
212 | 365926 | int16_t *tmp = tmp_array; | |
213 | 365926 | const pixel *src = (const pixel*)_src; | |
214 | 365926 | pixel *dst = (pixel *)_dst; | |
215 | 365926 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
216 | 365926 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
217 | 365926 | const int8_t *filter = hf; | |
218 | 365926 | const int shift = 14 - BIT_DEPTH; | |
219 | #if BIT_DEPTH < 14 | ||
220 | 365926 | const int offset = 1 << (shift - 1); | |
221 | #else | ||
222 | const int offset = 0; | ||
223 | #endif | ||
224 | |||
225 | 365926 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
226 |
2/2✓ Branch 0 taken 3291585 times.
✓ Branch 1 taken 182963 times.
|
6949096 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
227 |
2/2✓ Branch 0 taken 48883716 times.
✓ Branch 1 taken 3291585 times.
|
104350602 | for (int x = 0; x < width; x++) |
228 | 97767432 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
229 | 6583170 | src += src_stride; | |
230 | 6583170 | tmp += MAX_PB_SIZE; | |
231 | } | ||
232 | |||
233 | 365926 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
234 | 365926 | filter = vf; | |
235 | |||
236 |
2/2✓ Branch 0 taken 2010844 times.
✓ Branch 1 taken 182963 times.
|
4387614 | for (int y = 0; y < height; y++) { |
237 |
2/2✓ Branch 0 taken 35429520 times.
✓ Branch 1 taken 2010844 times.
|
74880728 | for (int x = 0; x < width; x++) { |
238 | 70859040 | const int val = LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
239 | 70859040 | dst[x] = av_clip_pixel((val + offset) >> shift); | |
240 | } | ||
241 | 4021688 | tmp += MAX_PB_SIZE; | |
242 | 4021688 | dst += dst_stride; | |
243 | } | ||
244 | |||
245 | 365926 | } | |
246 | |||
247 | 22844 | static void FUNC(put_uni_luma_w_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
248 | const uint8_t *_src, const ptrdiff_t _src_stride, int height, | ||
249 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
250 | const int width) | ||
251 | { | ||
252 | 22844 | const pixel *src = (const pixel*)_src; | |
253 | 22844 | pixel *dst = (pixel *)_dst; | |
254 | 22844 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
255 | 22844 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
256 | 22844 | const int8_t *filter = hf; | |
257 | 22844 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
258 | 22844 | const int shift = denom + 14 - BIT_DEPTH; | |
259 | #if BIT_DEPTH < 14 | ||
260 | 22844 | const int offset = 1 << (shift - 1); | |
261 | #else | ||
262 | const int offset = 0; | ||
263 | #endif | ||
264 | |||
265 |
2/2✓ Branch 0 taken 58128 times.
✓ Branch 1 taken 11422 times.
|
139100 | for (int y = 0; y < height; y++) { |
266 |
2/2✓ Branch 0 taken 527680 times.
✓ Branch 1 taken 58128 times.
|
1171616 | for (int x = 0; x < width; x++) |
267 | 1055360 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
268 | 116256 | src += src_stride; | |
269 | 116256 | dst += dst_stride; | |
270 | } | ||
271 | 22844 | } | |
272 | |||
273 | 22448 | static void FUNC(put_uni_luma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
274 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
275 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
276 | const int width) | ||
277 | { | ||
278 | 22448 | const pixel *src = (const pixel*)_src; | |
279 | 22448 | pixel *dst = (pixel *)_dst; | |
280 | 22448 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
281 | 22448 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
282 | 22448 | const int8_t *filter = vf; | |
283 | 22448 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
284 | 22448 | const int shift = denom + 14 - BIT_DEPTH; | |
285 | #if BIT_DEPTH < 14 | ||
286 | 22448 | const int offset = 1 << (shift - 1); | |
287 | #else | ||
288 | const int offset = 0; | ||
289 | #endif | ||
290 | |||
291 |
2/2✓ Branch 0 taken 54840 times.
✓ Branch 1 taken 11224 times.
|
132128 | for (int y = 0; y < height; y++) { |
292 |
2/2✓ Branch 0 taken 440416 times.
✓ Branch 1 taken 54840 times.
|
990512 | for (int x = 0; x < width; x++) |
293 | 880832 | dst[x] = av_clip_pixel((((LUMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
294 | 109680 | src += src_stride; | |
295 | 109680 | dst += dst_stride; | |
296 | } | ||
297 | 22448 | } | |
298 | |||
299 | 76152 | static void FUNC(put_uni_luma_w_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
300 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, const int denom, | ||
301 | const int wx, const int _ox, const int8_t *hf, const int8_t *vf, const int width) | ||
302 | { | ||
303 | int16_t tmp_array[(MAX_PB_SIZE + LUMA_EXTRA) * MAX_PB_SIZE]; | ||
304 | 76152 | int16_t *tmp = tmp_array; | |
305 | 76152 | const pixel *src = (const pixel*)_src; | |
306 | 76152 | pixel *dst = (pixel *)_dst; | |
307 | 76152 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
308 | 76152 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
309 | 76152 | const int8_t *filter = hf; | |
310 | 76152 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
311 | 76152 | const int shift = denom + 14 - BIT_DEPTH; | |
312 | #if BIT_DEPTH < 14 | ||
313 | 76152 | const int offset = 1 << (shift - 1); | |
314 | #else | ||
315 | const int offset = 0; | ||
316 | #endif | ||
317 | |||
318 | 76152 | src -= LUMA_EXTRA_BEFORE * src_stride; | |
319 |
2/2✓ Branch 0 taken 456544 times.
✓ Branch 1 taken 38076 times.
|
989240 | for (int y = 0; y < height + LUMA_EXTRA; y++) { |
320 |
2/2✓ Branch 0 taken 3176504 times.
✓ Branch 1 taken 456544 times.
|
7266096 | for (int x = 0; x < width; x++) |
321 | 6353008 | tmp[x] = LUMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
322 | 913088 | src += src_stride; | |
323 | 913088 | tmp += MAX_PB_SIZE; | |
324 | } | ||
325 | |||
326 | 76152 | tmp = tmp_array + LUMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
327 | 76152 | filter = vf; | |
328 |
2/2✓ Branch 0 taken 190012 times.
✓ Branch 1 taken 38076 times.
|
456176 | for (int y = 0; y < height; y++) { |
329 |
2/2✓ Branch 0 taken 1834912 times.
✓ Branch 1 taken 190012 times.
|
4049848 | for (int x = 0; x < width; x++) |
330 | 3669824 | dst[x] = av_clip_pixel((((LUMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
331 | 380024 | tmp += MAX_PB_SIZE; | |
332 | 380024 | dst += dst_stride; | |
333 | } | ||
334 | 76152 | } | |
335 | |||
336 | #define CHROMA_FILTER(src, stride) \ | ||
337 | (filter[0] * src[x - stride] + \ | ||
338 | filter[1] * src[x] + \ | ||
339 | filter[2] * src[x + stride] + \ | ||
340 | filter[3] * src[x + 2 * stride]) | ||
341 | |||
342 | 1138602 | static void FUNC(put_chroma_h)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
343 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
344 | { | ||
345 | 1138602 | const pixel *src = (const pixel *)_src; | |
346 | 1138602 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
347 | 1138602 | const int8_t *filter = hf; | |
348 | |||
349 |
2/2✓ Branch 0 taken 4803666 times.
✓ Branch 1 taken 569301 times.
|
10745934 | for (int y = 0; y < height; y++) { |
350 |
2/2✓ Branch 0 taken 79042020 times.
✓ Branch 1 taken 4803666 times.
|
167691372 | for (int x = 0; x < width; x++) |
351 | 158084040 | dst[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
352 | 9607332 | src += src_stride; | |
353 | 9607332 | dst += MAX_PB_SIZE; | |
354 | } | ||
355 | 1138602 | } | |
356 | |||
357 | 1705254 | static void FUNC(put_chroma_v)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
358 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
359 | { | ||
360 | 1705254 | const pixel *src = (const pixel *)_src; | |
361 | 1705254 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
362 | 1705254 | const int8_t *filter = vf; | |
363 | |||
364 |
2/2✓ Branch 0 taken 6842242 times.
✓ Branch 1 taken 852627 times.
|
15389738 | for (int y = 0; y < height; y++) { |
365 |
2/2✓ Branch 0 taken 103146756 times.
✓ Branch 1 taken 6842242 times.
|
219977996 | for (int x = 0; x < width; x++) |
366 | 206293512 | dst[x] = CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8); | |
367 | 13684484 | src += src_stride; | |
368 | 13684484 | dst += MAX_PB_SIZE; | |
369 | } | ||
370 | 1705254 | } | |
371 | |||
372 | 7289066 | static void FUNC(put_chroma_hv)(int16_t *dst, const uint8_t *_src, const ptrdiff_t _src_stride, | |
373 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
374 | { | ||
375 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
376 | 7289066 | int16_t *tmp = tmp_array; | |
377 | 7289066 | const pixel *src = (const pixel *)_src; | |
378 | 7289066 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
379 | 7289066 | const int8_t *filter = hf; | |
380 | |||
381 | 7289066 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
382 | |||
383 |
2/2✓ Branch 0 taken 36467509 times.
✓ Branch 1 taken 3644533 times.
|
80224084 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
384 |
2/2✓ Branch 0 taken 343373470 times.
✓ Branch 1 taken 36467509 times.
|
759681958 | for (int x = 0; x < width; x++) |
385 | 686746940 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
386 | 72935018 | src += src_stride; | |
387 | 72935018 | tmp += MAX_PB_SIZE; | |
388 | } | ||
389 | |||
390 | 7289066 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
391 | 7289066 | filter = vf; | |
392 | |||
393 |
2/2✓ Branch 0 taken 25533910 times.
✓ Branch 1 taken 3644533 times.
|
58356886 | for (int y = 0; y < height; y++) { |
394 |
2/2✓ Branch 0 taken 267147460 times.
✓ Branch 1 taken 25533910 times.
|
585362740 | for (int x = 0; x < width; x++) |
395 | 534294920 | dst[x] = CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6; | |
396 | 51067820 | tmp += MAX_PB_SIZE; | |
397 | 51067820 | dst += MAX_PB_SIZE; | |
398 | } | ||
399 | 7289066 | } | |
400 | |||
401 | 409596 | static void FUNC(put_uni_chroma_h)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
402 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
403 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
404 | { | ||
405 | 409596 | const pixel *src = (const pixel *)_src; | |
406 | 409596 | pixel *dst = (pixel *)_dst; | |
407 | 409596 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
408 | 409596 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
409 | 409596 | const int8_t *filter = hf; | |
410 | 409596 | const int shift = 14 - BIT_DEPTH; | |
411 | #if BIT_DEPTH < 14 | ||
412 | 409596 | const int offset = 1 << (shift - 1); | |
413 | #else | ||
414 | const int offset = 0; | ||
415 | #endif | ||
416 | |||
417 |
2/2✓ Branch 0 taken 1102600 times.
✓ Branch 1 taken 204798 times.
|
2614796 | for (int y = 0; y < height; y++) { |
418 |
2/2✓ Branch 0 taken 13591072 times.
✓ Branch 1 taken 1102600 times.
|
29387344 | for (int x = 0; x < width; x++) |
419 | 27182144 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
420 | 2205200 | src += src_stride; | |
421 | 2205200 | dst += dst_stride; | |
422 | } | ||
423 | 409596 | } | |
424 | |||
425 | 407352 | static void FUNC(put_uni_chroma_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
426 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
427 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
428 | { | ||
429 | 407352 | const pixel *src = (const pixel *)_src; | |
430 | 407352 | pixel *dst = (pixel *)_dst; | |
431 | 407352 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
432 | 407352 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
433 | 407352 | const int8_t *filter = vf; | |
434 | 407352 | const int shift = 14 - BIT_DEPTH; | |
435 | #if BIT_DEPTH < 14 | ||
436 | 407352 | const int offset = 1 << (shift - 1); | |
437 | #else | ||
438 | const int offset = 0; | ||
439 | #endif | ||
440 | |||
441 |
2/2✓ Branch 0 taken 1107824 times.
✓ Branch 1 taken 203676 times.
|
2623000 | for (int y = 0; y < height; y++) { |
442 |
2/2✓ Branch 0 taken 13970224 times.
✓ Branch 1 taken 1107824 times.
|
30156096 | for (int x = 0; x < width; x++) |
443 | 27940448 | dst[x] = av_clip_pixel(((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) + offset) >> shift); | |
444 | 2215648 | src += src_stride; | |
445 | 2215648 | dst += dst_stride; | |
446 | } | ||
447 | 407352 | } | |
448 | |||
449 | 3030196 | static void FUNC(put_uni_chroma_hv)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
450 | const uint8_t *_src, const ptrdiff_t _src_stride, | ||
451 | const int height, const int8_t *hf, const int8_t *vf, const int width) | ||
452 | { | ||
453 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
454 | 3030196 | int16_t *tmp = tmp_array; | |
455 | 3030196 | const pixel *src = (const pixel *)_src; | |
456 | 3030196 | pixel *dst = (pixel *)_dst; | |
457 | 3030196 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
458 | 3030196 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
459 | 3030196 | const int8_t *filter = hf; | |
460 | 3030196 | const int shift = 14 - BIT_DEPTH; | |
461 | #if BIT_DEPTH < 14 | ||
462 | 3030196 | const int offset = 1 << (shift - 1); | |
463 | #else | ||
464 | const int offset = 0; | ||
465 | #endif | ||
466 | |||
467 | 3030196 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
468 | |||
469 |
2/2✓ Branch 0 taken 11523798 times.
✓ Branch 1 taken 1515098 times.
|
26077792 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
470 |
2/2✓ Branch 0 taken 70930556 times.
✓ Branch 1 taken 11523798 times.
|
164908708 | for (int x = 0; x < width; x++) |
471 | 141861112 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
472 | 23047596 | src += src_stride; | |
473 | 23047596 | tmp += MAX_PB_SIZE; | |
474 | } | ||
475 | |||
476 | 3030196 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
477 | 3030196 | filter = vf; | |
478 | |||
479 |
2/2✓ Branch 0 taken 6978504 times.
✓ Branch 1 taken 1515098 times.
|
16987204 | for (int y = 0; y < height; y++) { |
480 |
2/2✓ Branch 0 taken 50260544 times.
✓ Branch 1 taken 6978504 times.
|
114478096 | for (int x = 0; x < width; x++) |
481 | 100521088 | dst[x] = av_clip_pixel(((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); | |
482 | 13957008 | tmp += MAX_PB_SIZE; | |
483 | 13957008 | dst += dst_stride; | |
484 | } | ||
485 | 3030196 | } | |
486 | |||
487 | 19480 | static void FUNC(put_uni_chroma_w_h)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
488 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
489 | const int8_t *hf, const int8_t *vf, int width) | ||
490 | { | ||
491 | 19480 | const pixel *src = (const pixel *)_src; | |
492 | 19480 | pixel *dst = (pixel *)_dst; | |
493 | 19480 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
494 | 19480 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
495 | 19480 | const int8_t *filter = hf; | |
496 | 19480 | const int shift = denom + 14 - BIT_DEPTH; | |
497 | #if BIT_DEPTH < 14 | ||
498 | 19480 | const int offset = 1 << (shift - 1); | |
499 | #else | ||
500 | const int offset = 0; | ||
501 | #endif | ||
502 | |||
503 | 19480 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
504 |
2/2✓ Branch 0 taken 46728 times.
✓ Branch 1 taken 9740 times.
|
112936 | for (int y = 0; y < height; y++) { |
505 |
2/2✓ Branch 0 taken 292128 times.
✓ Branch 1 taken 46728 times.
|
677712 | for (int x = 0; x < width; x++) { |
506 | 584256 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
507 | } | ||
508 | 93456 | dst += dst_stride; | |
509 | 93456 | src += src_stride; | |
510 | } | ||
511 | 19480 | } | |
512 | |||
513 | 19836 | static void FUNC(put_uni_chroma_w_v)(uint8_t *_dst, const ptrdiff_t _dst_stride, | |
514 | const uint8_t *_src, const ptrdiff_t _src_stride, const int height, | ||
515 | const int denom, const int wx, const int _ox, const int8_t *hf, const int8_t *vf, | ||
516 | const int width) | ||
517 | { | ||
518 | 19836 | const pixel *src = (const pixel *)_src; | |
519 | 19836 | pixel *dst = (pixel *)_dst; | |
520 | 19836 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
521 | 19836 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
522 | 19836 | const int8_t *filter = vf; | |
523 | 19836 | const int shift = denom + 14 - BIT_DEPTH; | |
524 | 19836 | const int ox = _ox * (1 << (BIT_DEPTH - 8)); | |
525 | #if BIT_DEPTH < 14 | ||
526 | 19836 | int offset = 1 << (shift - 1); | |
527 | #else | ||
528 | int offset = 0; | ||
529 | #endif | ||
530 | |||
531 |
2/2✓ Branch 0 taken 46288 times.
✓ Branch 1 taken 9918 times.
|
112412 | for (int y = 0; y < height; y++) { |
532 |
2/2✓ Branch 0 taken 301520 times.
✓ Branch 1 taken 46288 times.
|
695616 | for (int x = 0; x < width; x++) { |
533 | 603040 | dst[x] = av_clip_pixel((((CHROMA_FILTER(src, src_stride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); | |
534 | } | ||
535 | 92576 | dst += dst_stride; | |
536 | 92576 | src += src_stride; | |
537 | } | ||
538 | 19836 | } | |
539 | |||
540 | 110112 | static void FUNC(put_uni_chroma_w_hv)(uint8_t *_dst, ptrdiff_t _dst_stride, | |
541 | const uint8_t *_src, ptrdiff_t _src_stride, int height, int denom, int wx, int ox, | ||
542 | const int8_t *hf, const int8_t *vf, int width) | ||
543 | { | ||
544 | int16_t tmp_array[(MAX_PB_SIZE + CHROMA_EXTRA) * MAX_PB_SIZE]; | ||
545 | 110112 | int16_t *tmp = tmp_array; | |
546 | 110112 | const pixel *src = (const pixel *)_src; | |
547 | 110112 | pixel *dst = (pixel *)_dst; | |
548 | 110112 | const ptrdiff_t src_stride = _src_stride / sizeof(pixel); | |
549 | 110112 | const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); | |
550 | 110112 | const int8_t *filter = hf; | |
551 | 110112 | const int shift = denom + 14 - BIT_DEPTH; | |
552 | #if BIT_DEPTH < 14 | ||
553 | 110112 | const int offset = 1 << (shift - 1); | |
554 | #else | ||
555 | const int offset = 0; | ||
556 | #endif | ||
557 | |||
558 | 110112 | src -= CHROMA_EXTRA_BEFORE * src_stride; | |
559 | |||
560 |
2/2✓ Branch 0 taken 417916 times.
✓ Branch 1 taken 55056 times.
|
945944 | for (int y = 0; y < height + CHROMA_EXTRA; y++) { |
561 |
2/2✓ Branch 0 taken 2399396 times.
✓ Branch 1 taken 417916 times.
|
5634624 | for (int x = 0; x < width; x++) |
562 | 4798792 | tmp[x] = CHROMA_FILTER(src, 1) >> (BIT_DEPTH - 8); | |
563 | 835832 | src += src_stride; | |
564 | 835832 | tmp += MAX_PB_SIZE; | |
565 | } | ||
566 | |||
567 | 110112 | tmp = tmp_array + CHROMA_EXTRA_BEFORE * MAX_PB_SIZE; | |
568 | 110112 | filter = vf; | |
569 | |||
570 | 110112 | ox = ox * (1 << (BIT_DEPTH - 8)); | |
571 |
2/2✓ Branch 0 taken 252748 times.
✓ Branch 1 taken 55056 times.
|
615608 | for (int y = 0; y < height; y++) { |
572 |
2/2✓ Branch 0 taken 1626656 times.
✓ Branch 1 taken 252748 times.
|
3758808 | for (int x = 0; x < width; x++) |
573 | 3253312 | dst[x] = av_clip_pixel((((CHROMA_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox); | |
574 | 505496 | tmp += MAX_PB_SIZE; | |
575 | 505496 | dst += dst_stride; | |
576 | } | ||
577 | 110112 | } | |
578 |