Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder | ||
3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | ||
4 | * | ||
5 | * This file is part of FFmpeg. | ||
6 | * | ||
7 | * FFmpeg is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU Lesser General Public | ||
9 | * License as published by the Free Software Foundation; either | ||
10 | * version 2.1 of the License, or (at your option) any later version. | ||
11 | * | ||
12 | * FFmpeg is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
15 | * Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public | ||
18 | * License along with FFmpeg; if not, write to the Free Software | ||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
20 | */ | ||
21 | |||
22 | /** | ||
23 | * @file | ||
24 | * H.264 / AVC / MPEG-4 part10 macroblock decoding | ||
25 | */ | ||
26 | |||
27 | #include <stdint.h> | ||
28 | |||
29 | #include "config.h" | ||
30 | |||
31 | #include "libavutil/common.h" | ||
32 | #include "libavutil/intreadwrite.h" | ||
33 | #include "avcodec.h" | ||
34 | #include "h264dec.h" | ||
35 | #include "h264_ps.h" | ||
36 | #include "qpeldsp.h" | ||
37 | #include "rectangle.h" | ||
38 | #include "threadframe.h" | ||
39 | |||
40 | 136653 | static inline int get_lowest_part_list_y(H264SliceContext *sl, | |
41 | int n, int height, int y_offset, int list) | ||
42 | { | ||
43 | 136653 | int raw_my = sl->mv_cache[list][scan8[n]][1]; | |
44 |
2/2✓ Branch 0 taken 23509 times.
✓ Branch 1 taken 113144 times.
|
136653 | int filter_height_down = (raw_my & 3) ? 3 : 0; |
45 | 136653 | int full_my = (raw_my >> 2) + y_offset; | |
46 | 136653 | int bottom = full_my + filter_height_down + height; | |
47 | |||
48 | av_assert2(height >= 0); | ||
49 | |||
50 | 136653 | return FFMAX(0, bottom); | |
51 | } | ||
52 | |||
53 | 112897 | static inline void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, | |
54 | int16_t refs[2][48], int n, | ||
55 | int height, int y_offset, int list0, | ||
56 | int list1, int *nrefs) | ||
57 | { | ||
58 | int my; | ||
59 | |||
60 | 112897 | y_offset += 16 * (sl->mb_y >> MB_FIELD(sl)); | |
61 | |||
62 |
2/2✓ Branch 0 taken 105424 times.
✓ Branch 1 taken 7473 times.
|
112897 | if (list0) { |
63 | 105424 | int ref_n = sl->ref_cache[0][scan8[n]]; | |
64 | 105424 | H264Ref *ref = &sl->ref_list[0][ref_n]; | |
65 | |||
66 | // Error resilience puts the current picture in the ref list. | ||
67 | // Don't try to wait on these as it will cause a deadlock. | ||
68 | // Fields can wait on each other, though. | ||
69 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 105424 times.
|
105424 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
70 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
71 | 105424 | my = get_lowest_part_list_y(sl, n, height, y_offset, 0); | |
72 |
2/2✓ Branch 0 taken 92282 times.
✓ Branch 1 taken 13142 times.
|
105424 | if (refs[0][ref_n] < 0) |
73 | 92282 | nrefs[0] += 1; | |
74 | 105424 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); | |
75 | } | ||
76 | } | ||
77 | |||
78 |
2/2✓ Branch 0 taken 31229 times.
✓ Branch 1 taken 81668 times.
|
112897 | if (list1) { |
79 | 31229 | int ref_n = sl->ref_cache[1][scan8[n]]; | |
80 | 31229 | H264Ref *ref = &sl->ref_list[1][ref_n]; | |
81 | |||
82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 31229 times.
|
31229 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
83 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
84 | 31229 | my = get_lowest_part_list_y(sl, n, height, y_offset, 1); | |
85 |
2/2✓ Branch 0 taken 29028 times.
✓ Branch 1 taken 2201 times.
|
31229 | if (refs[1][ref_n] < 0) |
86 | 29028 | nrefs[1] += 1; | |
87 | 31229 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); | |
88 | } | ||
89 | } | ||
90 | 112897 | } | |
91 | |||
92 | /** | ||
93 | * Wait until all reference frames are available for MC operations. | ||
94 | * | ||
95 | * @param h the H.264 context | ||
96 | */ | ||
97 | 95468 | static void await_references(const H264Context *h, H264SliceContext *sl) | |
98 | { | ||
99 | 95468 | const int mb_xy = sl->mb_xy; | |
100 | 95468 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
101 | int16_t refs[2][48]; | ||
102 | 95468 | int nrefs[2] = { 0 }; | |
103 | int ref, list; | ||
104 | |||
105 | 95468 | memset(refs, -1, sizeof(refs)); | |
106 | |||
107 |
2/2✓ Branch 0 taken 85183 times.
✓ Branch 1 taken 10285 times.
|
95468 | if (IS_16X16(mb_type)) { |
108 | 85183 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
109 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
110 |
2/2✓ Branch 0 taken 3423 times.
✓ Branch 1 taken 6862 times.
|
10285 | } else if (IS_16X8(mb_type)) { |
111 | 3423 | get_lowest_part_y(h, sl, refs, 0, 8, 0, | |
112 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
113 | 3423 | get_lowest_part_y(h, sl, refs, 8, 8, 8, | |
114 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
115 |
2/2✓ Branch 0 taken 3290 times.
✓ Branch 1 taken 3572 times.
|
6862 | } else if (IS_8X16(mb_type)) { |
116 | 3290 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
117 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
118 | 3290 | get_lowest_part_y(h, sl, refs, 4, 16, 0, | |
119 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
120 | } else { | ||
121 | int i; | ||
122 | |||
123 | av_assert2(IS_8X8(mb_type)); | ||
124 | |||
125 |
2/2✓ Branch 0 taken 14288 times.
✓ Branch 1 taken 3572 times.
|
17860 | for (i = 0; i < 4; i++) { |
126 | 14288 | const int sub_mb_type = sl->sub_mb_type[i]; | |
127 | 14288 | const int n = 4 * i; | |
128 | 14288 | int y_offset = (i & 2) << 2; | |
129 | |||
130 |
1/2✓ Branch 0 taken 14288 times.
✗ Branch 1 not taken.
|
14288 | if (IS_SUB_8X8(sub_mb_type)) { |
131 | 14288 | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
132 | IS_DIR(sub_mb_type, 0, 0), | ||
133 | IS_DIR(sub_mb_type, 0, 1), | ||
134 | nrefs); | ||
135 | ✗ | } else if (IS_SUB_8X4(sub_mb_type)) { | |
136 | ✗ | get_lowest_part_y(h, sl, refs, n, 4, y_offset, | |
137 | IS_DIR(sub_mb_type, 0, 0), | ||
138 | IS_DIR(sub_mb_type, 0, 1), | ||
139 | nrefs); | ||
140 | ✗ | get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4, | |
141 | IS_DIR(sub_mb_type, 0, 0), | ||
142 | IS_DIR(sub_mb_type, 0, 1), | ||
143 | nrefs); | ||
144 | ✗ | } else if (IS_SUB_4X8(sub_mb_type)) { | |
145 | ✗ | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
146 | IS_DIR(sub_mb_type, 0, 0), | ||
147 | IS_DIR(sub_mb_type, 0, 1), | ||
148 | nrefs); | ||
149 | ✗ | get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset, | |
150 | IS_DIR(sub_mb_type, 0, 0), | ||
151 | IS_DIR(sub_mb_type, 0, 1), | ||
152 | nrefs); | ||
153 | } else { | ||
154 | int j; | ||
155 | av_assert2(IS_SUB_4X4(sub_mb_type)); | ||
156 | ✗ | for (j = 0; j < 4; j++) { | |
157 | ✗ | int sub_y_offset = y_offset + 2 * (j & 2); | |
158 | ✗ | get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset, | |
159 | IS_DIR(sub_mb_type, 0, 0), | ||
160 | IS_DIR(sub_mb_type, 0, 1), | ||
161 | nrefs); | ||
162 | } | ||
163 | } | ||
164 | } | ||
165 | } | ||
166 | |||
167 |
2/2✓ Branch 0 taken 125893 times.
✓ Branch 1 taken 95468 times.
|
221361 | for (list = sl->list_count - 1; list >= 0; list--) |
168 |
3/4✓ Branch 0 taken 255047 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 129154 times.
✓ Branch 3 taken 125893 times.
|
255047 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { |
169 | 129154 | int row = refs[list][ref]; | |
170 |
2/2✓ Branch 0 taken 121310 times.
✓ Branch 1 taken 7844 times.
|
129154 | if (row >= 0) { |
171 | 121310 | H264Ref *ref_pic = &sl->ref_list[list][ref]; | |
172 | 121310 | int ref_field = ref_pic->reference - 1; | |
173 | 121310 | int ref_field_picture = ref_pic->parent->field_picture; | |
174 | 121310 | int pic_height = 16 * h->mb_height >> ref_field_picture; | |
175 | |||
176 | 121310 | row <<= MB_MBAFF(sl); | |
177 | 121310 | nrefs[list]--; | |
178 | |||
179 |
2/4✓ Branch 0 taken 121310 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 121310 times.
|
121310 | if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields |
180 | av_assert2((ref_pic->parent->reference & 3) == 3); | ||
181 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
182 | ✗ | FFMIN((row >> 1) - !(row & 1), | |
183 | pic_height - 1), | ||
184 | 1); | ||
185 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
186 | ✗ | FFMIN((row >> 1), pic_height - 1), | |
187 | 0); | ||
188 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
121310 | } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame |
189 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
190 | ✗ | FFMIN(row * 2 + ref_field, | |
191 | pic_height - 1), | ||
192 | 0); | ||
193 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
|
121310 | } else if (FIELD_PICTURE(h)) { |
194 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
195 | FFMIN(row, pic_height - 1), | ||
196 | ref_field); | ||
197 | } else { | ||
198 |
2/2✓ Branch 0 taken 2814 times.
✓ Branch 1 taken 118496 times.
|
121310 | ff_thread_await_progress(&ref_pic->parent->tf, |
199 | FFMIN(row, pic_height - 1), | ||
200 | 0); | ||
201 | } | ||
202 | } | ||
203 | } | ||
204 | 95468 | } | |
205 | |||
206 | 28000949 | static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, | |
207 | H264Ref *pic, | ||
208 | int n, int square, int height, | ||
209 | int delta, int list, | ||
210 | uint8_t *dest_y, uint8_t *dest_cb, | ||
211 | uint8_t *dest_cr, | ||
212 | int src_x_offset, int src_y_offset, | ||
213 | const qpel_mc_func *qpix_op, | ||
214 | h264_chroma_mc_func chroma_op, | ||
215 | int pixel_shift, int chroma_idc) | ||
216 | { | ||
217 | 28000949 | const int mx = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8; | |
218 | 28000949 | int my = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8; | |
219 | 28000949 | const int luma_xy = (mx & 3) + ((my & 3) << 2); | |
220 | 28000949 | ptrdiff_t offset = (mx >> 2) * (1 << pixel_shift) + (my >> 2) * sl->mb_linesize; | |
221 | 28000949 | uint8_t *src_y = pic->data[0] + offset; | |
222 | uint8_t *src_cb, *src_cr; | ||
223 | 28000949 | int extra_width = 0; | |
224 | 28000949 | int extra_height = 0; | |
225 | 28000949 | int emu = 0; | |
226 | 28000949 | const int full_mx = mx >> 2; | |
227 | 28000949 | const int full_my = my >> 2; | |
228 | 28000949 | const int pic_width = 16 * h->mb_width; | |
229 | 28000949 | const int pic_height = 16 * h->mb_height >> MB_FIELD(sl); | |
230 | int ysh; | ||
231 | |||
232 |
2/2✓ Branch 0 taken 18866919 times.
✓ Branch 1 taken 9134030 times.
|
28000949 | if (mx & 7) |
233 | 18866919 | extra_width -= 3; | |
234 |
2/2✓ Branch 0 taken 16358087 times.
✓ Branch 1 taken 11642862 times.
|
28000949 | if (my & 7) |
235 | 16358087 | extra_height -= 3; | |
236 | |||
237 |
2/2✓ Branch 0 taken 27872149 times.
✓ Branch 1 taken 128800 times.
|
28000949 | if (full_mx < 0 - extra_width || |
238 |
2/2✓ Branch 0 taken 27512389 times.
✓ Branch 1 taken 359760 times.
|
27872149 | full_my < 0 - extra_height || |
239 |
2/2✓ Branch 0 taken 26918652 times.
✓ Branch 1 taken 593737 times.
|
27512389 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || |
240 |
2/2✓ Branch 0 taken 1032458 times.
✓ Branch 1 taken 25886194 times.
|
26918652 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { |
241 | 2114755 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
242 | 2114755 | src_y - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
243 | sl->mb_linesize, sl->mb_linesize, | ||
244 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, | ||
245 | full_my - 2, pic_width, pic_height); | ||
246 | 2114755 | src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
247 | 2114755 | emu = 1; | |
248 | } | ||
249 | |||
250 | 28000949 | qpix_op[luma_xy](dest_y, src_y, sl->mb_linesize); // FIXME try variable height perhaps? | |
251 |
2/2✓ Branch 0 taken 8858679 times.
✓ Branch 1 taken 19142270 times.
|
28000949 | if (!square) |
252 | 8858679 | qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize); | |
253 | |||
254 | if (CONFIG_GRAY && h->flags & AV_CODEC_FLAG_GRAY) | ||
255 | return; | ||
256 | |||
257 |
2/2✓ Branch 0 taken 157614 times.
✓ Branch 1 taken 27843335 times.
|
28000949 | if (chroma_idc == 3 /* yuv444 */) { |
258 | 157614 | src_cb = pic->data[1] + offset; | |
259 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
260 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
261 | 5488 | src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
262 | sl->mb_linesize, sl->mb_linesize, | ||
263 | 16 + 5, 16 + 5 /*FIXME*/, | ||
264 | full_mx - 2, full_my - 2, | ||
265 | pic_width, pic_height); | ||
266 | 5488 | src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
267 | } | ||
268 | 157614 | qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps? | |
269 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
270 | 12367 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, sl->mb_linesize); | |
271 | |||
272 | 157614 | src_cr = pic->data[2] + offset; | |
273 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
274 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
275 | 5488 | src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
276 | sl->mb_linesize, sl->mb_linesize, | ||
277 | 16 + 5, 16 + 5 /*FIXME*/, | ||
278 | full_mx - 2, full_my - 2, | ||
279 | pic_width, pic_height); | ||
280 | 5488 | src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
281 | } | ||
282 | 157614 | qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps? | |
283 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
284 | 12367 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, sl->mb_linesize); | |
285 | 157614 | return; | |
286 | } | ||
287 | |||
288 |
2/2✓ Branch 0 taken 118650 times.
✓ Branch 1 taken 27724685 times.
|
27843335 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
289 |
4/4✓ Branch 0 taken 27724685 times.
✓ Branch 1 taken 118650 times.
✓ Branch 2 taken 9925286 times.
✓ Branch 3 taken 17799399 times.
|
27843335 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(sl)) { |
290 | // chroma offset when predicting from a field of opposite parity | ||
291 | 9925286 | my += 2 * ((sl->mb_y & 1) - (pic->reference - 1)); | |
292 |
4/4✓ Branch 0 taken 9847406 times.
✓ Branch 1 taken 77880 times.
✓ Branch 2 taken 508817 times.
✓ Branch 3 taken 9338589 times.
|
9925286 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); |
293 | } | ||
294 | |||
295 | 27843335 | src_cb = pic->data[1] + ((mx >> 3) * (1 << pixel_shift)) + | |
296 | 27843335 | (my >> ysh) * sl->mb_uvlinesize; | |
297 | 27843335 | src_cr = pic->data[2] + ((mx >> 3) * (1 << pixel_shift)) + | |
298 | 27843335 | (my >> ysh) * sl->mb_uvlinesize; | |
299 | |||
300 |
2/2✓ Branch 0 taken 2320450 times.
✓ Branch 1 taken 25522885 times.
|
27843335 | if (emu) { |
301 | 2320450 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb, | |
302 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
303 | 2320450 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
304 | 2320450 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
305 | 2320450 | src_cb = sl->edge_emu_buffer; | |
306 | } | ||
307 | 27843335 | chroma_op(dest_cb, src_cb, sl->mb_uvlinesize, | |
308 | 27843335 | height >> (chroma_idc == 1 /* yuv420 */), | |
309 | 27843335 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
310 | |||
311 |
2/2✓ Branch 0 taken 2320450 times.
✓ Branch 1 taken 25522885 times.
|
27843335 | if (emu) { |
312 | 2320450 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr, | |
313 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
314 | 2320450 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
315 | 2320450 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
316 | 2320450 | src_cr = sl->edge_emu_buffer; | |
317 | } | ||
318 | 27843335 | chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), | |
319 | 27843335 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
320 | } | ||
321 | |||
322 | 20548545 | static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, | |
323 | int n, int square, | ||
324 | int height, int delta, | ||
325 | uint8_t *dest_y, uint8_t *dest_cb, | ||
326 | uint8_t *dest_cr, | ||
327 | int x_offset, int y_offset, | ||
328 | const qpel_mc_func *qpix_put, | ||
329 | h264_chroma_mc_func chroma_put, | ||
330 | const qpel_mc_func *qpix_avg, | ||
331 | h264_chroma_mc_func chroma_avg, | ||
332 | int list0, int list1, | ||
333 | int pixel_shift, int chroma_idc) | ||
334 | { | ||
335 | 20548545 | const qpel_mc_func *qpix_op = qpix_put; | |
336 | 20548545 | h264_chroma_mc_func chroma_op = chroma_put; | |
337 | |||
338 | 20548545 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
339 |
2/2✓ Branch 0 taken 39229 times.
✓ Branch 1 taken 20509316 times.
|
20548545 | if (chroma_idc == 3 /* yuv444 */) { |
340 | 39229 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
341 | 39229 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
342 |
2/2✓ Branch 0 taken 98093 times.
✓ Branch 1 taken 20411223 times.
|
20509316 | } else if (chroma_idc == 2 /* yuv422 */) { |
343 | 98093 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
344 | 98093 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
345 | } else { /* yuv420 */ | ||
346 | 20411223 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
347 | 20411223 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
348 | } | ||
349 | 20548545 | x_offset += 8 * sl->mb_x; | |
350 | 20548545 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
351 | |||
352 |
2/2✓ Branch 0 taken 19018694 times.
✓ Branch 1 taken 1529851 times.
|
20548545 | if (list0) { |
353 | 19018694 | H264Ref *ref = &sl->ref_list[0][sl->ref_cache[0][scan8[n]]]; | |
354 | 19018694 | mc_dir_part(h, sl, ref, n, square, height, delta, 0, | |
355 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
356 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
357 | |||
358 | 19018694 | qpix_op = qpix_avg; | |
359 | 19018694 | chroma_op = chroma_avg; | |
360 | } | ||
361 | |||
362 |
2/2✓ Branch 0 taken 7070213 times.
✓ Branch 1 taken 13478332 times.
|
20548545 | if (list1) { |
363 | 7070213 | H264Ref *ref = &sl->ref_list[1][sl->ref_cache[1][scan8[n]]]; | |
364 | 7070213 | mc_dir_part(h, sl, ref, n, square, height, delta, 1, | |
365 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
366 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
367 | } | ||
368 | 20548545 | } | |
369 | |||
370 | 1589472 | static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, | |
371 | int n, int square, | ||
372 | int height, int delta, | ||
373 | uint8_t *dest_y, uint8_t *dest_cb, | ||
374 | uint8_t *dest_cr, | ||
375 | int x_offset, int y_offset, | ||
376 | const qpel_mc_func *qpix_put, | ||
377 | h264_chroma_mc_func chroma_put, | ||
378 | h264_weight_func luma_weight_op, | ||
379 | h264_weight_func chroma_weight_op, | ||
380 | h264_biweight_func luma_weight_avg, | ||
381 | h264_biweight_func chroma_weight_avg, | ||
382 | int list0, int list1, | ||
383 | int pixel_shift, int chroma_idc) | ||
384 | { | ||
385 | int chroma_height; | ||
386 | |||
387 | 1589472 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
388 |
2/2✓ Branch 0 taken 105686 times.
✓ Branch 1 taken 1483786 times.
|
1589472 | if (chroma_idc == 3 /* yuv444 */) { |
389 | 105686 | chroma_height = height; | |
390 | 105686 | chroma_weight_avg = luma_weight_avg; | |
391 | 105686 | chroma_weight_op = luma_weight_op; | |
392 | 105686 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
393 | 105686 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
394 |
2/2✓ Branch 0 taken 1266 times.
✓ Branch 1 taken 1482520 times.
|
1483786 | } else if (chroma_idc == 2 /* yuv422 */) { |
395 | 1266 | chroma_height = height; | |
396 | 1266 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
397 | 1266 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
398 | } else { /* yuv420 */ | ||
399 | 1482520 | chroma_height = height >> 1; | |
400 | 1482520 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
401 | 1482520 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
402 | } | ||
403 | 1589472 | x_offset += 8 * sl->mb_x; | |
404 | 1589472 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
405 | |||
406 |
4/4✓ Branch 0 taken 1537885 times.
✓ Branch 1 taken 51587 times.
✓ Branch 2 taken 322570 times.
✓ Branch 3 taken 1215315 times.
|
1912042 | if (list0 && list1) { |
407 | /* don't optimize for luma-only case, since B-frames usually | ||
408 | * use implicit weights => chroma too. */ | ||
409 | 322570 | uint8_t *tmp_cb = sl->bipred_scratchpad; | |
410 | 322570 | uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift); | |
411 | 322570 | uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize; | |
412 | 322570 | int refn0 = sl->ref_cache[0][scan8[n]]; | |
413 | 322570 | int refn1 = sl->ref_cache[1][scan8[n]]; | |
414 | |||
415 | 322570 | mc_dir_part(h, sl, &sl->ref_list[0][refn0], n, square, height, delta, 0, | |
416 | dest_y, dest_cb, dest_cr, | ||
417 | x_offset, y_offset, qpix_put, chroma_put, | ||
418 | pixel_shift, chroma_idc); | ||
419 | 322570 | mc_dir_part(h, sl, &sl->ref_list[1][refn1], n, square, height, delta, 1, | |
420 | tmp_y, tmp_cb, tmp_cr, | ||
421 | x_offset, y_offset, qpix_put, chroma_put, | ||
422 | pixel_shift, chroma_idc); | ||
423 | |||
424 |
2/2✓ Branch 0 taken 258247 times.
✓ Branch 1 taken 64323 times.
|
322570 | if (sl->pwt.use_weight == 2) { |
425 | 258247 | int weight0 = sl->pwt.implicit_weight[refn0][refn1][sl->mb_y & 1]; | |
426 | 258247 | int weight1 = 64 - weight0; | |
427 | 258247 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, | |
428 | height, 5, weight0, weight1, 0); | ||
429 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
430 | 258247 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, | |
431 | chroma_height, 5, weight0, weight1, 0); | ||
432 | 258247 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, | |
433 | chroma_height, 5, weight0, weight1, 0); | ||
434 | } | ||
435 | } else { | ||
436 | 64323 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, height, | |
437 | sl->pwt.luma_log2_weight_denom, | ||
438 | sl->pwt.luma_weight[refn0][0][0], | ||
439 | sl->pwt.luma_weight[refn1][1][0], | ||
440 | 64323 | sl->pwt.luma_weight[refn0][0][1] + | |
441 | 64323 | sl->pwt.luma_weight[refn1][1][1]); | |
442 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
443 | 64323 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height, | |
444 | sl->pwt.chroma_log2_weight_denom, | ||
445 | sl->pwt.chroma_weight[refn0][0][0][0], | ||
446 | sl->pwt.chroma_weight[refn1][1][0][0], | ||
447 | 64323 | sl->pwt.chroma_weight[refn0][0][0][1] + | |
448 | 64323 | sl->pwt.chroma_weight[refn1][1][0][1]); | |
449 | 64323 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, chroma_height, | |
450 | sl->pwt.chroma_log2_weight_denom, | ||
451 | sl->pwt.chroma_weight[refn0][0][1][0], | ||
452 | sl->pwt.chroma_weight[refn1][1][1][0], | ||
453 | 64323 | sl->pwt.chroma_weight[refn0][0][1][1] + | |
454 | 64323 | sl->pwt.chroma_weight[refn1][1][1][1]); | |
455 | } | ||
456 | } | ||
457 | } else { | ||
458 | 1266902 | int list = list1 ? 1 : 0; | |
459 | 1266902 | int refn = sl->ref_cache[list][scan8[n]]; | |
460 | 1266902 | H264Ref *ref = &sl->ref_list[list][refn]; | |
461 | 1266902 | mc_dir_part(h, sl, ref, n, square, height, delta, list, | |
462 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
463 | qpix_put, chroma_put, pixel_shift, chroma_idc); | ||
464 | |||
465 | 1266902 | luma_weight_op(dest_y, sl->mb_linesize, height, | |
466 | sl->pwt.luma_log2_weight_denom, | ||
467 | sl->pwt.luma_weight[refn][list][0], | ||
468 | sl->pwt.luma_weight[refn][list][1]); | ||
469 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
470 |
2/2✓ Branch 0 taken 549450 times.
✓ Branch 1 taken 717452 times.
|
1266902 | if (sl->pwt.use_weight_chroma) { |
471 | 549450 | chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height, | |
472 | sl->pwt.chroma_log2_weight_denom, | ||
473 | sl->pwt.chroma_weight[refn][list][0][0], | ||
474 | sl->pwt.chroma_weight[refn][list][0][1]); | ||
475 | 549450 | chroma_weight_op(dest_cr, sl->mb_uvlinesize, chroma_height, | |
476 | sl->pwt.chroma_log2_weight_denom, | ||
477 | sl->pwt.chroma_weight[refn][list][1][0], | ||
478 | sl->pwt.chroma_weight[refn][list][1][1]); | ||
479 | } | ||
480 | } | ||
481 | } | ||
482 | 1589472 | } | |
483 | |||
484 | 14851509 | static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, | |
485 | int list, int pixel_shift, | ||
486 | int chroma_idc) | ||
487 | { | ||
488 | /* fetch pixels for estimated mv 4 macroblocks ahead | ||
489 | * optimized for 64byte cache lines */ | ||
490 | 14851509 | const int refn = sl->ref_cache[list][scan8[0]]; | |
491 |
2/2✓ Branch 0 taken 14471161 times.
✓ Branch 1 taken 380348 times.
|
14851509 | if (refn >= 0) { |
492 | 14471161 | const int mx = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * sl->mb_x + 8; | |
493 | 14471161 | const int my = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * sl->mb_y; | |
494 | 14471161 | uint8_t **src = sl->ref_list[list][refn].data; | |
495 | 14471161 | int off = mx * (1<< pixel_shift) + | |
496 | 14471161 | (my + (sl->mb_x & 3) * 4) * sl->mb_linesize + | |
497 | 14471161 | (64 << pixel_shift); | |
498 | 14471161 | h->vdsp.prefetch(src[0] + off, sl->linesize, 4); | |
499 |
2/2✓ Branch 0 taken 132628 times.
✓ Branch 1 taken 14338533 times.
|
14471161 | if (chroma_idc == 3 /* yuv444 */) { |
500 | 132628 | h->vdsp.prefetch(src[1] + off, sl->linesize, 4); | |
501 | 132628 | h->vdsp.prefetch(src[2] + off, sl->linesize, 4); | |
502 | } else { | ||
503 | 14338533 | off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->mb_x&7))*sl->uvlinesize; | |
504 | 14338533 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); | |
505 | } | ||
506 | } | ||
507 | 14851509 | } | |
508 | |||
509 | 6781852 | static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, | |
510 | uint8_t *src_y, | ||
511 | uint8_t *src_cb, uint8_t *src_cr, | ||
512 | int linesize, int uvlinesize, | ||
513 | int xchg, int chroma444, | ||
514 | int simple, int pixel_shift) | ||
515 | { | ||
516 | int deblock_topleft; | ||
517 | int deblock_top; | ||
518 | 6781852 | int top_idx = 1; | |
519 | uint8_t *top_border_m1; | ||
520 | uint8_t *top_border; | ||
521 | |||
522 |
4/4✓ Branch 0 taken 3066836 times.
✓ Branch 1 taken 3715016 times.
✓ Branch 2 taken 1929248 times.
✓ Branch 3 taken 1137588 times.
|
6781852 | if (!simple && FRAME_MBAFF(h)) { |
523 |
2/2✓ Branch 0 taken 969466 times.
✓ Branch 1 taken 959782 times.
|
1929248 | if (sl->mb_y & 1) { |
524 |
2/2✓ Branch 0 taken 749392 times.
✓ Branch 1 taken 220074 times.
|
969466 | if (!MB_MBAFF(sl)) |
525 | 749392 | return; | |
526 | } else { | ||
527 | 959782 | top_idx = MB_MBAFF(sl) ? 0 : 1; | |
528 | } | ||
529 | } | ||
530 | |||
531 |
2/2✓ Branch 0 taken 61104 times.
✓ Branch 1 taken 5971356 times.
|
6032460 | if (sl->deblocking_filter == 2) { |
532 | 61104 | deblock_topleft = h->slice_table[sl->mb_xy - 1 - (h->mb_stride << MB_FIELD(sl))] == sl->slice_num; | |
533 | 61104 | deblock_top = sl->top_type; | |
534 | } else { | ||
535 | 5971356 | deblock_topleft = (sl->mb_x > 0); | |
536 | 5971356 | deblock_top = (sl->mb_y > !!MB_FIELD(sl)); | |
537 | } | ||
538 | |||
539 | 6032460 | src_y -= linesize + 1 + pixel_shift; | |
540 | 6032460 | src_cb -= uvlinesize + 1 + pixel_shift; | |
541 | 6032460 | src_cr -= uvlinesize + 1 + pixel_shift; | |
542 | |||
543 | 6032460 | top_border_m1 = sl->top_borders[top_idx][sl->mb_x - 1]; | |
544 | 6032460 | top_border = sl->top_borders[top_idx][sl->mb_x]; | |
545 | |||
546 | #define XCHG(a, b, xchg) \ | ||
547 | if (pixel_shift) { \ | ||
548 | if (xchg) { \ | ||
549 | AV_SWAP64(b + 0, a + 0); \ | ||
550 | AV_SWAP64(b + 8, a + 8); \ | ||
551 | } else { \ | ||
552 | AV_COPY128(b, a); \ | ||
553 | } \ | ||
554 | } else if (xchg) \ | ||
555 | AV_SWAP64(b, a); \ | ||
556 | else \ | ||
557 | AV_COPY64(b, a); | ||
558 | |||
559 |
2/2✓ Branch 0 taken 5796148 times.
✓ Branch 1 taken 236312 times.
|
6032460 | if (deblock_top) { |
560 |
2/2✓ Branch 0 taken 5688798 times.
✓ Branch 1 taken 107350 times.
|
5796148 | if (deblock_topleft) { |
561 |
2/2✓ Branch 0 taken 2293554 times.
✓ Branch 1 taken 3395244 times.
|
5688798 | XCHG(top_border_m1 + (8 << pixel_shift), |
562 | src_y - (7 << pixel_shift), 1); | ||
563 | } | ||
564 |
6/6✓ Branch 0 taken 2315284 times.
✓ Branch 1 taken 3480864 times.
✓ Branch 2 taken 1157642 times.
✓ Branch 3 taken 1157642 times.
✓ Branch 5 taken 1740432 times.
✓ Branch 6 taken 1740432 times.
|
5796148 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
565 |
2/2✓ Branch 0 taken 2315284 times.
✓ Branch 1 taken 3480864 times.
|
5796148 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); |
566 |
2/2✓ Branch 0 taken 5669946 times.
✓ Branch 1 taken 126202 times.
|
5796148 | if (sl->mb_x + 1 < h->mb_width) { |
567 |
2/2✓ Branch 0 taken 2292330 times.
✓ Branch 1 taken 3377616 times.
|
5669946 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1], |
568 | src_y + (17 << pixel_shift), 1); | ||
569 | } | ||
570 | if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
571 |
2/2✓ Branch 0 taken 121572 times.
✓ Branch 1 taken 5674576 times.
|
5796148 | if (chroma444) { |
572 |
2/2✓ Branch 0 taken 120330 times.
✓ Branch 1 taken 1242 times.
|
121572 | if (deblock_topleft) { |
573 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
574 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
575 | } | ||
576 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); |
577 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); |
578 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); |
579 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); |
580 |
2/2✓ Branch 0 taken 120314 times.
✓ Branch 1 taken 1258 times.
|
121572 | if (sl->mb_x + 1 < h->mb_width) { |
581 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); |
582 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); |
583 | } | ||
584 | } else { | ||
585 |
2/2✓ Branch 0 taken 5568468 times.
✓ Branch 1 taken 106108 times.
|
5674576 | if (deblock_topleft) { |
586 |
2/2✓ Branch 0 taken 2291302 times.
✓ Branch 1 taken 3277166 times.
|
5568468 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
587 |
2/2✓ Branch 0 taken 2291302 times.
✓ Branch 1 taken 3277166 times.
|
5568468 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
588 | } | ||
589 |
2/2✓ Branch 0 taken 2312892 times.
✓ Branch 1 taken 3361684 times.
|
5674576 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
590 |
2/2✓ Branch 0 taken 2312892 times.
✓ Branch 1 taken 3361684 times.
|
5674576 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); |
591 | } | ||
592 | } | ||
593 | } | ||
594 | } | ||
595 | |||
596 | 4485110 | static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, | |
597 | int index) | ||
598 | { | ||
599 |
2/2✓ Branch 0 taken 1122033 times.
✓ Branch 1 taken 3363077 times.
|
4485110 | if (high_bit_depth) { |
600 | 1122033 | return AV_RN32A(((int32_t *)mb) + index); | |
601 | } else | ||
602 | 3363077 | return AV_RN16A(mb + index); | |
603 | } | ||
604 | |||
605 | 9792 | static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, | |
606 | int index, int value) | ||
607 | { | ||
608 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9792 times.
|
9792 | if (high_bit_depth) { |
609 | ✗ | AV_WN32A(((int32_t *)mb) + index, value); | |
610 | } else | ||
611 | 9792 | AV_WN16A(mb + index, value); | |
612 | 9792 | } | |
613 | |||
/**
 * Run intra prediction for one plane of the current macroblock.
 *
 * For macroblocks flagged IS_INTRA4x4 (with or without IS_8x8DCT) every block
 * is predicted and its residual is added immediately afterwards, before the
 * next block is predicted. For every other macroblock type the whole 16x16
 * area is predicted with pred16x16 and only the DC coefficients are prepared
 * here; no residual is added to the picture in that branch.
 *
 * @param h                decoder context (DSP function tables, SPS/PPS)
 * @param sl               slice context (coefficients in sl->mb, caches)
 * @param mb_type          macroblock type flags tested with the IS_*() macros
 * @param simple           not referenced inside this function
 * @param transform_bypass non-zero selects the add_pixels*_clear functions
 *                         instead of the IDCT functions
 * @param pixel_shift      0 or 1; used as a shift on coefficient offsets and
 *                         to select 16-bit sample handling
 * @param block_offset     per-block offsets into dest_y; advanced by 16 * p
 * @param linesize         stride passed on to the prediction/IDCT functions
 * @param dest_y           destination of the plane being processed
 * @param p                plane index; selects qscale, the coefficient base
 *                         (p * 256) and the cache entries (p * 16)
 */
static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
                                                       H264SliceContext *sl,
                                                       int mb_type, int simple,
                                                       int transform_bypass,
                                                       int pixel_shift,
                                                       const int *block_offset,
                                                       int linesize,
                                                       uint8_t *dest_y, int p)
{
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
    int i;
    /* Plane 0 uses the slice qscale, the other planes the matching chroma QP.
     * qscale is only consumed by the DC dequantisation in the last branch. */
    int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
    block_offset += 16 * p;
    if (IS_INTRA4x4(mb_type)) {
        if (IS_8x8DCT(mb_type)) {
            /* 8x8 transform: four blocks, visited with i = 0, 4, 8, 12. */
            if (transform_bypass) {
                idct_dc_add =
                idct_add = h->h264dsp.h264_add_pixels8_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                idct_add = h->h264dsp.h264_idct8_add;
            }
            for (i = 0; i < 16; i += 4) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];
                /* NOTE(review): profile_idc 244 is presumably High 4:4:4
                 * Predictive and dir <= 1 the vertical/horizontal modes;
                 * in that case a combined pred*_add function is used. */
                if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
                    /* NOTE(review): the x264_build threshold looks like a
                     * compatibility switch for older encoder builds -- confirm. */
                    if (h->x264_build < 151U) {
                        h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    } else
                        h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift),
                                                        (sl-> topleft_samples_available << i) & 0x8000,
                                                        (sl->topright_samples_available << i) & 0x4000, linesize);
                } else {
                    const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
                    h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000,
                                         (sl->topright_samples_available << i) & 0x4000, linesize);
                    if (nnz) {
                        /* A count of one together with a non-zero first
                         * coefficient selects the DC-only add function. */
                        if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
                            idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        else
                            idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    }
                }
            }
        } else {
            /* 4x4 transform: sixteen blocks. */
            if (transform_bypass) {
                idct_dc_add =
                idct_add = h->h264dsp.h264_add_pixels4_clear;
            } else {
                idct_dc_add = h->h264dsp.h264_idct_dc_add;
                idct_add = h->h264dsp.h264_idct_add;
            }
            for (i = 0; i < 16; i++) {
                uint8_t *const ptr = dest_y + block_offset[i];
                const int dir = sl->intra4x4_pred_mode_cache[scan8[i]];

                if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) {
                    h->hpc.pred4x4_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                } else {
                    uint8_t *topright;
                    int nnz, tr;
                    uint64_t tr_high;
                    /* Only these two modes are given a top-right pointer;
                     * every other mode receives NULL. */
                    if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                        const int topright_avail = (sl->topright_samples_available << i) & 0x8000;
                        av_assert2(sl->mb_y || linesize <= block_offset[i]);
                        if (!topright_avail) {
                            /* Top-right samples unavailable: replicate the
                             * sample at index 3 of the row above the block
                             * four times into a local buffer and point
                             * topright at that buffer instead. */
                            if (pixel_shift) {
                                tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
                                topright = (uint8_t *)&tr_high;
                            } else {
                                tr = ptr[3 - linesize] * 0x01010101u;
                                topright = (uint8_t *)&tr;
                            }
                        } else
                            topright = ptr + (4 << pixel_shift) - linesize;
                    } else
                        topright = NULL;

                    h->hpc.pred4x4[dir](ptr, topright, linesize);
                    nnz = sl->non_zero_count_cache[scan8[i + p * 16]];
                    if (nnz) {
                        /* Same DC-only shortcut as in the 8x8 path. */
                        if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256))
                            idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                        else
                            idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize);
                    }
                }
            }
        }
    } else {
        /* Not IS_INTRA4x4: predict the whole 16x16 area in one call.
         * NOTE(review): presumably only reached for intra 16x16 macroblocks;
         * verify against the callers in h264_mb_template.c. */
        h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
        if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) {
            if (!transform_bypass)
                h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift),
                                                     sl->mb_luma_dc[p],
                                                     h->ps.pps->dequant4_coeff[p][qscale][0]);
            else {
                /* Bypass: copy the 16 DC values unchanged from mb_luma_dc[p]
                 * to the first coefficient of each 4x4 block; dc_mapping
                 * gives the destination coefficient index for DC value i. */
                static const uint8_t dc_mapping[16] = {
                    0 * 16, 1 * 16, 4 * 16, 5 * 16,
                    2 * 16, 3 * 16, 6 * 16, 7 * 16,
                    8 * 16, 9 * 16, 12 * 16, 13 * 16,
                    10 * 16, 11 * 16, 14 * 16, 15 * 16
                };
                for (i = 0; i < 16; i++)
                    dctcoef_set(sl->mb + (p * 256 << pixel_shift),
                                pixel_shift, dc_mapping[i],
                                dctcoef_get(sl->mb_luma_dc[p],
                                            pixel_shift, i));
            }
        }
    }
}
727 | |||
728 | 15561207 | static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, | |
729 | int mb_type, int simple, | ||
730 | int transform_bypass, | ||
731 | int pixel_shift, | ||
732 | const int *block_offset, | ||
733 | int linesize, | ||
734 | uint8_t *dest_y, int p) | ||
735 | { | ||
736 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); | ||
737 | int i; | ||
738 | 15561207 | block_offset += 16 * p; | |
739 |
2/2✓ Branch 0 taken 12370748 times.
✓ Branch 1 taken 3190459 times.
|
15561207 | if (!IS_INTRA4x4(mb_type)) { |
740 |
2/2✓ Branch 0 taken 1058277 times.
✓ Branch 1 taken 11312471 times.
|
12370748 | if (IS_INTRA16x16(mb_type)) { |
741 |
2/2✓ Branch 0 taken 864 times.
✓ Branch 1 taken 1057413 times.
|
1058277 | if (transform_bypass) { |
742 |
1/2✓ Branch 0 taken 864 times.
✗ Branch 1 not taken.
|
864 | if (h->ps.sps->profile_idc == 244 && |
743 |
2/2✓ Branch 0 taken 502 times.
✓ Branch 1 taken 362 times.
|
864 | (sl->intra16x16_pred_mode == VERT_PRED8x8 || |
744 |
2/2✓ Branch 0 taken 278 times.
✓ Branch 1 taken 224 times.
|
502 | sl->intra16x16_pred_mode == HOR_PRED8x8)) { |
745 | 640 | h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset, | |
746 | 640 | sl->mb + (p * 256 << pixel_shift), | |
747 | linesize); | ||
748 | } else { | ||
749 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 224 times.
|
3808 | for (i = 0; i < 16; i++) |
750 |
3/4✓ Branch 0 taken 40 times.
✓ Branch 1 taken 3544 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
|
3624 | if (sl->non_zero_count_cache[scan8[i + p * 16]] || |
751 | 40 | dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) | |
752 | 3544 | h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], | |
753 | 3544 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
754 | linesize); | ||
755 | } | ||
756 | } else { | ||
757 | 1057413 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, | |
758 | 1057413 | sl->mb + (p * 256 << pixel_shift), | |
759 | linesize, | ||
760 | 1057413 | sl->non_zero_count_cache + p * 5 * 8); | |
761 | } | ||
762 |
2/2✓ Branch 0 taken 6383646 times.
✓ Branch 1 taken 4928825 times.
|
11312471 | } else if (sl->cbp & 15) { |
763 |
2/2✓ Branch 0 taken 339104 times.
✓ Branch 1 taken 6044542 times.
|
6383646 | if (transform_bypass) { |
764 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
765 | 678208 | idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear | |
766 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | : h->h264dsp.h264_add_pixels4_clear; |
767 |
2/2✓ Branch 0 taken 5220812 times.
✓ Branch 1 taken 339104 times.
|
5559916 | for (i = 0; i < 16; i += di) |
768 |
2/2✓ Branch 0 taken 722104 times.
✓ Branch 1 taken 4498708 times.
|
5220812 | if (sl->non_zero_count_cache[scan8[i + p * 16]]) |
769 | 722104 | idct_add(dest_y + block_offset[i], | |
770 | 722104 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
771 | linesize); | ||
772 | } else { | ||
773 |
2/2✓ Branch 0 taken 1029675 times.
✓ Branch 1 taken 5014867 times.
|
6044542 | if (IS_8x8DCT(mb_type)) |
774 | 1029675 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, | |
775 | 1029675 | sl->mb + (p * 256 << pixel_shift), | |
776 | linesize, | ||
777 | 1029675 | sl->non_zero_count_cache + p * 5 * 8); | |
778 | else | ||
779 | 5014867 | h->h264dsp.h264_idct_add16(dest_y, block_offset, | |
780 | 5014867 | sl->mb + (p * 256 << pixel_shift), | |
781 | linesize, | ||
782 | 5014867 | sl->non_zero_count_cache + p * 5 * 8); | |
783 | } | ||
784 | } | ||
785 | } | ||
786 | 15561207 | } | |
787 | |||
/*
 * h264_mb_template.c is included three times, each time with a different
 * BITS / SIMPLE combination:
 *   1. BITS = 8,  SIMPLE = 1
 *   2. BITS = 16, SIMPLE = 1
 *   3. BITS = 16, SIMPLE = 0
 * NOTE(review): the hl_decode_mb_* functions called from
 * ff_h264_hl_decode_mb() are presumably produced by these inclusions --
 * confirm against h264_mb_template.c.
 */
#define BITS 8
#define SIMPLE 1
#include "h264_mb_template.c"

#undef BITS
#define BITS 16
#include "h264_mb_template.c"

#undef SIMPLE
#define SIMPLE 0
#include "h264_mb_template.c"
799 | |||
800 | 15198996 | void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl) | |
801 | { | ||
802 | 15198996 | const int mb_xy = sl->mb_xy; | |
803 | 15198996 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
804 | 38581033 | int is_complex = CONFIG_SMALL || sl->is_complex || | |
805 |
6/6✓ Branch 0 taken 8183041 times.
✓ Branch 1 taken 7015955 times.
✓ Branch 2 taken 8159936 times.
✓ Branch 3 taken 23105 times.
✓ Branch 4 taken 136878 times.
✓ Branch 5 taken 8023058 times.
|
15198996 | IS_INTRA_PCM(mb_type) || sl->qscale == 0; |
806 | |||
807 |
2/2✓ Branch 0 taken 192658 times.
✓ Branch 1 taken 15006338 times.
|
15198996 | if (CHROMA444(h)) { |
808 |
4/4✓ Branch 0 taken 79866 times.
✓ Branch 1 taken 112792 times.
✓ Branch 2 taken 19800 times.
✓ Branch 3 taken 60066 times.
|
192658 | if (is_complex || h->pixel_shift) |
809 | 132592 | hl_decode_mb_444_complex(h, sl); | |
810 | else | ||
811 | 60066 | hl_decode_mb_444_simple_8(h, sl); | |
812 |
2/2✓ Branch 0 taken 7063146 times.
✓ Branch 1 taken 7943192 times.
|
15006338 | } else if (is_complex) { |
813 | 7063146 | hl_decode_mb_complex(h, sl); | |
814 |
2/2✓ Branch 0 taken 1146200 times.
✓ Branch 1 taken 6796992 times.
|
7943192 | } else if (h->pixel_shift) { |
815 | 1146200 | hl_decode_mb_simple_16(h, sl); | |
816 | } else | ||
817 | 6796992 | hl_decode_mb_simple_8(h, sl); | |
818 | 15198996 | } | |
819 |