| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder | ||
| 3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | /** | ||
| 23 | * @file | ||
| 24 | * H.264 / AVC / MPEG-4 part10 macroblock decoding | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <stdint.h> | ||
| 28 | |||
| 29 | #include "config.h" | ||
| 30 | |||
| 31 | #include "libavutil/common.h" | ||
| 32 | #include "libavutil/intreadwrite.h" | ||
| 33 | #include "avcodec.h" | ||
| 34 | #include "h264dec.h" | ||
| 35 | #include "h264_ps.h" | ||
| 36 | #include "qpeldsp.h" | ||
| 37 | #include "rectangle.h" | ||
| 38 | #include "threadframe.h" | ||
| 39 | |||
| 40 | 136653 | static inline int get_lowest_part_list_y(H264SliceContext *sl, | |
| 41 | int n, int height, int y_offset, int list) | ||
| 42 | { | ||
| 43 | 136653 | int raw_my = sl->mv_cache[list][scan8[n]][1]; | |
| 44 |
2/2✓ Branch 0 taken 23509 times.
✓ Branch 1 taken 113144 times.
|
136653 | int filter_height_down = (raw_my & 3) ? 3 : 0; |
| 45 | 136653 | int full_my = (raw_my >> 2) + y_offset; | |
| 46 | 136653 | int bottom = full_my + filter_height_down + height; | |
| 47 | |||
| 48 | av_assert2(height >= 0); | ||
| 49 | |||
| 50 | 136653 | return FFMAX(0, bottom); | |
| 51 | } | ||
| 52 | |||
| 53 | 112897 | static inline void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, | |
| 54 | int16_t refs[2][48], int n, | ||
| 55 | int height, int y_offset, int list0, | ||
| 56 | int list1, int *nrefs) | ||
| 57 | { | ||
| 58 | int my; | ||
| 59 | |||
| 60 | 112897 | y_offset += 16 * (sl->mb_y >> MB_FIELD(sl)); | |
| 61 | |||
| 62 |
2/2✓ Branch 0 taken 105424 times.
✓ Branch 1 taken 7473 times.
|
112897 | if (list0) { |
| 63 | 105424 | int ref_n = sl->ref_cache[0][scan8[n]]; | |
| 64 | 105424 | H264Ref *ref = &sl->ref_list[0][ref_n]; | |
| 65 | |||
| 66 | // Error resilience puts the current picture in the ref list. | ||
| 67 | // Don't try to wait on these as it will cause a deadlock. | ||
| 68 | // Fields can wait on each other, though. | ||
| 69 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 105424 times.
|
105424 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
| 70 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
| 71 | 105424 | my = get_lowest_part_list_y(sl, n, height, y_offset, 0); | |
| 72 |
2/2✓ Branch 0 taken 92282 times.
✓ Branch 1 taken 13142 times.
|
105424 | if (refs[0][ref_n] < 0) |
| 73 | 92282 | nrefs[0] += 1; | |
| 74 | 105424 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); | |
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 |
2/2✓ Branch 0 taken 31229 times.
✓ Branch 1 taken 81668 times.
|
112897 | if (list1) { |
| 79 | 31229 | int ref_n = sl->ref_cache[1][scan8[n]]; | |
| 80 | 31229 | H264Ref *ref = &sl->ref_list[1][ref_n]; | |
| 81 | |||
| 82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 31229 times.
|
31229 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
| 83 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
| 84 | 31229 | my = get_lowest_part_list_y(sl, n, height, y_offset, 1); | |
| 85 |
2/2✓ Branch 0 taken 29028 times.
✓ Branch 1 taken 2201 times.
|
31229 | if (refs[1][ref_n] < 0) |
| 86 | 29028 | nrefs[1] += 1; | |
| 87 | 31229 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); | |
| 88 | } | ||
| 89 | } | ||
| 90 | 112897 | } | |
| 91 | |||
| 92 | /** | ||
| 93 | * Wait until all reference frames are available for MC operations. | ||
| 94 | * | ||
| 95 | * @param h the H.264 context | ||
| 96 | */ | ||
| 97 | 95468 | static void await_references(const H264Context *h, H264SliceContext *sl) | |
| 98 | { | ||
| 99 | 95468 | const int mb_xy = sl->mb_xy; | |
| 100 | 95468 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
| 101 | int16_t refs[2][48]; | ||
| 102 | 95468 | int nrefs[2] = { 0 }; | |
| 103 | int ref, list; | ||
| 104 | |||
| 105 | 95468 | memset(refs, -1, sizeof(refs)); | |
| 106 | |||
| 107 |
2/2✓ Branch 0 taken 85183 times.
✓ Branch 1 taken 10285 times.
|
95468 | if (IS_16X16(mb_type)) { |
| 108 | 85183 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
| 109 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 110 |
2/2✓ Branch 0 taken 3423 times.
✓ Branch 1 taken 6862 times.
|
10285 | } else if (IS_16X8(mb_type)) { |
| 111 | 3423 | get_lowest_part_y(h, sl, refs, 0, 8, 0, | |
| 112 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 113 | 3423 | get_lowest_part_y(h, sl, refs, 8, 8, 8, | |
| 114 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
| 115 |
2/2✓ Branch 0 taken 3290 times.
✓ Branch 1 taken 3572 times.
|
6862 | } else if (IS_8X16(mb_type)) { |
| 116 | 3290 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
| 117 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 118 | 3290 | get_lowest_part_y(h, sl, refs, 4, 16, 0, | |
| 119 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
| 120 | } else { | ||
| 121 | int i; | ||
| 122 | |||
| 123 | av_assert2(IS_8X8(mb_type)); | ||
| 124 | |||
| 125 |
2/2✓ Branch 0 taken 14288 times.
✓ Branch 1 taken 3572 times.
|
17860 | for (i = 0; i < 4; i++) { |
| 126 | 14288 | const int sub_mb_type = sl->sub_mb_type[i]; | |
| 127 | 14288 | const int n = 4 * i; | |
| 128 | 14288 | int y_offset = (i & 2) << 2; | |
| 129 | |||
| 130 |
1/2✓ Branch 0 taken 14288 times.
✗ Branch 1 not taken.
|
14288 | if (IS_SUB_8X8(sub_mb_type)) { |
| 131 | 14288 | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
| 132 | IS_DIR(sub_mb_type, 0, 0), | ||
| 133 | IS_DIR(sub_mb_type, 0, 1), | ||
| 134 | nrefs); | ||
| 135 | ✗ | } else if (IS_SUB_8X4(sub_mb_type)) { | |
| 136 | ✗ | get_lowest_part_y(h, sl, refs, n, 4, y_offset, | |
| 137 | IS_DIR(sub_mb_type, 0, 0), | ||
| 138 | IS_DIR(sub_mb_type, 0, 1), | ||
| 139 | nrefs); | ||
| 140 | ✗ | get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4, | |
| 141 | IS_DIR(sub_mb_type, 0, 0), | ||
| 142 | IS_DIR(sub_mb_type, 0, 1), | ||
| 143 | nrefs); | ||
| 144 | ✗ | } else if (IS_SUB_4X8(sub_mb_type)) { | |
| 145 | ✗ | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
| 146 | IS_DIR(sub_mb_type, 0, 0), | ||
| 147 | IS_DIR(sub_mb_type, 0, 1), | ||
| 148 | nrefs); | ||
| 149 | ✗ | get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset, | |
| 150 | IS_DIR(sub_mb_type, 0, 0), | ||
| 151 | IS_DIR(sub_mb_type, 0, 1), | ||
| 152 | nrefs); | ||
| 153 | } else { | ||
| 154 | int j; | ||
| 155 | av_assert2(IS_SUB_4X4(sub_mb_type)); | ||
| 156 | ✗ | for (j = 0; j < 4; j++) { | |
| 157 | ✗ | int sub_y_offset = y_offset + 2 * (j & 2); | |
| 158 | ✗ | get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset, | |
| 159 | IS_DIR(sub_mb_type, 0, 0), | ||
| 160 | IS_DIR(sub_mb_type, 0, 1), | ||
| 161 | nrefs); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 125893 times.
✓ Branch 1 taken 95468 times.
|
221361 | for (list = sl->list_count - 1; list >= 0; list--) |
| 168 |
3/4✓ Branch 0 taken 255047 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 129154 times.
✓ Branch 3 taken 125893 times.
|
255047 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { |
| 169 | 129154 | int row = refs[list][ref]; | |
| 170 |
2/2✓ Branch 0 taken 121310 times.
✓ Branch 1 taken 7844 times.
|
129154 | if (row >= 0) { |
| 171 | 121310 | H264Ref *ref_pic = &sl->ref_list[list][ref]; | |
| 172 | 121310 | int ref_field = ref_pic->reference - 1; | |
| 173 | 121310 | int ref_field_picture = ref_pic->parent->field_picture; | |
| 174 | 121310 | int pic_height = 16 * h->mb_height >> ref_field_picture; | |
| 175 | |||
| 176 | 121310 | row <<= MB_MBAFF(sl); | |
| 177 | 121310 | nrefs[list]--; | |
| 178 | |||
| 179 |
2/4✓ Branch 0 taken 121310 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 121310 times.
|
121310 | if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields |
| 180 | av_assert2((ref_pic->parent->reference & 3) == 3); | ||
| 181 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 182 | ✗ | FFMIN((row >> 1) - !(row & 1), | |
| 183 | pic_height - 1), | ||
| 184 | 1); | ||
| 185 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 186 | ✗ | FFMIN((row >> 1), pic_height - 1), | |
| 187 | 0); | ||
| 188 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
121310 | } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame |
| 189 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 190 | ✗ | FFMIN(row * 2 + ref_field, | |
| 191 | pic_height - 1), | ||
| 192 | 0); | ||
| 193 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
|
121310 | } else if (FIELD_PICTURE(h)) { |
| 194 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 195 | FFMIN(row, pic_height - 1), | ||
| 196 | ref_field); | ||
| 197 | } else { | ||
| 198 |
2/2✓ Branch 0 taken 2814 times.
✓ Branch 1 taken 118496 times.
|
121310 | ff_thread_await_progress(&ref_pic->parent->tf, |
| 199 | FFMIN(row, pic_height - 1), | ||
| 200 | 0); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | } | ||
| 204 | 95468 | } | |
| 205 | |||
| 206 | 28235774 | static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, | |
| 207 | H264Ref *pic, | ||
| 208 | int n, int square, int height, | ||
| 209 | int delta, int list, | ||
| 210 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 211 | uint8_t *dest_cr, | ||
| 212 | int src_x_offset, int src_y_offset, | ||
| 213 | const qpel_mc_func *qpix_op, | ||
| 214 | h264_chroma_mc_func chroma_op, | ||
| 215 | int pixel_shift, int chroma_idc) | ||
| 216 | { | ||
| 217 | 28235774 | const int mx = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8; | |
| 218 | 28235774 | int my = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8; | |
| 219 | 28235774 | const int luma_xy = (mx & 3) + ((my & 3) << 2); | |
| 220 | 28235774 | ptrdiff_t offset = (mx >> 2) * (1 << pixel_shift) + (my >> 2) * sl->mb_linesize; | |
| 221 | 28235774 | uint8_t *src_y = pic->data[0] + offset; | |
| 222 | uint8_t *src_cb, *src_cr; | ||
| 223 | 28235774 | int extra_width = 0; | |
| 224 | 28235774 | int extra_height = 0; | |
| 225 | 28235774 | int emu = 0; | |
| 226 | 28235774 | const int full_mx = mx >> 2; | |
| 227 | 28235774 | const int full_my = my >> 2; | |
| 228 | 28235774 | const int pic_width = 16 * h->mb_width; | |
| 229 | 28235774 | const int pic_height = 16 * h->mb_height >> MB_FIELD(sl); | |
| 230 | int ysh; | ||
| 231 | |||
| 232 |
2/2✓ Branch 0 taken 18975475 times.
✓ Branch 1 taken 9260299 times.
|
28235774 | if (mx & 7) |
| 233 | 18975475 | extra_width -= 3; | |
| 234 |
2/2✓ Branch 0 taken 16434153 times.
✓ Branch 1 taken 11801621 times.
|
28235774 | if (my & 7) |
| 235 | 16434153 | extra_height -= 3; | |
| 236 | |||
| 237 |
2/2✓ Branch 0 taken 28104097 times.
✓ Branch 1 taken 131677 times.
|
28235774 | if (full_mx < 0 - extra_width || |
| 238 |
2/2✓ Branch 0 taken 27744031 times.
✓ Branch 1 taken 360066 times.
|
28104097 | full_my < 0 - extra_height || |
| 239 |
2/2✓ Branch 0 taken 27145746 times.
✓ Branch 1 taken 598285 times.
|
27744031 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || |
| 240 |
2/2✓ Branch 0 taken 1034388 times.
✓ Branch 1 taken 26111358 times.
|
27145746 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { |
| 241 | 2124416 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 242 | 2124416 | src_y - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 243 | sl->mb_linesize, sl->mb_linesize, | ||
| 244 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, | ||
| 245 | full_my - 2, pic_width, pic_height); | ||
| 246 | 2124416 | src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 247 | 2124416 | emu = 1; | |
| 248 | } | ||
| 249 | |||
| 250 | 28235774 | qpix_op[luma_xy](dest_y, src_y, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 251 |
2/2✓ Branch 0 taken 8893166 times.
✓ Branch 1 taken 19342608 times.
|
28235774 | if (!square) |
| 252 | 8893166 | qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize); | |
| 253 | |||
| 254 | if (CONFIG_GRAY && h->flags & AV_CODEC_FLAG_GRAY) | ||
| 255 | return; | ||
| 256 | |||
| 257 |
2/2✓ Branch 0 taken 157614 times.
✓ Branch 1 taken 28078160 times.
|
28235774 | if (chroma_idc == 3 /* yuv444 */) { |
| 258 | 157614 | src_cb = pic->data[1] + offset; | |
| 259 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
| 260 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 261 | 5488 | src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 262 | sl->mb_linesize, sl->mb_linesize, | ||
| 263 | 16 + 5, 16 + 5 /*FIXME*/, | ||
| 264 | full_mx - 2, full_my - 2, | ||
| 265 | pic_width, pic_height); | ||
| 266 | 5488 | src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 267 | } | ||
| 268 | 157614 | qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 269 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
| 270 | 12367 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, sl->mb_linesize); | |
| 271 | |||
| 272 | 157614 | src_cr = pic->data[2] + offset; | |
| 273 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
| 274 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 275 | 5488 | src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 276 | sl->mb_linesize, sl->mb_linesize, | ||
| 277 | 16 + 5, 16 + 5 /*FIXME*/, | ||
| 278 | full_mx - 2, full_my - 2, | ||
| 279 | pic_width, pic_height); | ||
| 280 | 5488 | src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 281 | } | ||
| 282 | 157614 | qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 283 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
| 284 | 12367 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, sl->mb_linesize); | |
| 285 | 157614 | return; | |
| 286 | } | ||
| 287 | |||
| 288 |
2/2✓ Branch 0 taken 118650 times.
✓ Branch 1 taken 27959510 times.
|
28078160 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
| 289 |
4/4✓ Branch 0 taken 27959510 times.
✓ Branch 1 taken 118650 times.
✓ Branch 2 taken 9941018 times.
✓ Branch 3 taken 18018492 times.
|
28078160 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(sl)) { |
| 290 | // chroma offset when predicting from a field of opposite parity | ||
| 291 | 9941018 | my += 2 * ((sl->mb_y & 1) - (pic->reference - 1)); | |
| 292 |
4/4✓ Branch 0 taken 9863006 times.
✓ Branch 1 taken 78012 times.
✓ Branch 2 taken 509264 times.
✓ Branch 3 taken 9353742 times.
|
9941018 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); |
| 293 | } | ||
| 294 | |||
| 295 | 28078160 | src_cb = pic->data[1] + ((mx >> 3) * (1 << pixel_shift)) + | |
| 296 | 28078160 | (my >> ysh) * sl->mb_uvlinesize; | |
| 297 | 28078160 | src_cr = pic->data[2] + ((mx >> 3) * (1 << pixel_shift)) + | |
| 298 | 28078160 | (my >> ysh) * sl->mb_uvlinesize; | |
| 299 | |||
| 300 |
2/2✓ Branch 0 taken 2330339 times.
✓ Branch 1 taken 25747821 times.
|
28078160 | if (emu) { |
| 301 | 2330339 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb, | |
| 302 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
| 303 | 2330339 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
| 304 | 2330339 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
| 305 | 2330339 | src_cb = sl->edge_emu_buffer; | |
| 306 | } | ||
| 307 | 28078160 | chroma_op(dest_cb, src_cb, sl->mb_uvlinesize, | |
| 308 | 28078160 | height >> (chroma_idc == 1 /* yuv420 */), | |
| 309 | 28078160 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
| 310 | |||
| 311 |
2/2✓ Branch 0 taken 2330339 times.
✓ Branch 1 taken 25747821 times.
|
28078160 | if (emu) { |
| 312 | 2330339 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr, | |
| 313 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
| 314 | 2330339 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
| 315 | 2330339 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
| 316 | 2330339 | src_cr = sl->edge_emu_buffer; | |
| 317 | } | ||
| 318 | 28078160 | chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), | |
| 319 | 28078160 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
| 320 | } | ||
| 321 | |||
| 322 | 20720753 | static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, | |
| 323 | int n, int square, | ||
| 324 | int height, int delta, | ||
| 325 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 326 | uint8_t *dest_cr, | ||
| 327 | int x_offset, int y_offset, | ||
| 328 | const qpel_mc_func *qpix_put, | ||
| 329 | h264_chroma_mc_func chroma_put, | ||
| 330 | const qpel_mc_func *qpix_avg, | ||
| 331 | h264_chroma_mc_func chroma_avg, | ||
| 332 | int list0, int list1, | ||
| 333 | int pixel_shift, int chroma_idc) | ||
| 334 | { | ||
| 335 | 20720753 | const qpel_mc_func *qpix_op = qpix_put; | |
| 336 | 20720753 | h264_chroma_mc_func chroma_op = chroma_put; | |
| 337 | |||
| 338 | 20720753 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 339 |
2/2✓ Branch 0 taken 39229 times.
✓ Branch 1 taken 20681524 times.
|
20720753 | if (chroma_idc == 3 /* yuv444 */) { |
| 340 | 39229 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 341 | 39229 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 342 |
2/2✓ Branch 0 taken 98093 times.
✓ Branch 1 taken 20583431 times.
|
20681524 | } else if (chroma_idc == 2 /* yuv422 */) { |
| 343 | 98093 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 344 | 98093 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 345 | } else { /* yuv420 */ | ||
| 346 | 20583431 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 347 | 20583431 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 348 | } | ||
| 349 | 20720753 | x_offset += 8 * sl->mb_x; | |
| 350 | 20720753 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
| 351 | |||
| 352 |
2/2✓ Branch 0 taken 19177001 times.
✓ Branch 1 taken 1543752 times.
|
20720753 | if (list0) { |
| 353 | 19177001 | H264Ref *ref = &sl->ref_list[0][sl->ref_cache[0][scan8[n]]]; | |
| 354 | 19177001 | mc_dir_part(h, sl, ref, n, square, height, delta, 0, | |
| 355 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 356 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
| 357 | |||
| 358 | 19177001 | qpix_op = qpix_avg; | |
| 359 | 19177001 | chroma_op = chroma_avg; | |
| 360 | } | ||
| 361 | |||
| 362 |
2/2✓ Branch 0 taken 7116725 times.
✓ Branch 1 taken 13604028 times.
|
20720753 | if (list1) { |
| 363 | 7116725 | H264Ref *ref = &sl->ref_list[1][sl->ref_cache[1][scan8[n]]]; | |
| 364 | 7116725 | mc_dir_part(h, sl, ref, n, square, height, delta, 1, | |
| 365 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 366 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
| 367 | } | ||
| 368 | 20720753 | } | |
| 369 | |||
| 370 | 1606575 | static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, | |
| 371 | int n, int square, | ||
| 372 | int height, int delta, | ||
| 373 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 374 | uint8_t *dest_cr, | ||
| 375 | int x_offset, int y_offset, | ||
| 376 | const qpel_mc_func *qpix_put, | ||
| 377 | h264_chroma_mc_func chroma_put, | ||
| 378 | h264_weight_func luma_weight_op, | ||
| 379 | h264_weight_func chroma_weight_op, | ||
| 380 | h264_biweight_func luma_weight_avg, | ||
| 381 | h264_biweight_func chroma_weight_avg, | ||
| 382 | int list0, int list1, | ||
| 383 | int pixel_shift, int chroma_idc) | ||
| 384 | { | ||
| 385 | int chroma_height; | ||
| 386 | |||
| 387 | 1606575 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 388 |
2/2✓ Branch 0 taken 105686 times.
✓ Branch 1 taken 1500889 times.
|
1606575 | if (chroma_idc == 3 /* yuv444 */) { |
| 389 | 105686 | chroma_height = height; | |
| 390 | 105686 | chroma_weight_avg = luma_weight_avg; | |
| 391 | 105686 | chroma_weight_op = luma_weight_op; | |
| 392 | 105686 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 393 | 105686 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 394 |
2/2✓ Branch 0 taken 1266 times.
✓ Branch 1 taken 1499623 times.
|
1500889 | } else if (chroma_idc == 2 /* yuv422 */) { |
| 395 | 1266 | chroma_height = height; | |
| 396 | 1266 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 397 | 1266 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 398 | } else { /* yuv420 */ | ||
| 399 | 1499623 | chroma_height = height >> 1; | |
| 400 | 1499623 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 401 | 1499623 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 402 | } | ||
| 403 | 1606575 | x_offset += 8 * sl->mb_x; | |
| 404 | 1606575 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
| 405 | |||
| 406 |
4/4✓ Branch 0 taken 1554988 times.
✓ Branch 1 taken 51587 times.
✓ Branch 2 taken 335473 times.
✓ Branch 3 taken 1219515 times.
|
1942048 | if (list0 && list1) { |
| 407 | /* don't optimize for luma-only case, since B-frames usually | ||
| 408 | * use implicit weights => chroma too. */ | ||
| 409 | 335473 | uint8_t *tmp_cb = sl->bipred_scratchpad; | |
| 410 | 335473 | uint8_t *tmp_cr = sl->bipred_scratchpad + (8 << pixel_shift + (chroma_idc == 3)); | |
| 411 | 335473 | uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize; | |
| 412 | 335473 | int refn0 = sl->ref_cache[0][scan8[n]]; | |
| 413 | 335473 | int refn1 = sl->ref_cache[1][scan8[n]]; | |
| 414 | |||
| 415 | 335473 | mc_dir_part(h, sl, &sl->ref_list[0][refn0], n, square, height, delta, 0, | |
| 416 | dest_y, dest_cb, dest_cr, | ||
| 417 | x_offset, y_offset, qpix_put, chroma_put, | ||
| 418 | pixel_shift, chroma_idc); | ||
| 419 | 335473 | mc_dir_part(h, sl, &sl->ref_list[1][refn1], n, square, height, delta, 1, | |
| 420 | tmp_y, tmp_cb, tmp_cr, | ||
| 421 | x_offset, y_offset, qpix_put, chroma_put, | ||
| 422 | pixel_shift, chroma_idc); | ||
| 423 | |||
| 424 |
2/2✓ Branch 0 taken 271150 times.
✓ Branch 1 taken 64323 times.
|
335473 | if (sl->pwt.use_weight == 2) { |
| 425 | 271150 | int weight0 = sl->pwt.implicit_weight[refn0][refn1][sl->mb_y & 1]; | |
| 426 | 271150 | int weight1 = 64 - weight0; | |
| 427 | 271150 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, | |
| 428 | height, 5, weight0, weight1, 0); | ||
| 429 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 430 | 271150 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, | |
| 431 | chroma_height, 5, weight0, weight1, 0); | ||
| 432 | 271150 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, | |
| 433 | chroma_height, 5, weight0, weight1, 0); | ||
| 434 | } | ||
| 435 | } else { | ||
| 436 | 64323 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, height, | |
| 437 | sl->pwt.luma_log2_weight_denom, | ||
| 438 | sl->pwt.luma_weight[refn0][0][0], | ||
| 439 | sl->pwt.luma_weight[refn1][1][0], | ||
| 440 | 64323 | sl->pwt.luma_weight[refn0][0][1] + | |
| 441 | 64323 | sl->pwt.luma_weight[refn1][1][1]); | |
| 442 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 443 | 64323 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height, | |
| 444 | sl->pwt.chroma_log2_weight_denom, | ||
| 445 | sl->pwt.chroma_weight[refn0][0][0][0], | ||
| 446 | sl->pwt.chroma_weight[refn1][1][0][0], | ||
| 447 | 64323 | sl->pwt.chroma_weight[refn0][0][0][1] + | |
| 448 | 64323 | sl->pwt.chroma_weight[refn1][1][0][1]); | |
| 449 | 64323 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, chroma_height, | |
| 450 | sl->pwt.chroma_log2_weight_denom, | ||
| 451 | sl->pwt.chroma_weight[refn0][0][1][0], | ||
| 452 | sl->pwt.chroma_weight[refn1][1][1][0], | ||
| 453 | 64323 | sl->pwt.chroma_weight[refn0][0][1][1] + | |
| 454 | 64323 | sl->pwt.chroma_weight[refn1][1][1][1]); | |
| 455 | } | ||
| 456 | } | ||
| 457 | } else { | ||
| 458 | 1271102 | int list = list1 ? 1 : 0; | |
| 459 | 1271102 | int refn = sl->ref_cache[list][scan8[n]]; | |
| 460 | 1271102 | H264Ref *ref = &sl->ref_list[list][refn]; | |
| 461 | 1271102 | mc_dir_part(h, sl, ref, n, square, height, delta, list, | |
| 462 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 463 | qpix_put, chroma_put, pixel_shift, chroma_idc); | ||
| 464 | |||
| 465 | 1271102 | luma_weight_op(dest_y, sl->mb_linesize, height, | |
| 466 | sl->pwt.luma_log2_weight_denom, | ||
| 467 | sl->pwt.luma_weight[refn][list][0], | ||
| 468 | sl->pwt.luma_weight[refn][list][1]); | ||
| 469 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 470 |
2/2✓ Branch 0 taken 551781 times.
✓ Branch 1 taken 719321 times.
|
1271102 | if (sl->pwt.use_weight_chroma) { |
| 471 | 551781 | chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height, | |
| 472 | sl->pwt.chroma_log2_weight_denom, | ||
| 473 | sl->pwt.chroma_weight[refn][list][0][0], | ||
| 474 | sl->pwt.chroma_weight[refn][list][0][1]); | ||
| 475 | 551781 | chroma_weight_op(dest_cr, sl->mb_uvlinesize, chroma_height, | |
| 476 | sl->pwt.chroma_log2_weight_denom, | ||
| 477 | sl->pwt.chroma_weight[refn][list][1][0], | ||
| 478 | sl->pwt.chroma_weight[refn][list][1][1]); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | } | ||
| 482 | 1606575 | } | |
| 483 | |||
| 484 | 15041606 | static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, | |
| 485 | int list, int pixel_shift, | ||
| 486 | int chroma_idc) | ||
| 487 | { | ||
| 488 | /* fetch pixels for estimated mv 4 macroblocks ahead | ||
| 489 | * optimized for 64byte cache lines */ | ||
| 490 | 15041606 | const int refn = sl->ref_cache[list][scan8[0]]; | |
| 491 |
2/2✓ Branch 0 taken 14660331 times.
✓ Branch 1 taken 381275 times.
|
15041606 | if (refn >= 0) { |
| 492 | 14660331 | const int mx = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * sl->mb_x + 8; | |
| 493 | 14660331 | const int my = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * sl->mb_y; | |
| 494 | 14660331 | uint8_t **src = sl->ref_list[list][refn].data; | |
| 495 | 14660331 | int off = mx * (1<< pixel_shift) + | |
| 496 | 14660331 | (my + (sl->mb_x & 3) * 4) * sl->mb_linesize + | |
| 497 | 14660331 | (64 << pixel_shift); | |
| 498 | 14660331 | h->vdsp.prefetch(src[0] + off, sl->linesize, 4); | |
| 499 |
2/2✓ Branch 0 taken 132628 times.
✓ Branch 1 taken 14527703 times.
|
14660331 | if (chroma_idc == 3 /* yuv444 */) { |
| 500 | 132628 | h->vdsp.prefetch(src[1] + off, sl->linesize, 4); | |
| 501 | 132628 | h->vdsp.prefetch(src[2] + off, sl->linesize, 4); | |
| 502 | } else { | ||
| 503 | 14527703 | off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->mb_x&7))*sl->uvlinesize; | |
| 504 | 14527703 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); | |
| 505 | } | ||
| 506 | } | ||
| 507 | 15041606 | } | |
| 508 | |||
| 509 | 6798220 | static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, | |
| 510 | uint8_t *src_y, | ||
| 511 | uint8_t *src_cb, uint8_t *src_cr, | ||
| 512 | int linesize, int uvlinesize, | ||
| 513 | int xchg, int chroma444, | ||
| 514 | int simple, int pixel_shift) | ||
| 515 | { | ||
| 516 | int deblock_topleft; | ||
| 517 | int deblock_top; | ||
| 518 | 6798220 | int top_idx = 1; | |
| 519 | uint8_t *top_border_m1; | ||
| 520 | uint8_t *top_border; | ||
| 521 | |||
| 522 |
4/4✓ Branch 0 taken 3067562 times.
✓ Branch 1 taken 3730658 times.
✓ Branch 2 taken 1929248 times.
✓ Branch 3 taken 1138314 times.
|
6798220 | if (!simple && FRAME_MBAFF(h)) { |
| 523 |
2/2✓ Branch 0 taken 969466 times.
✓ Branch 1 taken 959782 times.
|
1929248 | if (sl->mb_y & 1) { |
| 524 |
2/2✓ Branch 0 taken 749392 times.
✓ Branch 1 taken 220074 times.
|
969466 | if (!MB_MBAFF(sl)) |
| 525 | 749392 | return; | |
| 526 | } else { | ||
| 527 | 959782 | top_idx = MB_MBAFF(sl) ? 0 : 1; | |
| 528 | } | ||
| 529 | } | ||
| 530 | |||
| 531 |
2/2✓ Branch 0 taken 61104 times.
✓ Branch 1 taken 5987724 times.
|
6048828 | if (sl->deblocking_filter == 2) { |
| 532 | 61104 | deblock_topleft = h->slice_table[sl->mb_xy - 1 - (h->mb_stride << MB_FIELD(sl))] == sl->slice_num; | |
| 533 | 61104 | deblock_top = sl->top_type; | |
| 534 | } else { | ||
| 535 | 5987724 | deblock_topleft = (sl->mb_x > 0); | |
| 536 | 5987724 | deblock_top = (sl->mb_y > !!MB_FIELD(sl)); | |
| 537 | } | ||
| 538 | |||
| 539 | 6048828 | src_y -= linesize + 1 + pixel_shift; | |
| 540 | 6048828 | src_cb -= uvlinesize + 1 + pixel_shift; | |
| 541 | 6048828 | src_cr -= uvlinesize + 1 + pixel_shift; | |
| 542 | |||
| 543 | 6048828 | top_border_m1 = sl->top_borders[top_idx][sl->mb_x - 1]; | |
| 544 | 6048828 | top_border = sl->top_borders[top_idx][sl->mb_x]; | |
| 545 | |||
| 546 | #define XCHG(a, b, xchg) \ | ||
| 547 | if (pixel_shift) { \ | ||
| 548 | if (xchg) { \ | ||
| 549 | AV_SWAP64(b + 0, a + 0); \ | ||
| 550 | AV_SWAP64(b + 8, a + 8); \ | ||
| 551 | } else { \ | ||
| 552 | AV_COPY128(b, a); \ | ||
| 553 | } \ | ||
| 554 | } else if (xchg) \ | ||
| 555 | AV_SWAP64(b, a); \ | ||
| 556 | else \ | ||
| 557 | AV_COPY64(b, a); | ||
| 558 | |||
| 559 |
2/2✓ Branch 0 taken 5811572 times.
✓ Branch 1 taken 237256 times.
|
6048828 | if (deblock_top) { |
| 560 |
2/2✓ Branch 0 taken 5703728 times.
✓ Branch 1 taken 107844 times.
|
5811572 | if (deblock_topleft) { |
| 561 |
2/2✓ Branch 0 taken 2294316 times.
✓ Branch 1 taken 3409412 times.
|
5703728 | XCHG(top_border_m1 + (8 << pixel_shift), |
| 562 | src_y - (7 << pixel_shift), 1); | ||
| 563 | } | ||
| 564 |
6/6✓ Branch 0 taken 2316050 times.
✓ Branch 1 taken 3495522 times.
✓ Branch 2 taken 1158025 times.
✓ Branch 3 taken 1158025 times.
✓ Branch 5 taken 1747761 times.
✓ Branch 6 taken 1747761 times.
|
5811572 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
| 565 |
2/2✓ Branch 0 taken 2316050 times.
✓ Branch 1 taken 3495522 times.
|
5811572 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); |
| 566 |
2/2✓ Branch 0 taken 5684850 times.
✓ Branch 1 taken 126722 times.
|
5811572 | if (sl->mb_x + 1 < h->mb_width) { |
| 567 |
2/2✓ Branch 0 taken 2293096 times.
✓ Branch 1 taken 3391754 times.
|
5684850 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1], |
| 568 | src_y + (17 << pixel_shift), 1); | ||
| 569 | } | ||
| 570 | if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 571 |
2/2✓ Branch 0 taken 121572 times.
✓ Branch 1 taken 5690000 times.
|
5811572 | if (chroma444) { |
| 572 |
2/2✓ Branch 0 taken 120330 times.
✓ Branch 1 taken 1242 times.
|
121572 | if (deblock_topleft) { |
| 573 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
| 574 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
| 575 | } | ||
| 576 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); |
| 577 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); |
| 578 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); |
| 579 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); |
| 580 |
2/2✓ Branch 0 taken 120314 times.
✓ Branch 1 taken 1258 times.
|
121572 | if (sl->mb_x + 1 < h->mb_width) { |
| 581 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); |
| 582 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); |
| 583 | } | ||
| 584 | } else { | ||
| 585 |
2/2✓ Branch 0 taken 5583398 times.
✓ Branch 1 taken 106602 times.
|
5690000 | if (deblock_topleft) { |
| 586 |
2/2✓ Branch 0 taken 2292064 times.
✓ Branch 1 taken 3291334 times.
|
5583398 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
| 587 |
2/2✓ Branch 0 taken 2292064 times.
✓ Branch 1 taken 3291334 times.
|
5583398 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
| 588 | } | ||
| 589 |
2/2✓ Branch 0 taken 2313658 times.
✓ Branch 1 taken 3376342 times.
|
5690000 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
| 590 |
2/2✓ Branch 0 taken 2313658 times.
✓ Branch 1 taken 3376342 times.
|
5690000 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); |
| 591 | } | ||
| 592 | } | ||
| 593 | } | ||
| 594 | } | ||
| 595 | |||
| 596 | 4490117 | /* Return coefficient number index of mb: read with AV_RN32A through an int32_t view of mb when high_bit_depth is nonzero, with AV_RN16A from the int16_t array otherwise. */ static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, | |
| 597 | int index) | ||
| 598 | { | ||
| 599 |
2/2✓ Branch 0 taken 1122317 times.
✓ Branch 1 taken 3367800 times.
|
4490117 | if (high_bit_depth) { |
| 600 | 1122317 | return AV_RN32A(((int32_t *)mb) + index); /* index counts int32_t elements on this path */ | |
| 601 | } else | ||
| 602 | 3367800 | return AV_RN16A(mb + index); /* index counts int16_t elements on this path */ | |
| 603 | } | ||
| 604 | |||
| 605 | 9792 | /* Store value as coefficient number index of mb: written with AV_WN32A through an int32_t view of mb when high_bit_depth is nonzero, with AV_WN16A into the int16_t array otherwise. Counterpart of dctcoef_get. */ static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, | |
| 606 | int index, int value) | ||
| 607 | { | ||
| 608 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9792 times.
|
9792 | if (high_bit_depth) { |
| 609 | ✗ | AV_WN32A(((int32_t *)mb) + index, value); /* index counts int32_t elements on this path */ | |
| 610 | } else | ||
| 611 | 9792 | AV_WN16A(mb + index, value); /* index counts int16_t elements on this path */ | |
| 612 | 9792 | } | |
| 613 | |||
| 614 | 4256920 | /* Intra-predict plane p of the current macroblock into dest_y. Macroblocks for which IS_INTRA4x4 holds are processed block by block, each prediction being followed at once by that block's residual add; every other type gets a single 16x16 prediction plus preparation of the DC coefficients in sl->mb. The simple parameter is not referenced in this body. */ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, | |
| 615 | H264SliceContext *sl, | ||
| 616 | int mb_type, int simple, | ||
| 617 | int transform_bypass, | ||
| 618 | int pixel_shift, | ||
| 619 | const int *block_offset, | ||
| 620 | int linesize, | ||
| 621 | uint8_t *dest_y, int p) | ||
| 622 | { | ||
| 623 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 624 | void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 625 | int i; | ||
| 626 |
2/2✓ Branch 0 taken 4124052 times.
✓ Branch 1 taken 132868 times.
|
4256920 | int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1]; /* p == 0 takes the slice qscale, any other p takes chroma_qp[p - 1] */ |
| 627 | 4256920 | block_offset += 16 * p; /* move to the 16 offsets that belong to p */ | |
| 628 |
2/2✓ Branch 0 taken 3196922 times.
✓ Branch 1 taken 1059998 times.
|
4256920 | if (IS_INTRA4x4(mb_type)) { |
| 629 |
2/2✓ Branch 0 taken 1645503 times.
✓ Branch 1 taken 1551419 times.
|
3196922 | if (IS_8x8DCT(mb_type)) { /* 8x8 transform: the loop below visits i = 0, 4, 8, 12 */ |
| 630 |
2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 1644843 times.
|
1645503 | if (transform_bypass) { /* bypass: one add_pixels routine serves as both the DC-only and the full add */ |
| 631 | 660 | idct_dc_add = | |
| 632 | 660 | idct_add = h->h264dsp.h264_add_pixels8_clear; | |
| 633 | } else { | ||
| 634 | 1644843 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; | |
| 635 | 1644843 | idct_add = h->h264dsp.h264_idct8_add; | |
| 636 | } | ||
| 637 |
2/2✓ Branch 0 taken 6582012 times.
✓ Branch 1 taken 1645503 times.
|
8227515 | for (i = 0; i < 16; i += 4) { |
| 638 | 6582012 | uint8_t *const ptr = dest_y + block_offset[i]; | |
| 639 | 6582012 | const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; | |
| 640 |
5/6✓ Branch 0 taken 2640 times.
✓ Branch 1 taken 6579372 times.
✓ Branch 2 taken 2640 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2488 times.
✓ Branch 5 taken 152 times.
|
6582012 | if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { /* bypass with profile_idc 244 and dir <= 1: prediction and residual are applied by one combined call */ |
| 641 |
1/2✓ Branch 0 taken 2488 times.
✗ Branch 1 not taken.
|
2488 | if (h->x264_build < 151U) { /* NOTE(review): builds below 151 take the variant without the availability arguments, presumably to match older x264 output - confirm */ |
| 642 | 2488 | h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 643 | } else | ||
| 644 | ✗ | h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 645 | ✗ | (sl-> topleft_samples_available << i) & 0x8000, | |
| 646 | ✗ | (sl->topright_samples_available << i) & 0x4000, linesize); | |
| 647 | } else { | ||
| 648 | 6579524 | const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; | |
| 649 | 6579524 | h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, | |
| 650 | 6579524 | (sl->topright_samples_available << i) & 0x4000, linesize); | |
| 651 |
2/2✓ Branch 0 taken 5168020 times.
✓ Branch 1 taken 1411504 times.
|
6579524 | if (nnz) { |
| 652 |
4/4✓ Branch 0 taken 561587 times.
✓ Branch 1 taken 4606433 times.
✓ Branch 3 taken 250995 times.
✓ Branch 4 taken 310592 times.
|
5168020 | if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) /* cached count is 1 and the block's first coefficient is nonzero: use the DC-only routine */ |
| 653 | 250995 | idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 654 | else | ||
| 655 | 4917025 | idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 656 | } | ||
| 657 | } | ||
| 658 | } | ||
| 659 | } else { | ||
| 660 |
2/2✓ Branch 0 taken 15230 times.
✓ Branch 1 taken 1536189 times.
|
1551419 | if (transform_bypass) { /* bypass: one add_pixels routine serves as both the DC-only and the full add */ |
| 661 | 15230 | idct_dc_add = | |
| 662 | 15230 | idct_add = h->h264dsp.h264_add_pixels4_clear; | |
| 663 | } else { | ||
| 664 | 1536189 | idct_dc_add = h->h264dsp.h264_idct_dc_add; | |
| 665 | 1536189 | idct_add = h->h264dsp.h264_idct_add; | |
| 666 | } | ||
| 667 |
2/2✓ Branch 0 taken 24822704 times.
✓ Branch 1 taken 1551419 times.
|
26374123 | for (i = 0; i < 16; i++) { |
| 668 | 24822704 | uint8_t *const ptr = dest_y + block_offset[i]; | |
| 669 | 24822704 | const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; | |
| 670 | |||
| 671 |
5/6✓ Branch 0 taken 243680 times.
✓ Branch 1 taken 24579024 times.
✓ Branch 2 taken 243680 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 180369 times.
✓ Branch 5 taken 63311 times.
|
24822704 | if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { /* same combined predict-and-add shortcut as in the 8x8 branch above */ |
| 672 | 180369 | h->hpc.pred4x4_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 673 | } else { | ||
| 674 | uint8_t *topright; | ||
| 675 | int nnz, tr; | ||
| 676 | uint64_t tr_high; | ||
| 677 |
4/4✓ Branch 0 taken 23471503 times.
✓ Branch 1 taken 1170832 times.
✓ Branch 2 taken 1123120 times.
✓ Branch 3 taken 22348383 times.
|
26936287 | if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) { /* only these two modes get a topright pointer; every other mode receives NULL */ |
| 678 | 2293952 | const int topright_avail = (sl->topright_samples_available << i) & 0x8000; | |
| 679 | av_assert2(sl->mb_y || linesize <= block_offset[i]); | ||
| 680 |
2/2✓ Branch 0 taken 635553 times.
✓ Branch 1 taken 1658399 times.
|
2293952 | if (!topright_avail) { /* top-right unavailable: replicate the sample above column 3 four times into a local and point topright at it */ |
| 681 |
2/2✓ Branch 0 taken 248405 times.
✓ Branch 1 taken 387148 times.
|
635553 | if (pixel_shift) { |
| 682 | 248405 | tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL; /* 16-bit samples: linesize is halved to index uint16_t elements */ | |
| 683 | 248405 | topright = (uint8_t *)&tr_high; | |
| 684 | } else { | ||
| 685 | 387148 | tr = ptr[3 - linesize] * 0x01010101u; | |
| 686 | 387148 | topright = (uint8_t *)&tr; | |
| 687 | } | ||
| 688 | } else | ||
| 689 | 1658399 | topright = ptr + (4 << pixel_shift) - linesize; /* top-right available: one row up, 4 << pixel_shift bytes to the right */ | |
| 690 | } else | ||
| 691 | 22348383 | topright = NULL; | |
| 692 | |||
| 693 | 24642335 | h->hpc.pred4x4[dir](ptr, topright, linesize); | |
| 694 | 24642335 | nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; | |
| 695 |
2/2✓ Branch 0 taken 16848177 times.
✓ Branch 1 taken 7794158 times.
|
24642335 | if (nnz) { |
| 696 |
4/4✓ Branch 0 taken 3885293 times.
✓ Branch 1 taken 12962884 times.
✓ Branch 3 taken 2240876 times.
✓ Branch 4 taken 1644417 times.
|
16848177 | if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) /* cached count is 1 and the block's first coefficient is nonzero: use the DC-only routine */ |
| 697 | 2240876 | idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 698 | else | ||
| 699 | 14607301 | idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 700 | } | ||
| 701 | } | ||
| 702 | } | ||
| 703 | } | ||
| 704 | } else { | ||
| 705 | 1059998 | h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize); /* not intra 4x4: one prediction call for the whole 16x16 area */ | |
| 706 |
2/2✓ Branch 0 taken 617942 times.
✓ Branch 1 taken 442056 times.
|
1059998 | if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { /* DC work is done only when the cached entry for the DC block of p is nonzero */ |
| 707 |
2/2✓ Branch 0 taken 617330 times.
✓ Branch 1 taken 612 times.
|
617942 | if (!transform_bypass) |
| 708 | 617330 | h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift), | |
| 709 | 617330 | sl->mb_luma_dc[p], | |
| 710 | 617330 | h->ps.pps->dequant4_coeff[p][qscale][0]); | |
| 711 | else { | ||
| 712 | static const uint8_t dc_mapping[16] = { | ||
| 713 | 0 * 16, 1 * 16, 4 * 16, 5 * 16, | ||
| 714 | 2 * 16, 3 * 16, 6 * 16, 7 * 16, | ||
| 715 | 8 * 16, 9 * 16, 12 * 16, 13 * 16, | ||
| 716 | 10 * 16, 11 * 16, 14 * 16, 15 * 16 | ||
| 717 | }; | ||
| 718 |
2/2✓ Branch 0 taken 9792 times.
✓ Branch 1 taken 612 times.
|
10404 | for (i = 0; i < 16; i++) /* bypass: copy the 16 values of mb_luma_dc[p] unchanged to the coefficient positions listed in dc_mapping */ |
| 719 | 19584 | dctcoef_set(sl->mb + (p * 256 << pixel_shift), | |
| 720 | 9792 | pixel_shift, dc_mapping[i], | |
| 721 | 9792 | dctcoef_get(sl->mb_luma_dc[p], | |
| 722 | pixel_shift, i)); | ||
| 723 | } | ||
| 724 | } | ||
| 725 | } | ||
| 726 | 4256920 | } | |
| 727 | |||
| 728 | 15715527 | /* Add the residual of plane p to dest_y for macroblock types other than intra 4x4; when IS_INTRA4x4 holds this function only advances its local block_offset and returns. The simple parameter is not referenced in this body. */ static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, | |
| 729 | int mb_type, int simple, | ||
| 730 | int transform_bypass, | ||
| 731 | int pixel_shift, | ||
| 732 | const int *block_offset, | ||
| 733 | int linesize, | ||
| 734 | uint8_t *dest_y, int p) | ||
| 735 | { | ||
| 736 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 737 | int i; | ||
| 738 | 15715527 | block_offset += 16 * p; /* move to the 16 offsets that belong to p */ | |
| 739 |
2/2✓ Branch 0 taken 12518605 times.
✓ Branch 1 taken 3196922 times.
|
15715527 | if (!IS_INTRA4x4(mb_type)) { |
| 740 |
2/2✓ Branch 0 taken 1059998 times.
✓ Branch 1 taken 11458607 times.
|
12518605 | if (IS_INTRA16x16(mb_type)) { |
| 741 |
2/2✓ Branch 0 taken 864 times.
✓ Branch 1 taken 1059134 times.
|
1059998 | if (transform_bypass) { |
| 742 |
1/2✓ Branch 0 taken 864 times.
✗ Branch 1 not taken.
|
864 | if (h->ps.sps->profile_idc == 244 && /* bypass with profile_idc 244 and a vertical or horizontal 16x16 mode: one combined pred16x16_add call */ |
| 743 |
2/2✓ Branch 0 taken 502 times.
✓ Branch 1 taken 362 times.
|
864 | (sl->intra16x16_pred_mode == VERT_PRED8x8 || |
| 744 |
2/2✓ Branch 0 taken 278 times.
✓ Branch 1 taken 224 times.
|
502 | sl->intra16x16_pred_mode == HOR_PRED8x8)) { |
| 745 | 640 | h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset, | |
| 746 | 640 | sl->mb + (p * 256 << pixel_shift), | |
| 747 | linesize); | ||
| 748 | } else { | ||
| 749 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 224 times.
|
3808 | for (i = 0; i < 16; i++) /* other bypass cases: add each of the 16 blocks whose cached count or first coefficient is nonzero */ |
| 750 |
3/4✓ Branch 0 taken 40 times.
✓ Branch 1 taken 3544 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
|
3624 | if (sl->non_zero_count_cache[scan8[i + p * 16]] || |
| 751 | 40 | dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) | |
| 752 | 3544 | h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], | |
| 753 | 3544 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 754 | linesize); | ||
| 755 | } | ||
| 756 | } else { | ||
| 757 | 1059134 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, /* no bypass: a single call that receives the offsets and the cached counts of p */ | |
| 758 | 1059134 | sl->mb + (p * 256 << pixel_shift), | |
| 759 | linesize, | ||
| 760 | 1059134 | sl->non_zero_count_cache + p * 5 * 8); | |
| 761 | } | ||
| 762 |
2/2✓ Branch 0 taken 6448572 times.
✓ Branch 1 taken 5010035 times.
|
11458607 | } else if (sl->cbp & 15) { /* remaining types: work only when one of the low four cbp bits is set */ |
| 763 |
2/2✓ Branch 0 taken 339104 times.
✓ Branch 1 taken 6109468 times.
|
6448572 | if (transform_bypass) { |
| 764 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; /* 8x8 transform visits i = 0, 4, 8, 12; otherwise every i is visited */ |
| 765 | 678208 | idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear | |
| 766 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | : h->h264dsp.h264_add_pixels4_clear; |
| 767 |
2/2✓ Branch 0 taken 5220812 times.
✓ Branch 1 taken 339104 times.
|
5559916 | for (i = 0; i < 16; i += di) |
| 768 |
2/2✓ Branch 0 taken 722104 times.
✓ Branch 1 taken 4498708 times.
|
5220812 | if (sl->non_zero_count_cache[scan8[i + p * 16]]) |
| 769 | 722104 | idct_add(dest_y + block_offset[i], | |
| 770 | 722104 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 771 | linesize); | ||
| 772 | } else { | ||
| 773 |
2/2✓ Branch 0 taken 1051053 times.
✓ Branch 1 taken 5058415 times.
|
6109468 | if (IS_8x8DCT(mb_type)) /* no bypass: one DSP call per plane, chosen by transform size */ |
| 774 | 1051053 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, | |
| 775 | 1051053 | sl->mb + (p * 256 << pixel_shift), | |
| 776 | linesize, | ||
| 777 | 1051053 | sl->non_zero_count_cache + p * 5 * 8); | |
| 778 | else | ||
| 779 | 5058415 | h->h264dsp.h264_idct_add16(dest_y, block_offset, | |
| 780 | 5058415 | sl->mb + (p * 256 << pixel_shift), | |
| 781 | linesize, | ||
| 782 | 5058415 | sl->non_zero_count_cache + p * 5 * 8); | |
| 783 | } | ||
| 784 | } | ||
| 785 | } | ||
| 786 | 15715527 | } | |
| 787 | |||
| 788 | #define BITS 8 /* h264_mb_template.c is included three times with different BITS and SIMPLE settings; NOTE(review): presumably each inclusion emits one set of the hl_decode_mb_* variants called from ff_h264_hl_decode_mb - confirm in the template */ | ||
| 789 | #define SIMPLE 1 | ||
| 790 | #include "h264_mb_template.c" /* first inclusion: BITS 8, SIMPLE 1 */ | ||
| 791 | |||
| 792 | #undef BITS | ||
| 793 | #define BITS 16 | ||
| 794 | #include "h264_mb_template.c" /* second inclusion: BITS 16, SIMPLE still 1 */ | ||
| 795 | |||
| 796 | #undef SIMPLE | ||
| 797 | #define SIMPLE 0 | ||
| 798 | #include "h264_mb_template.c" /* third inclusion: BITS still 16, SIMPLE 0 */ | ||
| 799 | |||
| 800 | 15353316 | /* Dispatch the macroblock at sl->mb_xy to one of the hl_decode_mb_* variants, chosen from CHROMA444(h), the complexity test below and h->pixel_shift. */ void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl) | |
| 801 | { | ||
| 802 | 15353316 | const int mb_xy = sl->mb_xy; | |
| 803 | 15353316 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
| 804 | 39034873 | int is_complex = CONFIG_SMALL || sl->is_complex || /* complex path when CONFIG_SMALL is set, when the slice is flagged complex, for IS_INTRA_PCM macroblocks, or when qscale is 0 */ | |
| 805 |
6/6✓ Branch 0 taken 8328241 times.
✓ Branch 1 taken 7025075 times.
✓ Branch 2 taken 8305136 times.
✓ Branch 3 taken 23105 times.
✓ Branch 4 taken 136878 times.
✓ Branch 5 taken 8168258 times.
|
15353316 | IS_INTRA_PCM(mb_type) || sl->qscale == 0; |
| 806 | |||
| 807 |
2/2✓ Branch 0 taken 192658 times.
✓ Branch 1 taken 15160658 times.
|
15353316 | if (CHROMA444(h)) { |
| 808 |
4/4✓ Branch 0 taken 79866 times.
✓ Branch 1 taken 112792 times.
✓ Branch 2 taken 19800 times.
✓ Branch 3 taken 60066 times.
|
192658 | if (is_complex || h->pixel_shift) /* with CHROMA444 only the non-complex, pixel_shift 0 case takes the simple variant */ |
| 809 | 132592 | hl_decode_mb_444_complex(h, sl); | |
| 810 | else | ||
| 811 | 60066 | hl_decode_mb_444_simple_8(h, sl); | |
| 812 |
2/2✓ Branch 0 taken 7072266 times.
✓ Branch 1 taken 8088392 times.
|
15160658 | } else if (is_complex) { |
| 813 | 7072266 | hl_decode_mb_complex(h, sl); | |
| 814 |
2/2✓ Branch 0 taken 1154360 times.
✓ Branch 1 taken 6934032 times.
|
8088392 | } else if (h->pixel_shift) { /* non-complex: nonzero pixel_shift selects the _16 variant, zero the _8 variant */ |
| 815 | 1154360 | hl_decode_mb_simple_16(h, sl); | |
| 816 | } else | ||
| 817 | 6934032 | hl_decode_mb_simple_8(h, sl); | |
| 818 | 15353316 | } | |
| 819 |