| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * H.26L/H.264/AVC/JVT/14496-10/... decoder | ||
| 3 | * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | /** | ||
| 23 | * @file | ||
| 24 | * H.264 / AVC / MPEG-4 part10 macroblock decoding | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <stdint.h> | ||
| 28 | |||
| 29 | #include "config.h" | ||
| 30 | |||
| 31 | #include "libavutil/common.h" | ||
| 32 | #include "libavutil/intreadwrite.h" | ||
| 33 | #include "avcodec.h" | ||
| 34 | #include "h264dec.h" | ||
| 35 | #include "h264_ps.h" | ||
| 36 | #include "qpeldsp.h" | ||
| 37 | #include "rectangle.h" | ||
| 38 | #include "threadframe.h" | ||
| 39 | |||
| 40 | 136653 | static inline int get_lowest_part_list_y(H264SliceContext *sl, | |
| 41 | int n, int height, int y_offset, int list) | ||
| 42 | { | ||
| 43 | 136653 | int raw_my = sl->mv_cache[list][scan8[n]][1]; | |
| 44 |
2/2✓ Branch 0 taken 23509 times.
✓ Branch 1 taken 113144 times.
|
136653 | int filter_height_down = (raw_my & 3) ? 3 : 0; |
| 45 | 136653 | int full_my = (raw_my >> 2) + y_offset; | |
| 46 | 136653 | int bottom = full_my + filter_height_down + height; | |
| 47 | |||
| 48 | av_assert2(height >= 0); | ||
| 49 | |||
| 50 | 136653 | return FFMAX(0, bottom); | |
| 51 | } | ||
| 52 | |||
| 53 | 112897 | static inline void get_lowest_part_y(const H264Context *h, H264SliceContext *sl, | |
| 54 | int16_t refs[2][48], int n, | ||
| 55 | int height, int y_offset, int list0, | ||
| 56 | int list1, int *nrefs) | ||
| 57 | { | ||
| 58 | int my; | ||
| 59 | |||
| 60 | 112897 | y_offset += 16 * (sl->mb_y >> MB_FIELD(sl)); | |
| 61 | |||
| 62 |
2/2✓ Branch 0 taken 105424 times.
✓ Branch 1 taken 7473 times.
|
112897 | if (list0) { |
| 63 | 105424 | int ref_n = sl->ref_cache[0][scan8[n]]; | |
| 64 | 105424 | H264Ref *ref = &sl->ref_list[0][ref_n]; | |
| 65 | |||
| 66 | // Error resilience puts the current picture in the ref list. | ||
| 67 | // Don't try to wait on these as it will cause a deadlock. | ||
| 68 | // Fields can wait on each other, though. | ||
| 69 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 105424 times.
|
105424 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
| 70 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
| 71 | 105424 | my = get_lowest_part_list_y(sl, n, height, y_offset, 0); | |
| 72 |
2/2✓ Branch 0 taken 92282 times.
✓ Branch 1 taken 13142 times.
|
105424 | if (refs[0][ref_n] < 0) |
| 73 | 92282 | nrefs[0] += 1; | |
| 74 | 105424 | refs[0][ref_n] = FFMAX(refs[0][ref_n], my); | |
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 |
2/2✓ Branch 0 taken 31229 times.
✓ Branch 1 taken 81668 times.
|
112897 | if (list1) { |
| 79 | 31229 | int ref_n = sl->ref_cache[1][scan8[n]]; | |
| 80 | 31229 | H264Ref *ref = &sl->ref_list[1][ref_n]; | |
| 81 | |||
| 82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 31229 times.
|
31229 | if (ref->parent->tf.progress != h->cur_pic.tf.progress || |
| 83 | ✗ | (ref->reference & 3) != h->picture_structure) { | |
| 84 | 31229 | my = get_lowest_part_list_y(sl, n, height, y_offset, 1); | |
| 85 |
2/2✓ Branch 0 taken 29028 times.
✓ Branch 1 taken 2201 times.
|
31229 | if (refs[1][ref_n] < 0) |
| 86 | 29028 | nrefs[1] += 1; | |
| 87 | 31229 | refs[1][ref_n] = FFMAX(refs[1][ref_n], my); | |
| 88 | } | ||
| 89 | } | ||
| 90 | 112897 | } | |
| 91 | |||
| 92 | /** | ||
| 93 | * Wait until all reference frames are available for MC operations. | ||
| 94 | * | ||
| 95 | * @param h the H.264 context | ||
| 96 | */ | ||
| 97 | 95468 | static void await_references(const H264Context *h, H264SliceContext *sl) | |
| 98 | { | ||
| 99 | 95468 | const int mb_xy = sl->mb_xy; | |
| 100 | 95468 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
| 101 | int16_t refs[2][48]; | ||
| 102 | 95468 | int nrefs[2] = { 0 }; | |
| 103 | int ref, list; | ||
| 104 | |||
| 105 | 95468 | memset(refs, -1, sizeof(refs)); | |
| 106 | |||
| 107 |
2/2✓ Branch 0 taken 85183 times.
✓ Branch 1 taken 10285 times.
|
95468 | if (IS_16X16(mb_type)) { |
| 108 | 85183 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
| 109 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 110 |
2/2✓ Branch 0 taken 3423 times.
✓ Branch 1 taken 6862 times.
|
10285 | } else if (IS_16X8(mb_type)) { |
| 111 | 3423 | get_lowest_part_y(h, sl, refs, 0, 8, 0, | |
| 112 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 113 | 3423 | get_lowest_part_y(h, sl, refs, 8, 8, 8, | |
| 114 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
| 115 |
2/2✓ Branch 0 taken 3290 times.
✓ Branch 1 taken 3572 times.
|
6862 | } else if (IS_8X16(mb_type)) { |
| 116 | 3290 | get_lowest_part_y(h, sl, refs, 0, 16, 0, | |
| 117 | IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs); | ||
| 118 | 3290 | get_lowest_part_y(h, sl, refs, 4, 16, 0, | |
| 119 | IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs); | ||
| 120 | } else { | ||
| 121 | int i; | ||
| 122 | |||
| 123 | av_assert2(IS_8X8(mb_type)); | ||
| 124 | |||
| 125 |
2/2✓ Branch 0 taken 14288 times.
✓ Branch 1 taken 3572 times.
|
17860 | for (i = 0; i < 4; i++) { |
| 126 | 14288 | const int sub_mb_type = sl->sub_mb_type[i]; | |
| 127 | 14288 | const int n = 4 * i; | |
| 128 | 14288 | int y_offset = (i & 2) << 2; | |
| 129 | |||
| 130 |
1/2✓ Branch 0 taken 14288 times.
✗ Branch 1 not taken.
|
14288 | if (IS_SUB_8X8(sub_mb_type)) { |
| 131 | 14288 | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
| 132 | IS_DIR(sub_mb_type, 0, 0), | ||
| 133 | IS_DIR(sub_mb_type, 0, 1), | ||
| 134 | nrefs); | ||
| 135 | ✗ | } else if (IS_SUB_8X4(sub_mb_type)) { | |
| 136 | ✗ | get_lowest_part_y(h, sl, refs, n, 4, y_offset, | |
| 137 | IS_DIR(sub_mb_type, 0, 0), | ||
| 138 | IS_DIR(sub_mb_type, 0, 1), | ||
| 139 | nrefs); | ||
| 140 | ✗ | get_lowest_part_y(h, sl, refs, n + 2, 4, y_offset + 4, | |
| 141 | IS_DIR(sub_mb_type, 0, 0), | ||
| 142 | IS_DIR(sub_mb_type, 0, 1), | ||
| 143 | nrefs); | ||
| 144 | ✗ | } else if (IS_SUB_4X8(sub_mb_type)) { | |
| 145 | ✗ | get_lowest_part_y(h, sl, refs, n, 8, y_offset, | |
| 146 | IS_DIR(sub_mb_type, 0, 0), | ||
| 147 | IS_DIR(sub_mb_type, 0, 1), | ||
| 148 | nrefs); | ||
| 149 | ✗ | get_lowest_part_y(h, sl, refs, n + 1, 8, y_offset, | |
| 150 | IS_DIR(sub_mb_type, 0, 0), | ||
| 151 | IS_DIR(sub_mb_type, 0, 1), | ||
| 152 | nrefs); | ||
| 153 | } else { | ||
| 154 | int j; | ||
| 155 | av_assert2(IS_SUB_4X4(sub_mb_type)); | ||
| 156 | ✗ | for (j = 0; j < 4; j++) { | |
| 157 | ✗ | int sub_y_offset = y_offset + 2 * (j & 2); | |
| 158 | ✗ | get_lowest_part_y(h, sl, refs, n + j, 4, sub_y_offset, | |
| 159 | IS_DIR(sub_mb_type, 0, 0), | ||
| 160 | IS_DIR(sub_mb_type, 0, 1), | ||
| 161 | nrefs); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 125893 times.
✓ Branch 1 taken 95468 times.
|
221361 | for (list = sl->list_count - 1; list >= 0; list--) |
| 168 |
3/4✓ Branch 0 taken 255047 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 129154 times.
✓ Branch 3 taken 125893 times.
|
255047 | for (ref = 0; ref < 48 && nrefs[list]; ref++) { |
| 169 | 129154 | int row = refs[list][ref]; | |
| 170 |
2/2✓ Branch 0 taken 121310 times.
✓ Branch 1 taken 7844 times.
|
129154 | if (row >= 0) { |
| 171 | 121310 | H264Ref *ref_pic = &sl->ref_list[list][ref]; | |
| 172 | 121310 | int ref_field = ref_pic->reference - 1; | |
| 173 | 121310 | int ref_field_picture = ref_pic->parent->field_picture; | |
| 174 | 121310 | int pic_height = 16 * h->mb_height >> ref_field_picture; | |
| 175 | |||
| 176 | 121310 | row <<= MB_MBAFF(sl); | |
| 177 | 121310 | nrefs[list]--; | |
| 178 | |||
| 179 |
2/4✓ Branch 0 taken 121310 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 121310 times.
|
121310 | if (!FIELD_PICTURE(h) && ref_field_picture) { // frame referencing two fields |
| 180 | av_assert2((ref_pic->parent->reference & 3) == 3); | ||
| 181 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 182 | ✗ | FFMIN((row >> 1) - !(row & 1), | |
| 183 | pic_height - 1), | ||
| 184 | 1); | ||
| 185 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 186 | ✗ | FFMIN((row >> 1), pic_height - 1), | |
| 187 | 0); | ||
| 188 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
121310 | } else if (FIELD_PICTURE(h) && !ref_field_picture) { // field referencing one field of a frame |
| 189 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 190 | ✗ | FFMIN(row * 2 + ref_field, | |
| 191 | pic_height - 1), | ||
| 192 | 0); | ||
| 193 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 121310 times.
|
121310 | } else if (FIELD_PICTURE(h)) { |
| 194 | ✗ | ff_thread_await_progress(&ref_pic->parent->tf, | |
| 195 | FFMIN(row, pic_height - 1), | ||
| 196 | ref_field); | ||
| 197 | } else { | ||
| 198 |
2/2✓ Branch 0 taken 2814 times.
✓ Branch 1 taken 118496 times.
|
121310 | ff_thread_await_progress(&ref_pic->parent->tf, |
| 199 | FFMIN(row, pic_height - 1), | ||
| 200 | 0); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | } | ||
| 204 | 95468 | } | |
| 205 | |||
| 206 | 28030053 | static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext *sl, | |
| 207 | H264Ref *pic, | ||
| 208 | int n, int square, int height, | ||
| 209 | int delta, int list, | ||
| 210 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 211 | uint8_t *dest_cr, | ||
| 212 | int src_x_offset, int src_y_offset, | ||
| 213 | const qpel_mc_func *qpix_op, | ||
| 214 | h264_chroma_mc_func chroma_op, | ||
| 215 | int pixel_shift, int chroma_idc) | ||
| 216 | { | ||
| 217 | 28030053 | const int mx = sl->mv_cache[list][scan8[n]][0] + src_x_offset * 8; | |
| 218 | 28030053 | int my = sl->mv_cache[list][scan8[n]][1] + src_y_offset * 8; | |
| 219 | 28030053 | const int luma_xy = (mx & 3) + ((my & 3) << 2); | |
| 220 | 28030053 | ptrdiff_t offset = (mx >> 2) * (1 << pixel_shift) + (my >> 2) * sl->mb_linesize; | |
| 221 | 28030053 | uint8_t *src_y = pic->data[0] + offset; | |
| 222 | uint8_t *src_cb, *src_cr; | ||
| 223 | 28030053 | int extra_width = 0; | |
| 224 | 28030053 | int extra_height = 0; | |
| 225 | 28030053 | int emu = 0; | |
| 226 | 28030053 | const int full_mx = mx >> 2; | |
| 227 | 28030053 | const int full_my = my >> 2; | |
| 228 | 28030053 | const int pic_width = 16 * h->mb_width; | |
| 229 | 28030053 | const int pic_height = 16 * h->mb_height >> MB_FIELD(sl); | |
| 230 | int ysh; | ||
| 231 | |||
| 232 |
2/2✓ Branch 0 taken 18890138 times.
✓ Branch 1 taken 9139915 times.
|
28030053 | if (mx & 7) |
| 233 | 18890138 | extra_width -= 3; | |
| 234 |
2/2✓ Branch 0 taken 16377568 times.
✓ Branch 1 taken 11652485 times.
|
28030053 | if (my & 7) |
| 235 | 16377568 | extra_height -= 3; | |
| 236 | |||
| 237 |
2/2✓ Branch 0 taken 27901155 times.
✓ Branch 1 taken 128898 times.
|
28030053 | if (full_mx < 0 - extra_width || |
| 238 |
2/2✓ Branch 0 taken 27541089 times.
✓ Branch 1 taken 360066 times.
|
27901155 | full_my < 0 - extra_height || |
| 239 |
2/2✓ Branch 0 taken 26947203 times.
✓ Branch 1 taken 593886 times.
|
27541089 | full_mx + 16 /*FIXME*/ > pic_width + extra_width || |
| 240 |
2/2✓ Branch 0 taken 1032882 times.
✓ Branch 1 taken 25914321 times.
|
26947203 | full_my + 16 /*FIXME*/ > pic_height + extra_height) { |
| 241 | 2115732 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 242 | 2115732 | src_y - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 243 | sl->mb_linesize, sl->mb_linesize, | ||
| 244 | 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, | ||
| 245 | full_my - 2, pic_width, pic_height); | ||
| 246 | 2115732 | src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 247 | 2115732 | emu = 1; | |
| 248 | } | ||
| 249 | |||
| 250 | 28030053 | qpix_op[luma_xy](dest_y, src_y, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 251 |
2/2✓ Branch 0 taken 8864644 times.
✓ Branch 1 taken 19165409 times.
|
28030053 | if (!square) |
| 252 | 8864644 | qpix_op[luma_xy](dest_y + delta, src_y + delta, sl->mb_linesize); | |
| 253 | |||
| 254 | if (CONFIG_GRAY && h->flags & AV_CODEC_FLAG_GRAY) | ||
| 255 | return; | ||
| 256 | |||
| 257 |
2/2✓ Branch 0 taken 157614 times.
✓ Branch 1 taken 27872439 times.
|
28030053 | if (chroma_idc == 3 /* yuv444 */) { |
| 258 | 157614 | src_cb = pic->data[1] + offset; | |
| 259 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
| 260 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 261 | 5488 | src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 262 | sl->mb_linesize, sl->mb_linesize, | ||
| 263 | 16 + 5, 16 + 5 /*FIXME*/, | ||
| 264 | full_mx - 2, full_my - 2, | ||
| 265 | pic_width, pic_height); | ||
| 266 | 5488 | src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 267 | } | ||
| 268 | 157614 | qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 269 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
| 270 | 12367 | qpix_op[luma_xy](dest_cb + delta, src_cb + delta, sl->mb_linesize); | |
| 271 | |||
| 272 | 157614 | src_cr = pic->data[2] + offset; | |
| 273 |
2/2✓ Branch 0 taken 5488 times.
✓ Branch 1 taken 152126 times.
|
157614 | if (emu) { |
| 274 | 5488 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, | |
| 275 | 5488 | src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize, | |
| 276 | sl->mb_linesize, sl->mb_linesize, | ||
| 277 | 16 + 5, 16 + 5 /*FIXME*/, | ||
| 278 | full_mx - 2, full_my - 2, | ||
| 279 | pic_width, pic_height); | ||
| 280 | 5488 | src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; | |
| 281 | } | ||
| 282 | 157614 | qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps? | |
| 283 |
2/2✓ Branch 0 taken 12367 times.
✓ Branch 1 taken 145247 times.
|
157614 | if (!square) |
| 284 | 12367 | qpix_op[luma_xy](dest_cr + delta, src_cr + delta, sl->mb_linesize); | |
| 285 | 157614 | return; | |
| 286 | } | ||
| 287 | |||
| 288 |
2/2✓ Branch 0 taken 118650 times.
✓ Branch 1 taken 27753789 times.
|
27872439 | ysh = 3 - (chroma_idc == 2 /* yuv422 */); |
| 289 |
4/4✓ Branch 0 taken 27753789 times.
✓ Branch 1 taken 118650 times.
✓ Branch 2 taken 9941018 times.
✓ Branch 3 taken 17812771 times.
|
27872439 | if (chroma_idc == 1 /* yuv420 */ && MB_FIELD(sl)) { |
| 290 | // chroma offset when predicting from a field of opposite parity | ||
| 291 | 9941018 | my += 2 * ((sl->mb_y & 1) - (pic->reference - 1)); | |
| 292 |
4/4✓ Branch 0 taken 9863006 times.
✓ Branch 1 taken 78012 times.
✓ Branch 2 taken 509264 times.
✓ Branch 3 taken 9353742 times.
|
9941018 | emu |= (my >> 3) < 0 || (my >> 3) + 8 >= (pic_height >> 1); |
| 293 | } | ||
| 294 | |||
| 295 | 27872439 | src_cb = pic->data[1] + ((mx >> 3) * (1 << pixel_shift)) + | |
| 296 | 27872439 | (my >> ysh) * sl->mb_uvlinesize; | |
| 297 | 27872439 | src_cr = pic->data[2] + ((mx >> 3) * (1 << pixel_shift)) + | |
| 298 | 27872439 | (my >> ysh) * sl->mb_uvlinesize; | |
| 299 | |||
| 300 |
2/2✓ Branch 0 taken 2321655 times.
✓ Branch 1 taken 25550784 times.
|
27872439 | if (emu) { |
| 301 | 2321655 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb, | |
| 302 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
| 303 | 2321655 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
| 304 | 2321655 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
| 305 | 2321655 | src_cb = sl->edge_emu_buffer; | |
| 306 | } | ||
| 307 | 27872439 | chroma_op(dest_cb, src_cb, sl->mb_uvlinesize, | |
| 308 | 27872439 | height >> (chroma_idc == 1 /* yuv420 */), | |
| 309 | 27872439 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
| 310 | |||
| 311 |
2/2✓ Branch 0 taken 2321655 times.
✓ Branch 1 taken 25550784 times.
|
27872439 | if (emu) { |
| 312 | 2321655 | h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr, | |
| 313 | sl->mb_uvlinesize, sl->mb_uvlinesize, | ||
| 314 | 2321655 | 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), | |
| 315 | 2321655 | pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); | |
| 316 | 2321655 | src_cr = sl->edge_emu_buffer; | |
| 317 | } | ||
| 318 | 27872439 | chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), | |
| 319 | 27872439 | mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); | |
| 320 | } | ||
| 321 | |||
| 322 | 20570566 | static av_always_inline void mc_part_std(const H264Context *h, H264SliceContext *sl, | |
| 323 | int n, int square, | ||
| 324 | int height, int delta, | ||
| 325 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 326 | uint8_t *dest_cr, | ||
| 327 | int x_offset, int y_offset, | ||
| 328 | const qpel_mc_func *qpix_put, | ||
| 329 | h264_chroma_mc_func chroma_put, | ||
| 330 | const qpel_mc_func *qpix_avg, | ||
| 331 | h264_chroma_mc_func chroma_avg, | ||
| 332 | int list0, int list1, | ||
| 333 | int pixel_shift, int chroma_idc) | ||
| 334 | { | ||
| 335 | 20570566 | const qpel_mc_func *qpix_op = qpix_put; | |
| 336 | 20570566 | h264_chroma_mc_func chroma_op = chroma_put; | |
| 337 | |||
| 338 | 20570566 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 339 |
2/2✓ Branch 0 taken 39229 times.
✓ Branch 1 taken 20531337 times.
|
20570566 | if (chroma_idc == 3 /* yuv444 */) { |
| 340 | 39229 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 341 | 39229 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 342 |
2/2✓ Branch 0 taken 98093 times.
✓ Branch 1 taken 20433244 times.
|
20531337 | } else if (chroma_idc == 2 /* yuv422 */) { |
| 343 | 98093 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 344 | 98093 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 345 | } else { /* yuv420 */ | ||
| 346 | 20433244 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 347 | 20433244 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 348 | } | ||
| 349 | 20570566 | x_offset += 8 * sl->mb_x; | |
| 350 | 20570566 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
| 351 | |||
| 352 |
2/2✓ Branch 0 taken 19039359 times.
✓ Branch 1 taken 1531207 times.
|
20570566 | if (list0) { |
| 353 | 19039359 | H264Ref *ref = &sl->ref_list[0][sl->ref_cache[0][scan8[n]]]; | |
| 354 | 19039359 | mc_dir_part(h, sl, ref, n, square, height, delta, 0, | |
| 355 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 356 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
| 357 | |||
| 358 | 19039359 | qpix_op = qpix_avg; | |
| 359 | 19039359 | chroma_op = chroma_avg; | |
| 360 | } | ||
| 361 | |||
| 362 |
2/2✓ Branch 0 taken 7077638 times.
✓ Branch 1 taken 13492928 times.
|
20570566 | if (list1) { |
| 363 | 7077638 | H264Ref *ref = &sl->ref_list[1][sl->ref_cache[1][scan8[n]]]; | |
| 364 | 7077638 | mc_dir_part(h, sl, ref, n, square, height, delta, 1, | |
| 365 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 366 | qpix_op, chroma_op, pixel_shift, chroma_idc); | ||
| 367 | } | ||
| 368 | 20570566 | } | |
| 369 | |||
| 370 | 1589979 | static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceContext *sl, | |
| 371 | int n, int square, | ||
| 372 | int height, int delta, | ||
| 373 | uint8_t *dest_y, uint8_t *dest_cb, | ||
| 374 | uint8_t *dest_cr, | ||
| 375 | int x_offset, int y_offset, | ||
| 376 | const qpel_mc_func *qpix_put, | ||
| 377 | h264_chroma_mc_func chroma_put, | ||
| 378 | h264_weight_func luma_weight_op, | ||
| 379 | h264_weight_func chroma_weight_op, | ||
| 380 | h264_biweight_func luma_weight_avg, | ||
| 381 | h264_biweight_func chroma_weight_avg, | ||
| 382 | int list0, int list1, | ||
| 383 | int pixel_shift, int chroma_idc) | ||
| 384 | { | ||
| 385 | int chroma_height; | ||
| 386 | |||
| 387 | 1589979 | dest_y += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 388 |
2/2✓ Branch 0 taken 105686 times.
✓ Branch 1 taken 1484293 times.
|
1589979 | if (chroma_idc == 3 /* yuv444 */) { |
| 389 | 105686 | chroma_height = height; | |
| 390 | 105686 | chroma_weight_avg = luma_weight_avg; | |
| 391 | 105686 | chroma_weight_op = luma_weight_op; | |
| 392 | 105686 | dest_cb += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 393 | 105686 | dest_cr += (2 * x_offset << pixel_shift) + 2 * y_offset * sl->mb_linesize; | |
| 394 |
2/2✓ Branch 0 taken 1266 times.
✓ Branch 1 taken 1483027 times.
|
1484293 | } else if (chroma_idc == 2 /* yuv422 */) { |
| 395 | 1266 | chroma_height = height; | |
| 396 | 1266 | dest_cb += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 397 | 1266 | dest_cr += (x_offset << pixel_shift) + 2 * y_offset * sl->mb_uvlinesize; | |
| 398 | } else { /* yuv420 */ | ||
| 399 | 1483027 | chroma_height = height >> 1; | |
| 400 | 1483027 | dest_cb += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 401 | 1483027 | dest_cr += (x_offset << pixel_shift) + y_offset * sl->mb_uvlinesize; | |
| 402 | } | ||
| 403 | 1589979 | x_offset += 8 * sl->mb_x; | |
| 404 | 1589979 | y_offset += 8 * (sl->mb_y >> MB_FIELD(sl)); | |
| 405 | |||
| 406 |
4/4✓ Branch 0 taken 1538392 times.
✓ Branch 1 taken 51587 times.
✓ Branch 2 taken 323077 times.
✓ Branch 3 taken 1215315 times.
|
1913056 | if (list0 && list1) { |
| 407 | /* don't optimize for luma-only case, since B-frames usually | ||
| 408 | * use implicit weights => chroma too. */ | ||
| 409 | 323077 | uint8_t *tmp_cb = sl->bipred_scratchpad; | |
| 410 | 323077 | uint8_t *tmp_cr = sl->bipred_scratchpad + (8 << pixel_shift + (chroma_idc == 3)); | |
| 411 | 323077 | uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize; | |
| 412 | 323077 | int refn0 = sl->ref_cache[0][scan8[n]]; | |
| 413 | 323077 | int refn1 = sl->ref_cache[1][scan8[n]]; | |
| 414 | |||
| 415 | 323077 | mc_dir_part(h, sl, &sl->ref_list[0][refn0], n, square, height, delta, 0, | |
| 416 | dest_y, dest_cb, dest_cr, | ||
| 417 | x_offset, y_offset, qpix_put, chroma_put, | ||
| 418 | pixel_shift, chroma_idc); | ||
| 419 | 323077 | mc_dir_part(h, sl, &sl->ref_list[1][refn1], n, square, height, delta, 1, | |
| 420 | tmp_y, tmp_cb, tmp_cr, | ||
| 421 | x_offset, y_offset, qpix_put, chroma_put, | ||
| 422 | pixel_shift, chroma_idc); | ||
| 423 | |||
| 424 |
2/2✓ Branch 0 taken 258754 times.
✓ Branch 1 taken 64323 times.
|
323077 | if (sl->pwt.use_weight == 2) { |
| 425 | 258754 | int weight0 = sl->pwt.implicit_weight[refn0][refn1][sl->mb_y & 1]; | |
| 426 | 258754 | int weight1 = 64 - weight0; | |
| 427 | 258754 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, | |
| 428 | height, 5, weight0, weight1, 0); | ||
| 429 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 430 | 258754 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, | |
| 431 | chroma_height, 5, weight0, weight1, 0); | ||
| 432 | 258754 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, | |
| 433 | chroma_height, 5, weight0, weight1, 0); | ||
| 434 | } | ||
| 435 | } else { | ||
| 436 | 64323 | luma_weight_avg(dest_y, tmp_y, sl->mb_linesize, height, | |
| 437 | sl->pwt.luma_log2_weight_denom, | ||
| 438 | sl->pwt.luma_weight[refn0][0][0], | ||
| 439 | sl->pwt.luma_weight[refn1][1][0], | ||
| 440 | 64323 | sl->pwt.luma_weight[refn0][0][1] + | |
| 441 | 64323 | sl->pwt.luma_weight[refn1][1][1]); | |
| 442 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 443 | 64323 | chroma_weight_avg(dest_cb, tmp_cb, sl->mb_uvlinesize, chroma_height, | |
| 444 | sl->pwt.chroma_log2_weight_denom, | ||
| 445 | sl->pwt.chroma_weight[refn0][0][0][0], | ||
| 446 | sl->pwt.chroma_weight[refn1][1][0][0], | ||
| 447 | 64323 | sl->pwt.chroma_weight[refn0][0][0][1] + | |
| 448 | 64323 | sl->pwt.chroma_weight[refn1][1][0][1]); | |
| 449 | 64323 | chroma_weight_avg(dest_cr, tmp_cr, sl->mb_uvlinesize, chroma_height, | |
| 450 | sl->pwt.chroma_log2_weight_denom, | ||
| 451 | sl->pwt.chroma_weight[refn0][0][1][0], | ||
| 452 | sl->pwt.chroma_weight[refn1][1][1][0], | ||
| 453 | 64323 | sl->pwt.chroma_weight[refn0][0][1][1] + | |
| 454 | 64323 | sl->pwt.chroma_weight[refn1][1][1][1]); | |
| 455 | } | ||
| 456 | } | ||
| 457 | } else { | ||
| 458 | 1266902 | int list = list1 ? 1 : 0; | |
| 459 | 1266902 | int refn = sl->ref_cache[list][scan8[n]]; | |
| 460 | 1266902 | H264Ref *ref = &sl->ref_list[list][refn]; | |
| 461 | 1266902 | mc_dir_part(h, sl, ref, n, square, height, delta, list, | |
| 462 | dest_y, dest_cb, dest_cr, x_offset, y_offset, | ||
| 463 | qpix_put, chroma_put, pixel_shift, chroma_idc); | ||
| 464 | |||
| 465 | 1266902 | luma_weight_op(dest_y, sl->mb_linesize, height, | |
| 466 | sl->pwt.luma_log2_weight_denom, | ||
| 467 | sl->pwt.luma_weight[refn][list][0], | ||
| 468 | sl->pwt.luma_weight[refn][list][1]); | ||
| 469 | if (!CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 470 |
2/2✓ Branch 0 taken 549450 times.
✓ Branch 1 taken 717452 times.
|
1266902 | if (sl->pwt.use_weight_chroma) { |
| 471 | 549450 | chroma_weight_op(dest_cb, sl->mb_uvlinesize, chroma_height, | |
| 472 | sl->pwt.chroma_log2_weight_denom, | ||
| 473 | sl->pwt.chroma_weight[refn][list][0][0], | ||
| 474 | sl->pwt.chroma_weight[refn][list][0][1]); | ||
| 475 | 549450 | chroma_weight_op(dest_cr, sl->mb_uvlinesize, chroma_height, | |
| 476 | sl->pwt.chroma_log2_weight_denom, | ||
| 477 | sl->pwt.chroma_weight[refn][list][1][0], | ||
| 478 | sl->pwt.chroma_weight[refn][list][1][1]); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | } | ||
| 482 | 1589979 | } | |
| 483 | |||
| 484 | 14874980 | static av_always_inline void prefetch_motion(const H264Context *h, H264SliceContext *sl, | |
| 485 | int list, int pixel_shift, | ||
| 486 | int chroma_idc) | ||
| 487 | { | ||
| 488 | /* fetch pixels for estimated mv 4 macroblocks ahead | ||
| 489 | * optimized for 64byte cache lines */ | ||
| 490 | 14874980 | const int refn = sl->ref_cache[list][scan8[0]]; | |
| 491 |
2/2✓ Branch 0 taken 14494517 times.
✓ Branch 1 taken 380463 times.
|
14874980 | if (refn >= 0) { |
| 492 | 14494517 | const int mx = (sl->mv_cache[list][scan8[0]][0] >> 2) + 16 * sl->mb_x + 8; | |
| 493 | 14494517 | const int my = (sl->mv_cache[list][scan8[0]][1] >> 2) + 16 * sl->mb_y; | |
| 494 | 14494517 | uint8_t **src = sl->ref_list[list][refn].data; | |
| 495 | 14494517 | int off = mx * (1<< pixel_shift) + | |
| 496 | 14494517 | (my + (sl->mb_x & 3) * 4) * sl->mb_linesize + | |
| 497 | 14494517 | (64 << pixel_shift); | |
| 498 | 14494517 | h->vdsp.prefetch(src[0] + off, sl->linesize, 4); | |
| 499 |
2/2✓ Branch 0 taken 132628 times.
✓ Branch 1 taken 14361889 times.
|
14494517 | if (chroma_idc == 3 /* yuv444 */) { |
| 500 | 132628 | h->vdsp.prefetch(src[1] + off, sl->linesize, 4); | |
| 501 | 132628 | h->vdsp.prefetch(src[2] + off, sl->linesize, 4); | |
| 502 | } else { | ||
| 503 | 14361889 | off= ((mx>>1)+64) * (1<<pixel_shift) + ((my>>1) + (sl->mb_x&7))*sl->uvlinesize; | |
| 504 | 14361889 | h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); | |
| 505 | } | ||
| 506 | } | ||
| 507 | 14874980 | } | |
| 508 | |||
| 509 | 6783990 | static av_always_inline void xchg_mb_border(const H264Context *h, H264SliceContext *sl, | |
| 510 | uint8_t *src_y, | ||
| 511 | uint8_t *src_cb, uint8_t *src_cr, | ||
| 512 | int linesize, int uvlinesize, | ||
| 513 | int xchg, int chroma444, | ||
| 514 | int simple, int pixel_shift) | ||
| 515 | { | ||
| 516 | int deblock_topleft; | ||
| 517 | int deblock_top; | ||
| 518 | 6783990 | int top_idx = 1; | |
| 519 | uint8_t *top_border_m1; | ||
| 520 | uint8_t *top_border; | ||
| 521 | |||
| 522 |
4/4✓ Branch 0 taken 3067562 times.
✓ Branch 1 taken 3716428 times.
✓ Branch 2 taken 1929248 times.
✓ Branch 3 taken 1138314 times.
|
6783990 | if (!simple && FRAME_MBAFF(h)) { |
| 523 |
2/2✓ Branch 0 taken 969466 times.
✓ Branch 1 taken 959782 times.
|
1929248 | if (sl->mb_y & 1) { |
| 524 |
2/2✓ Branch 0 taken 749392 times.
✓ Branch 1 taken 220074 times.
|
969466 | if (!MB_MBAFF(sl)) |
| 525 | 749392 | return; | |
| 526 | } else { | ||
| 527 | 959782 | top_idx = MB_MBAFF(sl) ? 0 : 1; | |
| 528 | } | ||
| 529 | } | ||
| 530 | |||
| 531 |
2/2✓ Branch 0 taken 61104 times.
✓ Branch 1 taken 5973494 times.
|
6034598 | if (sl->deblocking_filter == 2) { |
| 532 | 61104 | deblock_topleft = h->slice_table[sl->mb_xy - 1 - (h->mb_stride << MB_FIELD(sl))] == sl->slice_num; | |
| 533 | 61104 | deblock_top = sl->top_type; | |
| 534 | } else { | ||
| 535 | 5973494 | deblock_topleft = (sl->mb_x > 0); | |
| 536 | 5973494 | deblock_top = (sl->mb_y > !!MB_FIELD(sl)); | |
| 537 | } | ||
| 538 | |||
| 539 | 6034598 | src_y -= linesize + 1 + pixel_shift; | |
| 540 | 6034598 | src_cb -= uvlinesize + 1 + pixel_shift; | |
| 541 | 6034598 | src_cr -= uvlinesize + 1 + pixel_shift; | |
| 542 | |||
| 543 | 6034598 | top_border_m1 = sl->top_borders[top_idx][sl->mb_x - 1]; | |
| 544 | 6034598 | top_border = sl->top_borders[top_idx][sl->mb_x]; | |
| 545 | |||
| 546 | #define XCHG(a, b, xchg) \ | ||
| 547 | if (pixel_shift) { \ | ||
| 548 | if (xchg) { \ | ||
| 549 | AV_SWAP64(b + 0, a + 0); \ | ||
| 550 | AV_SWAP64(b + 8, a + 8); \ | ||
| 551 | } else { \ | ||
| 552 | AV_COPY128(b, a); \ | ||
| 553 | } \ | ||
| 554 | } else if (xchg) \ | ||
| 555 | AV_SWAP64(b, a); \ | ||
| 556 | else \ | ||
| 557 | AV_COPY64(b, a); | ||
| 558 | |||
| 559 |
2/2✓ Branch 0 taken 5798156 times.
✓ Branch 1 taken 236442 times.
|
6034598 | if (deblock_top) { |
| 560 |
2/2✓ Branch 0 taken 5690758 times.
✓ Branch 1 taken 107398 times.
|
5798156 | if (deblock_topleft) { |
| 561 |
2/2✓ Branch 0 taken 2294316 times.
✓ Branch 1 taken 3396442 times.
|
5690758 | XCHG(top_border_m1 + (8 << pixel_shift), |
| 562 | src_y - (7 << pixel_shift), 1); | ||
| 563 | } | ||
| 564 |
6/6✓ Branch 0 taken 2316050 times.
✓ Branch 1 taken 3482106 times.
✓ Branch 2 taken 1158025 times.
✓ Branch 3 taken 1158025 times.
✓ Branch 5 taken 1741053 times.
✓ Branch 6 taken 1741053 times.
|
5798156 | XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg); |
| 565 |
2/2✓ Branch 0 taken 2316050 times.
✓ Branch 1 taken 3482106 times.
|
5798156 | XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1); |
| 566 |
2/2✓ Branch 0 taken 5671912 times.
✓ Branch 1 taken 126244 times.
|
5798156 | if (sl->mb_x + 1 < h->mb_width) { |
| 567 |
2/2✓ Branch 0 taken 2293096 times.
✓ Branch 1 taken 3378816 times.
|
5671912 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1], |
| 568 | src_y + (17 << pixel_shift), 1); | ||
| 569 | } | ||
| 570 | if (simple || !CONFIG_GRAY || !(h->flags & AV_CODEC_FLAG_GRAY)) { | ||
| 571 |
2/2✓ Branch 0 taken 121572 times.
✓ Branch 1 taken 5676584 times.
|
5798156 | if (chroma444) { |
| 572 |
2/2✓ Branch 0 taken 120330 times.
✓ Branch 1 taken 1242 times.
|
121572 | if (deblock_topleft) { |
| 573 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
| 574 |
2/2✓ Branch 0 taken 2252 times.
✓ Branch 1 taken 118078 times.
|
120330 | XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
| 575 | } | ||
| 576 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg); |
| 577 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1); |
| 578 |
6/6✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
✓ Branch 2 taken 1196 times.
✓ Branch 3 taken 1196 times.
✓ Branch 5 taken 59590 times.
✓ Branch 6 taken 59590 times.
|
121572 | XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg); |
| 579 |
2/2✓ Branch 0 taken 2392 times.
✓ Branch 1 taken 119180 times.
|
121572 | XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1); |
| 580 |
2/2✓ Branch 0 taken 120314 times.
✓ Branch 1 taken 1258 times.
|
121572 | if (sl->mb_x + 1 < h->mb_width) { |
| 581 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1); |
| 582 |
2/2✓ Branch 0 taken 2234 times.
✓ Branch 1 taken 118080 times.
|
120314 | XCHG(sl->top_borders[top_idx][sl->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1); |
| 583 | } | ||
| 584 | } else { | ||
| 585 |
2/2✓ Branch 0 taken 5570428 times.
✓ Branch 1 taken 106156 times.
|
5676584 | if (deblock_topleft) { |
| 586 |
2/2✓ Branch 0 taken 2292064 times.
✓ Branch 1 taken 3278364 times.
|
5570428 | XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1); |
| 587 |
2/2✓ Branch 0 taken 2292064 times.
✓ Branch 1 taken 3278364 times.
|
5570428 | XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1); |
| 588 | } | ||
| 589 |
2/2✓ Branch 0 taken 2313658 times.
✓ Branch 1 taken 3362926 times.
|
5676584 | XCHG(top_border + (16 << pixel_shift), src_cb + 1 + pixel_shift, 1); |
| 590 |
2/2✓ Branch 0 taken 2313658 times.
✓ Branch 1 taken 3362926 times.
|
5676584 | XCHG(top_border + (24 << pixel_shift), src_cr + 1 + pixel_shift, 1); |
| 591 | } | ||
| 592 | } | ||
| 593 | } | ||
| 594 | } | ||
| 595 | |||
| 596 | 4485713 | static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth, | |
| 597 | int index) | ||
| 598 | { | ||
| 599 |
2/2✓ Branch 0 taken 1122317 times.
✓ Branch 1 taken 3363396 times.
|
4485713 | if (high_bit_depth) { |
| 600 | 1122317 | return AV_RN32A(((int32_t *)mb) + index); | |
| 601 | } else | ||
| 602 | 3363396 | return AV_RN16A(mb + index); | |
| 603 | } | ||
| 604 | |||
| 605 | 9792 | static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth, | |
| 606 | int index, int value) | ||
| 607 | { | ||
| 608 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9792 times.
|
9792 | if (high_bit_depth) { |
| 609 | ✗ | AV_WN32A(((int32_t *)mb) + index, value); | |
| 610 | } else | ||
| 611 | 9792 | AV_WN16A(mb + index, value); | |
| 612 | 9792 | } | |
| 613 | |||
| 614 | 4249805 | static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, | |
| 615 | H264SliceContext *sl, | ||
| 616 | int mb_type, int simple, | ||
| 617 | int transform_bypass, | ||
| 618 | int pixel_shift, | ||
| 619 | const int *block_offset, | ||
| 620 | int linesize, | ||
| 621 | uint8_t *dest_y, int p) | ||
| 622 | { | ||
| 623 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 624 | void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 625 | int i; | ||
| 626 |
2/2✓ Branch 0 taken 4116937 times.
✓ Branch 1 taken 132868 times.
|
4249805 | int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1]; |
| 627 | 4249805 | block_offset += 16 * p; | |
| 628 |
2/2✓ Branch 0 taken 3190990 times.
✓ Branch 1 taken 1058815 times.
|
4249805 | if (IS_INTRA4x4(mb_type)) { |
| 629 |
2/2✓ Branch 0 taken 1642272 times.
✓ Branch 1 taken 1548718 times.
|
3190990 | if (IS_8x8DCT(mb_type)) { |
| 630 |
2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 1641612 times.
|
1642272 | if (transform_bypass) { |
| 631 | 660 | idct_dc_add = | |
| 632 | 660 | idct_add = h->h264dsp.h264_add_pixels8_clear; | |
| 633 | } else { | ||
| 634 | 1641612 | idct_dc_add = h->h264dsp.h264_idct8_dc_add; | |
| 635 | 1641612 | idct_add = h->h264dsp.h264_idct8_add; | |
| 636 | } | ||
| 637 |
2/2✓ Branch 0 taken 6569088 times.
✓ Branch 1 taken 1642272 times.
|
8211360 | for (i = 0; i < 16; i += 4) { |
| 638 | 6569088 | uint8_t *const ptr = dest_y + block_offset[i]; | |
| 639 | 6569088 | const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; | |
| 640 |
5/6✓ Branch 0 taken 2640 times.
✓ Branch 1 taken 6566448 times.
✓ Branch 2 taken 2640 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2488 times.
✓ Branch 5 taken 152 times.
|
6569088 | if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { |
| 641 |
1/2✓ Branch 0 taken 2488 times.
✗ Branch 1 not taken.
|
2488 | if (h->x264_build < 151U) { |
| 642 | 2488 | h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 643 | } else | ||
| 644 | ✗ | h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 645 | ✗ | (sl-> topleft_samples_available << i) & 0x8000, | |
| 646 | ✗ | (sl->topright_samples_available << i) & 0x4000, linesize); | |
| 647 | } else { | ||
| 648 | 6566600 | const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; | |
| 649 | 6566600 | h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, | |
| 650 | 6566600 | (sl->topright_samples_available << i) & 0x4000, linesize); | |
| 651 |
2/2✓ Branch 0 taken 5158422 times.
✓ Branch 1 taken 1408178 times.
|
6566600 | if (nnz) { |
| 652 |
4/4✓ Branch 0 taken 561264 times.
✓ Branch 1 taken 4597158 times.
✓ Branch 3 taken 250751 times.
✓ Branch 4 taken 310513 times.
|
5158422 | if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
| 653 | 250751 | idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 654 | else | ||
| 655 | 4907671 | idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 656 | } | ||
| 657 | } | ||
| 658 | } | ||
| 659 | } else { | ||
| 660 |
2/2✓ Branch 0 taken 15230 times.
✓ Branch 1 taken 1533488 times.
|
1548718 | if (transform_bypass) { |
| 661 | 15230 | idct_dc_add = | |
| 662 | 15230 | idct_add = h->h264dsp.h264_add_pixels4_clear; | |
| 663 | } else { | ||
| 664 | 1533488 | idct_dc_add = h->h264dsp.h264_idct_dc_add; | |
| 665 | 1533488 | idct_add = h->h264dsp.h264_idct_add; | |
| 666 | } | ||
| 667 |
2/2✓ Branch 0 taken 24779488 times.
✓ Branch 1 taken 1548718 times.
|
26328206 | for (i = 0; i < 16; i++) { |
| 668 | 24779488 | uint8_t *const ptr = dest_y + block_offset[i]; | |
| 669 | 24779488 | const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; | |
| 670 | |||
| 671 |
5/6✓ Branch 0 taken 243680 times.
✓ Branch 1 taken 24535808 times.
✓ Branch 2 taken 243680 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 180369 times.
✓ Branch 5 taken 63311 times.
|
24779488 | if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { |
| 672 | 180369 | h->hpc.pred4x4_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 673 | } else { | ||
| 674 | uint8_t *topright; | ||
| 675 | int nnz, tr; | ||
| 676 | uint64_t tr_high; | ||
| 677 |
4/4✓ Branch 0 taken 23430073 times.
✓ Branch 1 taken 1169046 times.
✓ Branch 2 taken 1120684 times.
✓ Branch 3 taken 22309389 times.
|
26888849 | if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) { |
| 678 | 2289730 | const int topright_avail = (sl->topright_samples_available << i) & 0x8000; | |
| 679 | av_assert2(sl->mb_y || linesize <= block_offset[i]); | ||
| 680 |
2/2✓ Branch 0 taken 634422 times.
✓ Branch 1 taken 1655308 times.
|
2289730 | if (!topright_avail) { |
| 681 |
2/2✓ Branch 0 taken 248405 times.
✓ Branch 1 taken 386017 times.
|
634422 | if (pixel_shift) { |
| 682 | 248405 | tr_high = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL; | |
| 683 | 248405 | topright = (uint8_t *)&tr_high; | |
| 684 | } else { | ||
| 685 | 386017 | tr = ptr[3 - linesize] * 0x01010101u; | |
| 686 | 386017 | topright = (uint8_t *)&tr; | |
| 687 | } | ||
| 688 | } else | ||
| 689 | 1655308 | topright = ptr + (4 << pixel_shift) - linesize; | |
| 690 | } else | ||
| 691 | 22309389 | topright = NULL; | |
| 692 | |||
| 693 | 24599119 | h->hpc.pred4x4[dir](ptr, topright, linesize); | |
| 694 | 24599119 | nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; | |
| 695 |
2/2✓ Branch 0 taken 16820727 times.
✓ Branch 1 taken 7778392 times.
|
24599119 | if (nnz) { |
| 696 |
4/4✓ Branch 0 taken 3881212 times.
✓ Branch 1 taken 12939515 times.
✓ Branch 3 taken 2237998 times.
✓ Branch 4 taken 1643214 times.
|
16820727 | if (nnz == 1 && dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) |
| 697 | 2237998 | idct_dc_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 698 | else | ||
| 699 | 14582729 | idct_add(ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); | |
| 700 | } | ||
| 701 | } | ||
| 702 | } | ||
| 703 | } | ||
| 704 | } else { | ||
| 705 | 1058815 | h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize); | |
| 706 |
2/2✓ Branch 0 taken 617791 times.
✓ Branch 1 taken 441024 times.
|
1058815 | if (sl->non_zero_count_cache[scan8[LUMA_DC_BLOCK_INDEX + p]]) { |
| 707 |
2/2✓ Branch 0 taken 617179 times.
✓ Branch 1 taken 612 times.
|
617791 | if (!transform_bypass) |
| 708 | 617179 | h->h264dsp.h264_luma_dc_dequant_idct(sl->mb + (p * 256 << pixel_shift), | |
| 709 | 617179 | sl->mb_luma_dc[p], | |
| 710 | 617179 | h->ps.pps->dequant4_coeff[p][qscale][0]); | |
| 711 | else { | ||
| 712 | static const uint8_t dc_mapping[16] = { | ||
| 713 | 0 * 16, 1 * 16, 4 * 16, 5 * 16, | ||
| 714 | 2 * 16, 3 * 16, 6 * 16, 7 * 16, | ||
| 715 | 8 * 16, 9 * 16, 12 * 16, 13 * 16, | ||
| 716 | 10 * 16, 11 * 16, 14 * 16, 15 * 16 | ||
| 717 | }; | ||
| 718 |
2/2✓ Branch 0 taken 9792 times.
✓ Branch 1 taken 612 times.
|
10404 | for (i = 0; i < 16; i++) |
| 719 | 19584 | dctcoef_set(sl->mb + (p * 256 << pixel_shift), | |
| 720 | 9792 | pixel_shift, dc_mapping[i], | |
| 721 | 9792 | dctcoef_get(sl->mb_luma_dc[p], | |
| 722 | pixel_shift, i)); | ||
| 723 | } | ||
| 724 | } | ||
| 725 | } | ||
| 726 | 4249805 | } | |
| 727 | |||
| 728 | 15579327 | static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl, | |
| 729 | int mb_type, int simple, | ||
| 730 | int transform_bypass, | ||
| 731 | int pixel_shift, | ||
| 732 | const int *block_offset, | ||
| 733 | int linesize, | ||
| 734 | uint8_t *dest_y, int p) | ||
| 735 | { | ||
| 736 | void (*idct_add)(uint8_t *dst, int16_t *block, int stride); | ||
| 737 | int i; | ||
| 738 | 15579327 | block_offset += 16 * p; | |
| 739 |
2/2✓ Branch 0 taken 12388337 times.
✓ Branch 1 taken 3190990 times.
|
15579327 | if (!IS_INTRA4x4(mb_type)) { |
| 740 |
2/2✓ Branch 0 taken 1058815 times.
✓ Branch 1 taken 11329522 times.
|
12388337 | if (IS_INTRA16x16(mb_type)) { |
| 741 |
2/2✓ Branch 0 taken 864 times.
✓ Branch 1 taken 1057951 times.
|
1058815 | if (transform_bypass) { |
| 742 |
1/2✓ Branch 0 taken 864 times.
✗ Branch 1 not taken.
|
864 | if (h->ps.sps->profile_idc == 244 && |
| 743 |
2/2✓ Branch 0 taken 502 times.
✓ Branch 1 taken 362 times.
|
864 | (sl->intra16x16_pred_mode == VERT_PRED8x8 || |
| 744 |
2/2✓ Branch 0 taken 278 times.
✓ Branch 1 taken 224 times.
|
502 | sl->intra16x16_pred_mode == HOR_PRED8x8)) { |
| 745 | 640 | h->hpc.pred16x16_add[sl->intra16x16_pred_mode](dest_y, block_offset, | |
| 746 | 640 | sl->mb + (p * 256 << pixel_shift), | |
| 747 | linesize); | ||
| 748 | } else { | ||
| 749 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 224 times.
|
3808 | for (i = 0; i < 16; i++) |
| 750 |
3/4✓ Branch 0 taken 40 times.
✓ Branch 1 taken 3544 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
|
3624 | if (sl->non_zero_count_cache[scan8[i + p * 16]] || |
| 751 | 40 | dctcoef_get(sl->mb, pixel_shift, i * 16 + p * 256)) | |
| 752 | 3544 | h->h264dsp.h264_add_pixels4_clear(dest_y + block_offset[i], | |
| 753 | 3544 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 754 | linesize); | ||
| 755 | } | ||
| 756 | } else { | ||
| 757 | 1057951 | h->h264dsp.h264_idct_add16intra(dest_y, block_offset, | |
| 758 | 1057951 | sl->mb + (p * 256 << pixel_shift), | |
| 759 | linesize, | ||
| 760 | 1057951 | sl->non_zero_count_cache + p * 5 * 8); | |
| 761 | } | ||
| 762 |
2/2✓ Branch 0 taken 6396687 times.
✓ Branch 1 taken 4932835 times.
|
11329522 | } else if (sl->cbp & 15) { |
| 763 |
2/2✓ Branch 0 taken 339104 times.
✓ Branch 1 taken 6057583 times.
|
6396687 | if (transform_bypass) { |
| 764 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | const int di = IS_8x8DCT(mb_type) ? 4 : 1; |
| 765 | 678208 | idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8_clear | |
| 766 |
2/2✓ Branch 0 taken 17071 times.
✓ Branch 1 taken 322033 times.
|
339104 | : h->h264dsp.h264_add_pixels4_clear; |
| 767 |
2/2✓ Branch 0 taken 5220812 times.
✓ Branch 1 taken 339104 times.
|
5559916 | for (i = 0; i < 16; i += di) |
| 768 |
2/2✓ Branch 0 taken 722104 times.
✓ Branch 1 taken 4498708 times.
|
5220812 | if (sl->non_zero_count_cache[scan8[i + p * 16]]) |
| 769 | 722104 | idct_add(dest_y + block_offset[i], | |
| 770 | 722104 | sl->mb + (i * 16 + p * 256 << pixel_shift), | |
| 771 | linesize); | ||
| 772 | } else { | ||
| 773 |
2/2✓ Branch 0 taken 1033748 times.
✓ Branch 1 taken 5023835 times.
|
6057583 | if (IS_8x8DCT(mb_type)) |
| 774 | 1033748 | h->h264dsp.h264_idct8_add4(dest_y, block_offset, | |
| 775 | 1033748 | sl->mb + (p * 256 << pixel_shift), | |
| 776 | linesize, | ||
| 777 | 1033748 | sl->non_zero_count_cache + p * 5 * 8); | |
| 778 | else | ||
| 779 | 5023835 | h->h264dsp.h264_idct_add16(dest_y, block_offset, | |
| 780 | 5023835 | sl->mb + (p * 256 << pixel_shift), | |
| 781 | linesize, | ||
| 782 | 5023835 | sl->non_zero_count_cache + p * 5 * 8); | |
| 783 | } | ||
| 784 | } | ||
| 785 | } | ||
| 786 | 15579327 | } | |
| 787 | |||
| 788 | #define BITS 8 | ||
| 789 | #define SIMPLE 1 | ||
| 790 | #include "h264_mb_template.c" | ||
| 791 | |||
| 792 | #undef BITS | ||
| 793 | #define BITS 16 | ||
| 794 | #include "h264_mb_template.c" | ||
| 795 | |||
| 796 | #undef SIMPLE | ||
| 797 | #define SIMPLE 0 | ||
| 798 | #include "h264_mb_template.c" | ||
| 799 | |||
| 800 | 15217116 | void ff_h264_hl_decode_mb(const H264Context *h, H264SliceContext *sl) | |
| 801 | { | ||
| 802 | 15217116 | const int mb_xy = sl->mb_xy; | |
| 803 | 15217116 | const int mb_type = h->cur_pic.mb_type[mb_xy]; | |
| 804 | 38626273 | int is_complex = CONFIG_SMALL || sl->is_complex || | |
| 805 |
6/6✓ Branch 0 taken 8192041 times.
✓ Branch 1 taken 7025075 times.
✓ Branch 2 taken 8168936 times.
✓ Branch 3 taken 23105 times.
✓ Branch 4 taken 136878 times.
✓ Branch 5 taken 8032058 times.
|
15217116 | IS_INTRA_PCM(mb_type) || sl->qscale == 0; |
| 806 | |||
| 807 |
2/2✓ Branch 0 taken 192658 times.
✓ Branch 1 taken 15024458 times.
|
15217116 | if (CHROMA444(h)) { |
| 808 |
4/4✓ Branch 0 taken 79866 times.
✓ Branch 1 taken 112792 times.
✓ Branch 2 taken 19800 times.
✓ Branch 3 taken 60066 times.
|
192658 | if (is_complex || h->pixel_shift) |
| 809 | 132592 | hl_decode_mb_444_complex(h, sl); | |
| 810 | else | ||
| 811 | 60066 | hl_decode_mb_444_simple_8(h, sl); | |
| 812 |
2/2✓ Branch 0 taken 7072266 times.
✓ Branch 1 taken 7952192 times.
|
15024458 | } else if (is_complex) { |
| 813 | 7072266 | hl_decode_mb_complex(h, sl); | |
| 814 |
2/2✓ Branch 0 taken 1154360 times.
✓ Branch 1 taken 6797832 times.
|
7952192 | } else if (h->pixel_shift) { |
| 815 | 1154360 | hl_decode_mb_simple_16(h, sl); | |
| 816 | } else | ||
| 817 | 6797832 | hl_decode_mb_simple_8(h, sl); | |
| 818 | 15217116 | } | |
| 819 |