| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * VVC thread logic | ||
| 3 | * | ||
| 4 | * Copyright (C) 2023 Nuo Mi | ||
| 5 | * | ||
| 6 | * This file is part of FFmpeg. | ||
| 7 | * | ||
| 8 | * FFmpeg is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU Lesser General Public | ||
| 10 | * License as published by the Free Software Foundation; either | ||
| 11 | * version 2.1 of the License, or (at your option) any later version. | ||
| 12 | * | ||
| 13 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 16 | * Lesser General Public License for more details. | ||
| 17 | * | ||
| 18 | * You should have received a copy of the GNU Lesser General Public | ||
| 19 | * License along with FFmpeg; if not, write to the Free Software | ||
| 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <stdatomic.h> | ||
| 24 | |||
| 25 | #include "libavcodec/executor.h" | ||
| 26 | #include "libavutil/mem.h" | ||
| 27 | #include "libavutil/thread.h" | ||
| 28 | |||
| 29 | #include "thread.h" | ||
| 30 | #include "ctu.h" | ||
| 31 | #include "filter.h" | ||
| 32 | #include "inter.h" | ||
| 33 | #include "intra.h" | ||
| 34 | #include "refs.h" | ||
| 35 | |||
// A progress listener tied to the task that waits on it.
// l must remain the first member: progress_done() casts the
// VVCProgressListener pointer it is given back to a ProgressListener.
typedef struct ProgressListener {
    VVCProgressListener l;
    struct VVCTask *task;   // task whose score is raised once the progress is reached
    VVCContext *s;          // decoder context forwarded to frame_thread_add_score()
} ProgressListener;
| 41 | |||
// Stages a CTU task walks through, in execution order.
// The values index run[] and task_name[], so the order must match those tables.
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_INIT,                    // for CTU(0, 0) only
    VVC_TASK_STAGE_PARSE,                   // run_parse()
    VVC_TASK_STAGE_DEBLOCK_BS,              // run_deblock_bs()
    VVC_TASK_STAGE_INTER,                   // run_inter()
    VVC_TASK_STAGE_RECON,                   // run_recon()
    VVC_TASK_STAGE_LMCS,                    // run_lmcs()
    VVC_TASK_STAGE_DEBLOCK_V,               // run_deblock_v()
    VVC_TASK_STAGE_DEBLOCK_H,               // run_deblock_h()
    VVC_TASK_STAGE_SAO,                     // run_sao()
    VVC_TASK_STAGE_ALF,                     // run_alf()
    VVC_TASK_STAGE_LAST                     // number of stages, not a stage itself
} VVCTaskStage;
| 55 | |||
// One task per CTU; the same object is re-queued for each stage it runs.
// u must remain the first member: task_run() casts the FFTask pointer it
// receives from the executor back to a VVCTask.
typedef struct VVCTask {
    union {
        struct VVCTask *next;               //for executor debug only
        FFTask task;
    } u;

    // stage this task will run next
    VVCTaskStage stage;

    // ctu x, y, and raster scan order
    int rx, ry, rs;
    VVCFrameContext *fc;

    // listener on the collocated frame (VVC_PROGRESS_MV), see check_colocation()
    ProgressListener col_listener;
    // listeners on reference frames (VVC_PROGRESS_PIXEL), indexed [list][ref], see schedule_inter()
    ProgressListener listener[2][VVC_MAX_REF_ENTRIES];

    // set by task_init_parse(); sc is also copied into the local context before every stage
    SliceContext *sc;
    EntryPoint *ep;
    int ctu_idx;                            //ctu idx in the current slice

    // tasks with target scores met are ready for scheduling
    atomic_uchar score[VVC_TASK_STAGE_LAST];
    // number of pixel listeners registered for this task; the INTER target score
    atomic_uchar target_inter_score;
} VVCTask;
| 80 | |||
// Per CTU row state.
typedef struct VVCRowThread {
    // per progress type, how many CTUs of this row have reported;
    // the row is complete when the count equals ctu_width
    atomic_int col_progress[VVC_PROGRESS_LAST];
} VVCRowThread;
| 84 | |||
// Per frame scheduling state shared by all tasks of the frame.
typedef struct VVCFrameThread {
    // error return for tasks
    // set once, from 0, by the first failing stage; later stages are skipped while it is non-zero
    atomic_int ret;

    VVCRowThread *rows;
    VVCTask *tasks;                         // one task per CTU, indexed in raster scan order

    int ctu_size;                           // multiplied with CTU coordinates to get pixel positions
    int ctu_width;                          // frame width in CTUs
    int ctu_height;                         // frame height in CTUs
    int ctu_count;                          // NOTE(review): presumably ctu_width * ctu_height, not used in this part of the file

    // updated atomically, without taking the lock; sheduled_done() signals cond
    // (holding the lock) when one of them drops to zero
    atomic_int nb_scheduled_tasks;
    atomic_int nb_scheduled_listeners;

    // protected by lock: number of leading rows completed, per progress type
    int row_progress[VVC_PROGRESS_LAST];

    AVMutex lock;
    AVCond  cond;
} VVCFrameThread;
| 106 | |||
| 107 | #define PRIORITY_LOWEST 2 | ||
| 108 | 266474 | static void add_task(VVCContext *s, VVCTask *t) | |
| 109 | { | ||
| 110 | 266474 | VVCFrameThread *ft = t->fc->ft; | |
| 111 | 266474 | FFTask *task = &t->u.task; | |
| 112 | 266474 | const int priorities[] = { | |
| 113 | 0, // VVC_TASK_STAGE_INIT, | ||
| 114 | 0, // VVC_TASK_STAGE_PARSE, | ||
| 115 | 1, // VVC_TASK_STAGE_DEBLOCK_BS | ||
| 116 | // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks. | ||
| 117 | // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks. | ||
| 118 | PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER | ||
| 119 | 1, // VVC_TASK_STAGE_RECON, | ||
| 120 | 1, // VVC_TASK_STAGE_LMCS, | ||
| 121 | 1, // VVC_TASK_STAGE_DEBLOCK_V, | ||
| 122 | 1, // VVC_TASK_STAGE_DEBLOCK_H, | ||
| 123 | 1, // VVC_TASK_STAGE_SAO, | ||
| 124 | 1, // VVC_TASK_STAGE_ALF, | ||
| 125 | }; | ||
| 126 | |||
| 127 | 266474 | atomic_fetch_add(&ft->nb_scheduled_tasks, 1); | |
| 128 | 266474 | task->priority = priorities[t->stage]; | |
| 129 | 266474 | ff_executor_execute(s->executor, task); | |
| 130 | 266474 | } | |
| 131 | |||
| 132 | 54541 | static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry) | |
| 133 | { | ||
| 134 | 54541 | memset(t, 0, sizeof(*t)); | |
| 135 | 54541 | t->stage = stage; | |
| 136 | 54541 | t->fc = fc; | |
| 137 | 54541 | t->rx = rx; | |
| 138 | 54541 | t->ry = ry; | |
| 139 | 54541 | t->rs = ry * fc->ft->ctu_width + rx; | |
| 140 |
2/2✓ Branch 0 taken 545410 times.
✓ Branch 1 taken 54541 times.
|
599951 | for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++) |
| 141 | 545410 | atomic_store(t->score + i, 0); | |
| 142 | 54541 | atomic_store(&t->target_inter_score, 0); | |
| 143 | 54541 | } | |
| 144 | |||
| 145 | 53475 | static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx) | |
| 146 | { | ||
| 147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
|
53475 | if (t->sc) { |
| 148 | // the task already inited, error bitstream | ||
| 149 | ✗ | return AVERROR_INVALIDDATA; | |
| 150 | } | ||
| 151 | 53475 | t->sc = sc; | |
| 152 | 53475 | t->ep = ep; | |
| 153 | 53475 | t->ctu_idx = ctu_idx; | |
| 154 | |||
| 155 | 53475 | return 0; | |
| 156 | } | ||
| 157 | |||
| 158 | 1647128 | static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage) | |
| 159 | { | ||
| 160 | 1647128 | return atomic_fetch_add(&t->score[stage], 1) + 1; | |
| 161 | } | ||
| 162 | |||
| 163 | 428866 | static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage) | |
| 164 | { | ||
| 165 | 428866 | return atomic_load(&t->score[stage]); | |
| 166 | } | ||
| 167 | |||
| 168 | //first row in tile or slice | ||
| 169 | 5230 | static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry) | |
| 170 | { | ||
| 171 | 5230 | const VVCFrameThread *ft = fc->ft; | |
| 172 | 5230 | const VVCPPS *pps = fc->ps.pps; | |
| 173 | |||
| 174 |
2/2✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 793 times.
|
5230 | if (ry != pps->ctb_to_row_bd[ry]) { |
| 175 | 4437 | const int rs = ry * ft->ctu_width + rx; | |
| 176 | 4437 | return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width]; | |
| 177 | } | ||
| 178 | 793 | return 1; | |
| 179 | } | ||
| 180 | |||
| 181 | 2075994 | static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score) | |
| 182 | { | ||
| 183 | // l:left, r:right, t: top, b: bottom | ||
| 184 | static const uint8_t target_score[] = | ||
| 185 | { | ||
| 186 | 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse | ||
| 187 | 0, //VVC_TASK_STAGE_INTER, not used | ||
| 188 | 2, //VVC_TASK_STAGE_RECON, need l + rt recon | ||
| 189 | 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon | ||
| 190 | 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v | ||
| 191 | 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h | ||
| 192 | 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h | ||
| 193 | 8, //VVC_TASK_STAGE_ALF, need sao around the ctu | ||
| 194 | }; | ||
| 195 | 2075994 | uint8_t target = 0; | |
| 196 | 2075994 | VVCFrameContext *fc = t->fc; | |
| 197 | |||
| 198 |
2/2✓ Branch 0 taken 1066 times.
✓ Branch 1 taken 2074928 times.
|
2075994 | if (stage == VVC_TASK_STAGE_INIT) |
| 199 | 1066 | return 1; | |
| 200 | |||
| 201 |
2/2✓ Branch 0 taken 109087 times.
✓ Branch 1 taken 1965841 times.
|
2074928 | if (stage == VVC_TASK_STAGE_PARSE) { |
| 202 | 109087 | const H266RawSPS *rsps = fc->ps.sps->r; | |
| 203 |
4/4✓ Branch 0 taken 3996 times.
✓ Branch 1 taken 105091 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 783 times.
|
109087 | const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry); |
| 204 | 109087 | const int no_prev_stage = t->rs > 0; | |
| 205 | 109087 | target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage | |
| 206 |
2/2✓ Branch 0 taken 148592 times.
✓ Branch 1 taken 1817249 times.
|
1965841 | } else if (stage == VVC_TASK_STAGE_INTER) { |
| 207 | 148592 | target = atomic_load(&t->target_inter_score); | |
| 208 | } else { | ||
| 209 | 1817249 | target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS]; | |
| 210 | } | ||
| 211 | |||
| 212 | //+1 for previous stage | ||
| 213 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2074928 times.
|
2074928 | av_assert0(score <= target + 1); |
| 214 | 2074928 | return score == target + 1; | |
| 215 | } | ||
| 216 | |||
| 217 | 2358058 | static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft, | |
| 218 | const int rx, const int ry, const VVCTaskStage stage) | ||
| 219 | { | ||
| 220 | 2358058 | VVCTask *t = ft->tasks + ft->ctu_width * ry + rx; | |
| 221 | uint8_t score; | ||
| 222 | |||
| 223 |
8/8✓ Branch 0 taken 2190106 times.
✓ Branch 1 taken 167952 times.
✓ Branch 2 taken 2036150 times.
✓ Branch 3 taken 153956 times.
✓ Branch 4 taken 1824725 times.
✓ Branch 5 taken 211425 times.
✓ Branch 6 taken 177597 times.
✓ Branch 7 taken 1647128 times.
|
2358058 | if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height) |
| 224 | 710930 | return; | |
| 225 | |||
| 226 | 1647128 | score = task_add_score(t, stage); | |
| 227 |
2/2✓ Branch 1 taken 266474 times.
✓ Branch 2 taken 1380654 times.
|
1647128 | if (task_has_target_score(t, stage, score)) { |
| 228 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 266474 times.
|
266474 | av_assert0(s); |
| 229 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 266474 times.
|
266474 | av_assert0(stage == t->stage); |
| 230 | 266474 | add_task(s, t); | |
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | 367048 | static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled) | |
| 235 | { | ||
| 236 |
2/2✓ Branch 0 taken 101640 times.
✓ Branch 1 taken 265408 times.
|
367048 | if (atomic_fetch_sub(scheduled, 1) == 1) { |
| 237 | 101640 | ff_mutex_lock(&ft->lock); | |
| 238 | 101640 | ff_cond_signal(&ft->cond); | |
| 239 | 101640 | ff_mutex_unlock(&ft->lock); | |
| 240 | } | ||
| 241 | 367048 | } | |
| 242 | |||
| 243 | 100574 | static void progress_done(VVCProgressListener *_l, const int type) | |
| 244 | { | ||
| 245 | 100574 | const ProgressListener *l = (ProgressListener *)_l; | |
| 246 | 100574 | const VVCTask *t = l->task; | |
| 247 | 100574 | VVCFrameThread *ft = t->fc->ft; | |
| 248 | |||
| 249 | 100574 | frame_thread_add_score(l->s, ft, t->rx, t->ry, type); | |
| 250 | 100574 | sheduled_done(ft, &ft->nb_scheduled_listeners); | |
| 251 | 100574 | } | |
| 252 | |||
// Listener callback for VVC_PROGRESS_PIXEL: counts towards the task's INTER stage.
static void pixel_done(VVCProgressListener *l)
{
    progress_done(l, VVC_TASK_STAGE_INTER);
}
| 257 | |||
// Listener callback for VVC_PROGRESS_MV: counts towards the task's PARSE stage.
static void mv_done(VVCProgressListener *l)
{
    progress_done(l, VVC_TASK_STAGE_PARSE);
}
| 262 | |||
| 263 | 100574 | static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y) | |
| 264 | { | ||
| 265 | 100574 | const int is_inter = vp == VVC_PROGRESS_PIXEL; | |
| 266 | |||
| 267 | 100574 | l->task = t; | |
| 268 | 100574 | l->s = s; | |
| 269 | 100574 | l->l.vp = vp; | |
| 270 | 100574 | l->l.y = y; | |
| 271 |
2/2✓ Branch 0 taken 95117 times.
✓ Branch 1 taken 5457 times.
|
100574 | l->l.progress_done = is_inter ? pixel_done : mv_done; |
| 272 |
2/2✓ Branch 0 taken 95117 times.
✓ Branch 1 taken 5457 times.
|
100574 | if (is_inter) |
| 273 | 95117 | atomic_fetch_add(&t->target_inter_score, 1); | |
| 274 | 100574 | } | |
| 275 | |||
| 276 | 100574 | static void add_progress_listener(VVCFrame *ref, ProgressListener *l, | |
| 277 | VVCTask *t, VVCContext *s, const VVCProgress vp, const int y) | ||
| 278 | { | ||
| 279 | 100574 | VVCFrameThread *ft = t->fc->ft; | |
| 280 | |||
| 281 | 100574 | atomic_fetch_add(&ft->nb_scheduled_listeners, 1); | |
| 282 | 100574 | listener_init(l, t, s, vp, y); | |
| 283 | 100574 | ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l); | |
| 284 | 100574 | } | |
| 285 | |||
| 286 | 153 | static void ep_init_wpp(EntryPoint *next, const EntryPoint *ep, const VVCSPS *sps) | |
| 287 | { | ||
| 288 | 153 | memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state)); | |
| 289 | 153 | memcpy(next->pp, ep->pp, sizeof(next->pp)); | |
| 290 | 153 | ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag); | |
| 291 | 153 | } | |
| 292 | |||
| 293 | 53475 | static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t) | |
| 294 | { | ||
| 295 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 296 | 53475 | EntryPoint *ep = t->ep; | |
| 297 | 53475 | const VVCSPS *sps = fc->ps.sps; | |
| 298 | |||
| 299 |
2/2✓ Branch 0 taken 1436 times.
✓ Branch 1 taken 52039 times.
|
53475 | if (sps->r->sps_entropy_coding_sync_enabled_flag) { |
| 300 |
2/2✓ Branch 0 taken 212 times.
✓ Branch 1 taken 1224 times.
|
1436 | if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) { |
| 301 | 212 | EntryPoint *next = ep + 1; | |
| 302 |
4/4✓ Branch 0 taken 159 times.
✓ Branch 1 taken 53 times.
✓ Branch 3 taken 153 times.
✓ Branch 4 taken 6 times.
|
212 | if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) |
| 303 | 153 | ep_init_wpp(next, ep, sps); | |
| 304 | } | ||
| 305 |
4/4✓ Branch 0 taken 1075 times.
✓ Branch 1 taken 361 times.
✓ Branch 3 taken 1071 times.
✓ Branch 4 taken 4 times.
|
1436 | if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1)) |
| 306 | 1071 | frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE); | |
| 307 | } | ||
| 308 | |||
| 309 |
2/2✓ Branch 0 taken 51271 times.
✓ Branch 1 taken 2204 times.
|
53475 | if (t->ctu_idx + 1 < t->ep->ctu_end) { |
| 310 | 51271 | const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1]; | |
| 311 | 51271 | const int next_rx = next_rs % ft->ctu_width; | |
| 312 | 51271 | const int next_ry = next_rs / ft->ctu_width; | |
| 313 | 51271 | frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE); | |
| 314 | } | ||
| 315 | 53475 | } | |
| 316 | |||
| 317 | 53475 | static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs) | |
| 318 | { | ||
| 319 | 53475 | const VVCSH *sh = &sc->sh; | |
| 320 | |||
| 321 |
2/2✓ Branch 0 taken 47105 times.
✓ Branch 1 taken 6370 times.
|
53475 | if (!IS_I(sh->r)) { |
| 322 | 47105 | CTU *ctu = fc->tab.ctus + rs; | |
| 323 |
2/2✓ Branch 0 taken 94210 times.
✓ Branch 1 taken 47105 times.
|
141315 | for (int lx = 0; lx < 2; lx++) { |
| 324 |
2/2✓ Branch 0 taken 192208 times.
✓ Branch 1 taken 94210 times.
|
286418 | for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) { |
| 325 | 192208 | int y = ctu->max_y[lx][i]; | |
| 326 | 192208 | VVCRefPic *refp = sc->rpl[lx].refs + i; | |
| 327 | 192208 | VVCFrame *ref = refp->ref; | |
| 328 |
3/4✓ Branch 0 taken 192208 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 95117 times.
✓ Branch 3 taken 97091 times.
|
192208 | if (ref && y >= 0) { |
| 329 |
2/2✓ Branch 0 taken 214 times.
✓ Branch 1 taken 94903 times.
|
95117 | if (refp->is_scaled) |
| 330 | 214 | y = y * refp->scale[1] >> 14; | |
| 331 | 95117 | add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER); | |
| 332 | } | ||
| 333 | } | ||
| 334 | } | ||
| 335 | } | ||
| 336 | 53475 | } | |
| 337 | |||
| 338 | 53475 | static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry) | |
| 339 | { | ||
| 340 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 341 | 53475 | const int rs = ry * ft->ctu_width + rx; | |
| 342 | 53475 | const int slice_idx = fc->tab.slice_idx[rs]; | |
| 343 | 53475 | VVCTask *t = ft->tasks + rs; | |
| 344 | 53475 | const SliceContext *sc = fc->slices[slice_idx]; | |
| 345 | |||
| 346 | 53475 | schedule_next_parse(s, fc, sc, t); | |
| 347 | 53475 | schedule_inter(s, fc, sc, t, rs); | |
| 348 | 53475 | } | |
| 349 | |||
| 350 | 760531 | static void task_stage_done(const VVCTask *t, VVCContext *s) | |
| 351 | { | ||
| 352 | 760531 | VVCFrameContext *fc = t->fc; | |
| 353 | 760531 | VVCFrameThread *ft = fc->ft; | |
| 354 | 760531 | const VVCTaskStage stage = t->stage; | |
| 355 | |||
| 356 | #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage) | ||
| 357 | |||
| 358 | //this is a reserve map of ready_score, ordered by zigzag | ||
| 359 |
2/2✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 676146 times.
|
760531 | if (stage == VVC_TASK_STAGE_PARSE) { |
| 360 | 84385 | ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS); | |
| 361 | 84385 | ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS); | |
| 362 |
8/8✓ Branch 0 taken 77387 times.
✓ Branch 1 taken 6998 times.
✓ Branch 2 taken 70389 times.
✓ Branch 3 taken 6998 times.
✓ Branch 4 taken 61932 times.
✓ Branch 5 taken 8457 times.
✓ Branch 6 taken 8457 times.
✓ Branch 7 taken 53475 times.
|
84385 | if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height) |
| 363 | 30910 | return; | |
| 364 | 53475 | parse_task_done(s, fc, t->rx, t->ry); | |
| 365 |
2/2✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 591761 times.
|
676146 | } else if (stage == VVC_TASK_STAGE_RECON) { |
| 366 | 84385 | ADD(-1, 1, VVC_TASK_STAGE_RECON); | |
| 367 | 84385 | ADD( 1, 0, VVC_TASK_STAGE_RECON); | |
| 368 | 84385 | ADD(-1, -1, VVC_TASK_STAGE_LMCS); | |
| 369 | 84385 | ADD( 0, -1, VVC_TASK_STAGE_LMCS); | |
| 370 | 84385 | ADD(-1, 0, VVC_TASK_STAGE_LMCS); | |
| 371 |
2/2✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 507376 times.
|
591761 | } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) { |
| 372 | 84385 | ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V); | |
| 373 | 84385 | ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H); | |
| 374 |
2/2✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 422991 times.
|
507376 | } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) { |
| 375 | 84385 | ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H); | |
| 376 | 84385 | ADD(-1, -1, VVC_TASK_STAGE_SAO); | |
| 377 | 84385 | ADD( 0, -1, VVC_TASK_STAGE_SAO); | |
| 378 | 84385 | ADD(-1, 0, VVC_TASK_STAGE_SAO); | |
| 379 | 84385 | ADD( 1, -1, VVC_TASK_STAGE_SAO); | |
| 380 | 84385 | ADD( 1, 0, VVC_TASK_STAGE_SAO); | |
| 381 |
2/2✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 338606 times.
|
422991 | } else if (stage == VVC_TASK_STAGE_SAO) { |
| 382 | 84385 | ADD(-1, -1, VVC_TASK_STAGE_ALF); | |
| 383 | 84385 | ADD( 0, -1, VVC_TASK_STAGE_ALF); | |
| 384 | 84385 | ADD(-1, 0, VVC_TASK_STAGE_ALF); | |
| 385 | 84385 | ADD( 1, -1, VVC_TASK_STAGE_ALF); | |
| 386 | 84385 | ADD(-1, 1, VVC_TASK_STAGE_ALF); | |
| 387 | 84385 | ADD( 1, 0, VVC_TASK_STAGE_ALF); | |
| 388 | 84385 | ADD( 0, 1, VVC_TASK_STAGE_ALF); | |
| 389 | 84385 | ADD( 1, 1, VVC_TASK_STAGE_ALF); | |
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | 482341 | static int task_is_stage_ready(VVCTask *t, int add) | |
| 394 | { | ||
| 395 | 482341 | const VVCTaskStage stage = t->stage; | |
| 396 | uint8_t score; | ||
| 397 |
2/2✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 428866 times.
|
482341 | if (stage > VVC_TASK_STAGE_ALF) |
| 398 | 53475 | return 0; | |
| 399 | 428866 | score = task_get_score(t, stage) + add; | |
| 400 | 428866 | return task_has_target_score(t, stage, score); | |
| 401 | } | ||
| 402 | |||
| 403 | 53475 | static void check_colocation(VVCContext *s, VVCTask *t) | |
| 404 | { | ||
| 405 | 53475 | const VVCFrameContext *fc = t->fc; | |
| 406 | |||
| 407 |
4/4✓ Branch 0 taken 6474 times.
✓ Branch 1 taken 47001 times.
✓ Branch 2 taken 5894 times.
✓ Branch 3 taken 580 times.
|
53475 | if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) { |
| 408 | 52895 | VVCFrame *col = fc->ref->collocated_ref; | |
| 409 | 52895 | const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]; | |
| 410 |
4/4✓ Branch 0 taken 47001 times.
✓ Branch 1 taken 5894 times.
✓ Branch 2 taken 5457 times.
✓ Branch 3 taken 41544 times.
|
52895 | if (col && first_col) { |
| 411 | //we depend on bottom and right boundary, do not - 1 for y | ||
| 412 | 5457 | const int y = (t->ry << fc->ps.sps->ctb_log2_size_y); | |
| 413 | 5457 | add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y); | |
| 414 | 5457 | return; | |
| 415 | } | ||
| 416 | } | ||
| 417 | 48018 | frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE); | |
| 418 | } | ||
| 419 | |||
| 420 | 2204 | static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep) | |
| 421 | { | ||
| 422 | 2204 | const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start]; | |
| 423 | 2204 | VVCTask *t = ft->tasks + rs; | |
| 424 | |||
| 425 | 2204 | frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE); | |
| 426 | 2204 | } | |
| 427 | |||
| 428 | 1066 | static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 429 | { | ||
| 430 | 1066 | VVCFrameContext *fc = lc->fc; | |
| 431 | 1066 | VVCFrameThread *ft = fc->ft; | |
| 432 | 1066 | const int ret = ff_vvc_per_frame_init(fc); | |
| 433 | |||
| 434 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1066 times.
|
1066 | if (ret < 0) |
| 435 | ✗ | return ret; | |
| 436 | |||
| 437 |
2/2✓ Branch 0 taken 1831 times.
✓ Branch 1 taken 1066 times.
|
2897 | for (int i = 0; i < fc->nb_slices; i++) { |
| 438 | 1831 | SliceContext *sc = fc->slices[i]; | |
| 439 |
2/2✓ Branch 0 taken 2204 times.
✓ Branch 1 taken 1831 times.
|
4035 | for (int j = 0; j < sc->nb_eps; j++) { |
| 440 | 2204 | EntryPoint *ep = sc->eps + j; | |
| 441 |
2/2✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 2204 times.
|
55679 | for (int k = ep->ctu_start; k < ep->ctu_end; k++) { |
| 442 | 53475 | const int rs = sc->sh.ctb_addr_in_curr_slice[k]; | |
| 443 | 53475 | VVCTask *t = ft->tasks + rs; | |
| 444 | 53475 | check_colocation(s, t); | |
| 445 | } | ||
| 446 | 2204 | submit_entry_point(s, ft, sc, ep); | |
| 447 | } | ||
| 448 | } | ||
| 449 | 1066 | return 0; | |
| 450 | } | ||
| 451 | |||
// Count one finished CTU of row ry for progress type idx. The caller that
// completes the row advances the frame's row progress over all consecutive
// completed rows and publishes the new progress on fc->ref.
static void report_frame_progress(VVCFrameContext *fc,
   const int ry, const VVCProgress idx)
{
    VVCFrameThread *ft  = fc->ft;
    const int ctu_size  = ft->ctu_size;
    int old;

    // only the caller that reports the last CTU of the row continues
    if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
        int y;
        ff_mutex_lock(&ft->lock);
        y = old = ft->row_progress[idx];
        // move past every consecutive row that is already complete
        while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
            y++;
        if (old != y)
            ft->row_progress[idx] = y;
        // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
        // We need to unlock ft->lock first
        ff_mutex_unlock(&ft->lock);

        if (old != y) {
            // all rows done is reported as INT_MAX, otherwise as the first line of the next row
            const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
            ff_vvc_report_progress(fc->ref, idx, progress);
        }
    }
}
| 477 | |||
| 478 | 53475 | static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 479 | { | ||
| 480 | int ret; | ||
| 481 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 482 | 53475 | const int rs = t->rs; | |
| 483 | 53475 | const CTU *ctu = fc->tab.ctus + rs; | |
| 484 | |||
| 485 | 53475 | lc->ep = t->ep; | |
| 486 | |||
| 487 | 53475 | ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry); | |
| 488 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
|
53475 | if (ret < 0) |
| 489 | ✗ | return ret; | |
| 490 | |||
| 491 |
2/2✓ Branch 0 taken 30464 times.
✓ Branch 1 taken 23011 times.
|
53475 | if (!ctu->has_dmvr) |
| 492 | 30464 | report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV); | |
| 493 | |||
| 494 | 53475 | return 0; | |
| 495 | } | ||
| 496 | |||
| 497 | 53475 | static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 498 | { | ||
| 499 |
2/2✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
|
53475 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) |
| 500 | 53471 | ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs); | |
| 501 | |||
| 502 | 53475 | return 0; | |
| 503 | } | ||
| 504 | |||
| 505 | 53475 | static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 506 | { | ||
| 507 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 508 | 53475 | const CTU *ctu = fc->tab.ctus + t->rs; | |
| 509 | int ret; | ||
| 510 | |||
| 511 | 53475 | ret = ff_vvc_predict_inter(lc, t->rs); | |
| 512 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
|
53475 | if (ret < 0) |
| 513 | ✗ | return ret; | |
| 514 | |||
| 515 |
2/2✓ Branch 0 taken 23011 times.
✓ Branch 1 taken 30464 times.
|
53475 | if (ctu->has_dmvr) |
| 516 | 23011 | report_frame_progress(fc, t->ry, VVC_PROGRESS_MV); | |
| 517 | |||
| 518 | 53475 | return 0; | |
| 519 | } | ||
| 520 | |||
// RECON stage: reconstruct one CTU; the result of ff_vvc_reconstruct() is returned unchanged.
static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
{
    return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
}
| 525 | |||
| 526 | 53475 | static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 527 | { | ||
| 528 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 529 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 530 | 53475 | const int ctu_size = ft->ctu_size; | |
| 531 | 53475 | const int x0 = t->rx * ctu_size; | |
| 532 | 53475 | const int y0 = t->ry * ctu_size; | |
| 533 | |||
| 534 | 53475 | ff_vvc_lmcs_filter(lc, x0, y0); | |
| 535 | |||
| 536 | 53475 | return 0; | |
| 537 | } | ||
| 538 | |||
| 539 | 53475 | static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 540 | { | ||
| 541 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 542 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 543 | 53475 | const int ctb_size = ft->ctu_size; | |
| 544 | 53475 | const int x0 = t->rx * ctb_size; | |
| 545 | 53475 | const int y0 = t->ry * ctb_size; | |
| 546 | |||
| 547 |
2/2✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
|
53475 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) { |
| 548 | 53471 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
| 549 | 53471 | ff_vvc_deblock_vertical(lc, x0, y0, t->rs); | |
| 550 | } | ||
| 551 | |||
| 552 | 53475 | return 0; | |
| 553 | } | ||
| 554 | |||
| 555 | 53475 | static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 556 | { | ||
| 557 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 558 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 559 | 53475 | const int ctb_size = ft->ctu_size; | |
| 560 | 53475 | const int x0 = t->rx * ctb_size; | |
| 561 | 53475 | const int y0 = t->ry * ctb_size; | |
| 562 | |||
| 563 |
2/2✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
|
53475 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) { |
| 564 | 53471 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
| 565 | 53471 | ff_vvc_deblock_horizontal(lc, x0, y0, t->rs); | |
| 566 | } | ||
| 567 |
2/2✓ Branch 0 taken 53055 times.
✓ Branch 1 taken 420 times.
|
53475 | if (fc->ps.sps->r->sps_sao_enabled_flag) |
| 568 | 53055 | ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1); | |
| 569 | |||
| 570 | 53475 | return 0; | |
| 571 | } | ||
| 572 | |||
| 573 | 53475 | static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 574 | { | ||
| 575 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 576 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 577 | 53475 | const int ctb_size = ft->ctu_size; | |
| 578 | 53475 | const int x0 = t->rx * ctb_size; | |
| 579 | 53475 | const int y0 = t->ry * ctb_size; | |
| 580 | |||
| 581 |
2/2✓ Branch 0 taken 53055 times.
✓ Branch 1 taken 420 times.
|
53475 | if (fc->ps.sps->r->sps_sao_enabled_flag) { |
| 582 | 53055 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
| 583 | 53055 | ff_vvc_sao_filter(lc, x0, y0); | |
| 584 | } | ||
| 585 | |||
| 586 |
2/2✓ Branch 0 taken 43437 times.
✓ Branch 1 taken 10038 times.
|
53475 | if (fc->ps.sps->r->sps_alf_enabled_flag) |
| 587 | 43437 | ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0); | |
| 588 | |||
| 589 | 53475 | return 0; | |
| 590 | } | ||
| 591 | |||
| 592 | 53475 | static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
| 593 | { | ||
| 594 | 53475 | VVCFrameContext *fc = lc->fc; | |
| 595 | 53475 | VVCFrameThread *ft = fc->ft; | |
| 596 | 53475 | const int ctu_size = ft->ctu_size; | |
| 597 | 53475 | const int x0 = t->rx * ctu_size; | |
| 598 | 53475 | const int y0 = t->ry * ctu_size; | |
| 599 | |||
| 600 |
2/2✓ Branch 0 taken 43437 times.
✓ Branch 1 taken 10038 times.
|
53475 | if (fc->ps.sps->r->sps_alf_enabled_flag) { |
| 601 | 43437 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
| 602 | 43437 | ff_vvc_alf_filter(lc, x0, y0); | |
| 603 | } | ||
| 604 | 53475 | report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL); | |
| 605 | |||
| 606 | 53475 | return 0; | |
| 607 | } | ||
| 608 | |||
// Short stage names for log messages, indexed by VVCTaskStage.
// The storage class comes first (placing it later is an obsolescent form) and
// the pointers themselves are const, so the table is fully read-only.
static const char *const task_name[] = {
    "INIT",
    "P",
    "B",
    "I",
    "R",
    "L",
    "V",
    "H",
    "S",
    "A"
};
| 621 | |||
| 622 | typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t); | ||
| 623 | |||
| 624 | 482341 | static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc) | |
| 625 | { | ||
| 626 | int ret; | ||
| 627 | 482341 | VVCFrameContext *fc = t->fc; | |
| 628 | 482341 | VVCFrameThread *ft = fc->ft; | |
| 629 | 482341 | const VVCTaskStage stage = t->stage; | |
| 630 | static const run_func run[] = { | ||
| 631 | run_init, | ||
| 632 | run_parse, | ||
| 633 | run_deblock_bs, | ||
| 634 | run_inter, | ||
| 635 | run_recon, | ||
| 636 | run_lmcs, | ||
| 637 | run_deblock_v, | ||
| 638 | run_deblock_h, | ||
| 639 | run_sao, | ||
| 640 | run_alf, | ||
| 641 | }; | ||
| 642 | |||
| 643 | ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry); | ||
| 644 | |||
| 645 | 482341 | lc->sc = t->sc; | |
| 646 | |||
| 647 |
1/2✓ Branch 0 taken 482341 times.
✗ Branch 1 not taken.
|
482341 | if (!atomic_load(&ft->ret)) { |
| 648 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 482341 times.
|
482341 | if ((ret = run[stage](s, lc, t)) < 0) { |
| 649 | #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H | ||
| 650 | intptr_t zero = 0; | ||
| 651 | #else | ||
| 652 | ✗ | int zero = 0; | |
| 653 | #endif | ||
| 654 | ✗ | atomic_compare_exchange_strong(&ft->ret, &zero, ret); | |
| 655 | ✗ | av_log(s->avctx, AV_LOG_ERROR, | |
| 656 | "frame %5d, %s(%3d, %3d) failed with %d\r\n", | ||
| 657 | ✗ | (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret); | |
| 658 | } | ||
| 659 |
1/2✓ Branch 0 taken 482341 times.
✗ Branch 1 not taken.
|
482341 | if (!ret) |
| 660 | 482341 | task_stage_done(t, s); | |
| 661 | } | ||
| 662 | 482341 | return; | |
| 663 | } | ||
| 664 | |||
| 665 | 266474 | static int task_run(FFTask *_t, void *local_context, void *user_data) | |
| 666 | { | ||
| 667 | 266474 | VVCTask *t = (VVCTask*)_t; | |
| 668 | 266474 | VVCContext *s = (VVCContext *)user_data; | |
| 669 | 266474 | VVCLocalContext *lc = local_context; | |
| 670 | 266474 | VVCFrameThread *ft = t->fc->ft; | |
| 671 | |||
| 672 | 266474 | lc->fc = t->fc; | |
| 673 | |||
| 674 | do { | ||
| 675 | 482341 | task_run_stage(t, s, lc); | |
| 676 | 482341 | t->stage++; | |
| 677 |
2/2✓ Branch 1 taken 215867 times.
✓ Branch 2 taken 266474 times.
|
482341 | } while (task_is_stage_ready(t, 1)); |
| 678 | |||
| 679 |
2/2✓ Branch 0 taken 212999 times.
✓ Branch 1 taken 53475 times.
|
266474 | if (t->stage != VVC_TASK_STAGE_LAST) |
| 680 | 212999 | frame_thread_add_score(s, ft, t->rx, t->ry, t->stage); | |
| 681 | |||
| 682 | 266474 | sheduled_done(ft, &ft->nb_scheduled_tasks); | |
| 683 | |||
| 684 | 266474 | return 0; | |
| 685 | } | ||
| 686 | |||
| 687 | 94 | av_cold FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count) | |
| 688 | { | ||
| 689 | 94 | FFTaskCallbacks callbacks = { | |
| 690 | s, | ||
| 691 | sizeof(VVCLocalContext), | ||
| 692 | PRIORITY_LOWEST + 1, | ||
| 693 | task_run, | ||
| 694 | }; | ||
| 695 | 94 | return ff_executor_alloc(&callbacks, thread_count); | |
| 696 | } | ||
| 697 | |||
| 698 | 94 | av_cold void ff_vvc_executor_free(FFExecutor **e) | |
| 699 | { | ||
| 700 | 94 | ff_executor_free(e); | |
| 701 | 94 | } | |
| 702 | |||
| 703 | 1038 | void ff_vvc_frame_thread_free(VVCFrameContext *fc) | |
| 704 | { | ||
| 705 | 1038 | VVCFrameThread *ft = fc->ft; | |
| 706 | |||
| 707 |
2/2✓ Branch 0 taken 752 times.
✓ Branch 1 taken 286 times.
|
1038 | if (!ft) |
| 708 | 752 | return; | |
| 709 | |||
| 710 | 286 | ff_mutex_destroy(&ft->lock); | |
| 711 | 286 | ff_cond_destroy(&ft->cond); | |
| 712 | 286 | av_freep(&ft->rows); | |
| 713 | 286 | av_freep(&ft->tasks); | |
| 714 | 286 | av_freep(&ft); | |
| 715 | } | ||
| 716 | |||
| 717 | 1066 | static void frame_thread_init_score(VVCFrameContext *fc) | |
| 718 | { | ||
| 719 | 1066 | const VVCFrameThread *ft = fc->ft; | |
| 720 | VVCTask task; | ||
| 721 | |||
| 722 | 1066 | task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0); | |
| 723 | |||
| 724 |
2/2✓ Branch 0 taken 9594 times.
✓ Branch 1 taken 1066 times.
|
10660 | for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) { |
| 725 | 9594 | task.stage = i; | |
| 726 | |||
| 727 |
2/2✓ Branch 0 taken 95301 times.
✓ Branch 1 taken 9594 times.
|
104895 | for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) { |
| 728 | 95301 | task.ry = -1; //top | |
| 729 | 95301 | task_stage_done(&task, NULL); | |
| 730 | 95301 | task.ry = ft->ctu_height; //bottom | |
| 731 | 95301 | task_stage_done(&task, NULL); | |
| 732 | } | ||
| 733 | |||
| 734 |
2/2✓ Branch 0 taken 43794 times.
✓ Branch 1 taken 9594 times.
|
53388 | for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) { |
| 735 | 43794 | task.rx = -1; //left | |
| 736 | 43794 | task_stage_done(&task, NULL); | |
| 737 | 43794 | task.rx = ft->ctu_width; //right | |
| 738 | 43794 | task_stage_done(&task, NULL); | |
| 739 | } | ||
| 740 | } | ||
| 741 | 1066 | } | |
| 742 | |||
| 743 | 1066 | int ff_vvc_frame_thread_init(VVCFrameContext *fc) | |
| 744 | { | ||
| 745 | 1066 | const VVCSPS *sps = fc->ps.sps; | |
| 746 | 1066 | const VVCPPS *pps = fc->ps.pps; | |
| 747 | 1066 | VVCFrameThread *ft = fc->ft; | |
| 748 | int ret; | ||
| 749 | |||
| 750 |
3/4✓ Branch 0 taken 780 times.
✓ Branch 1 taken 286 times.
✓ Branch 2 taken 780 times.
✗ Branch 3 not taken.
|
1066 | if (!ft || ft->ctu_width != pps->ctb_width || |
| 751 |
1/2✓ Branch 0 taken 780 times.
✗ Branch 1 not taken.
|
780 | ft->ctu_height != pps->ctb_height || |
| 752 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 780 times.
|
780 | ft->ctu_size != sps->ctb_size_y) { |
| 753 | |||
| 754 | 286 | ff_vvc_frame_thread_free(fc); | |
| 755 | 286 | ft = av_calloc(1, sizeof(*fc->ft)); | |
| 756 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
|
286 | if (!ft) |
| 757 | ✗ | return AVERROR(ENOMEM); | |
| 758 | |||
| 759 | 286 | ft->ctu_width = fc->ps.pps->ctb_width; | |
| 760 | 286 | ft->ctu_height = fc->ps.pps->ctb_height; | |
| 761 | 286 | ft->ctu_count = fc->ps.pps->ctb_count; | |
| 762 | 286 | ft->ctu_size = fc->ps.sps->ctb_size_y; | |
| 763 | |||
| 764 | 286 | ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows)); | |
| 765 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
|
286 | if (!ft->rows) |
| 766 | ✗ | goto fail; | |
| 767 | |||
| 768 | 286 | ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks)); | |
| 769 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
|
286 | if (!ft->tasks) |
| 770 | ✗ | goto fail; | |
| 771 | |||
| 772 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 286 times.
|
286 | if ((ret = ff_cond_init(&ft->cond, NULL))) |
| 773 | ✗ | goto fail; | |
| 774 | |||
| 775 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 286 times.
|
286 | if ((ret = ff_mutex_init(&ft->lock, NULL))) { |
| 776 | ✗ | ff_cond_destroy(&ft->cond); | |
| 777 | ✗ | goto fail; | |
| 778 | } | ||
| 779 | } | ||
| 780 | 1066 | fc->ft = ft; | |
| 781 | 1066 | ft->ret = 0; | |
| 782 |
2/2✓ Branch 0 taken 4866 times.
✓ Branch 1 taken 1066 times.
|
5932 | for (int y = 0; y < ft->ctu_height; y++) { |
| 783 | 4866 | VVCRowThread *row = ft->rows + y; | |
| 784 | 4866 | memset(row->col_progress, 0, sizeof(row->col_progress)); | |
| 785 | } | ||
| 786 | |||
| 787 |
2/2✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 1066 times.
|
54541 | for (int rs = 0; rs < ft->ctu_count; rs++) { |
| 788 | 53475 | VVCTask *t = ft->tasks + rs; | |
| 789 | 53475 | task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width); | |
| 790 | } | ||
| 791 | |||
| 792 | 1066 | memset(&ft->row_progress[0], 0, sizeof(ft->row_progress)); | |
| 793 | |||
| 794 | 1066 | frame_thread_init_score(fc); | |
| 795 | |||
| 796 | 1066 | return 0; | |
| 797 | |||
| 798 | ✗ | fail: | |
| 799 | ✗ | if (ft) { | |
| 800 | ✗ | av_freep(&ft->rows); | |
| 801 | ✗ | av_freep(&ft->tasks); | |
| 802 | ✗ | av_freep(&ft); | |
| 803 | } | ||
| 804 | |||
| 805 | ✗ | return AVERROR(ENOMEM); | |
| 806 | } | ||
| 807 | |||
| 808 | 1066 | int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc) | |
| 809 | { | ||
| 810 | 1066 | VVCFrameThread *ft = fc->ft; | |
| 811 | |||
| 812 |
2/2✓ Branch 0 taken 1831 times.
✓ Branch 1 taken 1066 times.
|
2897 | for (int i = 0; i < fc->nb_slices; i++) { |
| 813 | 1831 | SliceContext *sc = fc->slices[i]; | |
| 814 |
2/2✓ Branch 0 taken 2204 times.
✓ Branch 1 taken 1831 times.
|
4035 | for (int j = 0; j < sc->nb_eps; j++) { |
| 815 | 2204 | EntryPoint *ep = sc->eps + j; | |
| 816 |
2/2✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 2204 times.
|
55679 | for (int k = ep->ctu_start; k < ep->ctu_end; k++) { |
| 817 | 53475 | const int rs = sc->sh.ctb_addr_in_curr_slice[k]; | |
| 818 | 53475 | VVCTask *t = ft->tasks + rs; | |
| 819 | 53475 | const int ret = task_init_parse(t, sc, ep, k); | |
| 820 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
|
53475 | if (ret < 0) |
| 821 | ✗ | return ret; | |
| 822 | } | ||
| 823 | } | ||
| 824 | } | ||
| 825 |
2/2✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 1066 times.
|
54541 | for (int rs = 0; rs < ft->ctu_count; rs++) { |
| 826 | 53475 | const VVCTask *t = ft->tasks + rs; | |
| 827 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
|
53475 | if (!t->sc) { |
| 828 | ✗ | av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry); | |
| 829 | ✗ | return AVERROR_INVALIDDATA; | |
| 830 | } | ||
| 831 | } | ||
| 832 | 1066 | frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT); | |
| 833 | |||
| 834 | 1066 | return 0; | |
| 835 | } | ||
| 836 | |||
| 837 | 1066 | int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc) | |
| 838 | { | ||
| 839 | 1066 | VVCFrameThread *ft = fc->ft; | |
| 840 | |||
| 841 | 1066 | ff_mutex_lock(&ft->lock); | |
| 842 | |||
| 843 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 1066 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1066 times.
|
1066 | while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners)) |
| 844 | ✗ | ff_cond_wait(&ft->cond, &ft->lock); | |
| 845 | |||
| 846 | 1066 | ff_mutex_unlock(&ft->lock); | |
| 847 | 1066 | ff_vvc_report_frame_finished(fc->ref); | |
| 848 | |||
| 849 | ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order); | ||
| 850 | 1066 | return ft->ret; | |
| 851 | } | ||
| 852 |