FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2026-05-01 22:15:40
Exec Total Coverage
Lines: 411 435 94.5%
Functions: 42 42 100.0%
Branches: 167 192 87.0%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavcodec/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
/*
 * Stages a CTU task walks through, in execution order.
 *
 * The numeric values index several per-stage tables in this file
 * (priorities, target scores, run functions, debug names), so the order
 * must stay in sync with them.
 */
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_INIT,        // for CTU(0, 0) only
    VVC_TASK_STAGE_PARSE,
    VVC_TASK_STAGE_DEBLOCK_BS,
    VVC_TASK_STAGE_INTER,
    VVC_TASK_STAGE_RECON,
    VVC_TASK_STAGE_LMCS,
    VVC_TASK_STAGE_DEBLOCK_V,
    VVC_TASK_STAGE_DEBLOCK_H,
    VVC_TASK_STAGE_SAO,
    VVC_TASK_STAGE_ALF,
    VVC_TASK_STAGE_LAST         // number of stages, not a real stage
} VVCTaskStage;
55
56 typedef struct VVCTask {
57 union {
58 struct VVCTask *next; //for executor debug only
59 FFTask task;
60 } u;
61
62 VVCTaskStage stage;
63
64 // ctu x, y, and raster scan order
65 int rx, ry, rs;
66 VVCFrameContext *fc;
67
68 ProgressListener col_listener;
69 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
70
71 // for parse task only
72 SliceContext *sc;
73 EntryPoint *ep;
74 int ctu_idx; //ctu idx in the current slice
75
76 // tasks with target scores met are ready for scheduling
77 atomic_uchar score[VVC_TASK_STAGE_LAST];
78 atomic_uchar target_inter_score;
79 } VVCTask;
80
81 typedef struct VVCRowThread {
82 atomic_int col_progress[VVC_PROGRESS_LAST];
83 } VVCRowThread;
84
85 typedef struct VVCFrameThread {
86 // error return for tasks
87 atomic_int ret;
88
89 VVCRowThread *rows;
90 VVCTask *tasks;
91
92 int ctu_size;
93 int ctu_width;
94 int ctu_height;
95 int ctu_count;
96
97 //protected by lock
98 atomic_int nb_scheduled_tasks;
99 atomic_int nb_scheduled_listeners;
100
101 int row_progress[VVC_PROGRESS_LAST];
102
103 AVMutex lock;
104 AVCond cond;
105 } VVCFrameThread;
106
107 #define PRIORITY_LOWEST 2
108 279394 static void add_task(VVCContext *s, VVCTask *t)
109 {
110 279394 VVCFrameThread *ft = t->fc->ft;
111 279394 FFTask *task = &t->u.task;
112 279394 const int priorities[] = {
113 0, // VVC_TASK_STAGE_INIT,
114 0, // VVC_TASK_STAGE_PARSE,
115 1, // VVC_TASK_STAGE_DEBLOCK_BS
116 // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks.
117 // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks.
118 PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER
119 1, // VVC_TASK_STAGE_RECON,
120 1, // VVC_TASK_STAGE_LMCS,
121 1, // VVC_TASK_STAGE_DEBLOCK_V,
122 1, // VVC_TASK_STAGE_DEBLOCK_H,
123 1, // VVC_TASK_STAGE_SAO,
124 1, // VVC_TASK_STAGE_ALF,
125 };
126
127 279394 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128 279394 task->priority = priorities[t->stage];
129 279394 ff_executor_execute(s->executor, task);
130 279394 }
131
132 56887 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133 {
134 56887 memset(t, 0, sizeof(*t));
135 56887 t->stage = stage;
136 56887 t->fc = fc;
137 56887 t->rx = rx;
138 56887 t->ry = ry;
139 56887 t->rs = ry * fc->ft->ctu_width + rx;
140
2/2
✓ Branch 0 taken 568870 times.
✓ Branch 1 taken 56887 times.
625757 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141 568870 atomic_store(t->score + i, 0);
142 56887 atomic_store(&t->target_inter_score, 0);
143 56887 }
144
145 55785 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146 {
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55785 times.
55785 if (t->sc) {
148 // the task already inited, error bitstream
149 return AVERROR_INVALIDDATA;
150 }
151 55785 t->sc = sc;
152 55785 t->ep = ep;
153 55785 t->ctu_idx = ctu_idx;
154
155 55785 return 0;
156 }
157
158 1717948 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159 {
160 1717948 return atomic_fetch_add(&t->score[stage], 1) + 1;
161 }
162
163 447382 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164 {
165 447382 return atomic_load(&t->score[stage]);
166 }
167
168 //first row in tile or slice
169 5230 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170 {
171 5230 const VVCFrameThread *ft = fc->ft;
172 5230 const VVCPPS *pps = fc->ps.pps;
173
174
2/2
✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 793 times.
5230 if (ry != pps->ctb_to_row_bd[ry]) {
175 4437 const int rs = ry * ft->ctu_width + rx;
176 4437 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177 }
178 793 return 1;
179 }
180
181 2165330 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182 {
183 // l:left, r:right, t: top, b: bottom
184 static const uint8_t target_score[] =
185 {
186 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187 0, //VVC_TASK_STAGE_INTER, not used
188 2, //VVC_TASK_STAGE_RECON, need l + rt recon
189 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
190 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
193 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
194 };
195 2165330 uint8_t target = 0;
196 2165330 VVCFrameContext *fc = t->fc;
197
198
2/2
✓ Branch 0 taken 1102 times.
✓ Branch 1 taken 2164228 times.
2165330 if (stage == VVC_TASK_STAGE_INIT)
199 1102 return 1;
200
201
2/2
✓ Branch 0 taken 113743 times.
✓ Branch 1 taken 2050485 times.
2164228 if (stage == VVC_TASK_STAGE_PARSE) {
202 113743 const H266RawSPS *rsps = fc->ps.sps->r;
203
4/4
✓ Branch 0 taken 3996 times.
✓ Branch 1 taken 109747 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 783 times.
113743 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204 113743 const int no_prev_stage = t->rs > 0;
205 113743 target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
206
2/2
✓ Branch 0 taken 153326 times.
✓ Branch 1 taken 1897159 times.
2050485 } else if (stage == VVC_TASK_STAGE_INTER) {
207 153326 target = atomic_load(&t->target_inter_score);
208 } else {
209 1897159 target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210 }
211
212 //+1 for previous stage
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2164228 times.
2164228 av_assert0(score <= target + 1);
214 2164228 return score == target + 1;
215 }
216
217 2459422 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218 const int rx, const int ry, const VVCTaskStage stage)
219 {
220 2459422 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221 uint8_t score;
222
223
8/8
✓ Branch 0 taken 2284414 times.
✓ Branch 1 taken 175008 times.
✓ Branch 2 taken 2123990 times.
✓ Branch 3 taken 160424 times.
✓ Branch 4 taken 1903315 times.
✓ Branch 5 taken 220675 times.
✓ Branch 6 taken 185367 times.
✓ Branch 7 taken 1717948 times.
2459422 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224 741474 return;
225
226 1717948 score = task_add_score(t, stage);
227
2/2
✓ Branch 1 taken 279394 times.
✓ Branch 2 taken 1438554 times.
1717948 if (task_has_target_score(t, stage, score)) {
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 279394 times.
279394 av_assert0(s);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 279394 times.
279394 av_assert0(stage == t->stage);
230 279394 add_task(s, t);
231 }
232 }
233
234 382584 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235 {
236
2/2
✓ Branch 0 taken 104292 times.
✓ Branch 1 taken 278292 times.
382584 if (atomic_fetch_sub(scheduled, 1) == 1) {
237 104292 ff_mutex_lock(&ft->lock);
238 104292 ff_cond_signal(&ft->cond);
239 104292 ff_mutex_unlock(&ft->lock);
240 }
241 382584 }
242
243 103190 static void progress_done(VVCProgressListener *_l, const int type)
244 {
245 103190 const ProgressListener *l = (ProgressListener *)_l;
246 103190 const VVCTask *t = l->task;
247 103190 VVCFrameThread *ft = t->fc->ft;
248
249 103190 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250 103190 sheduled_done(ft, &ft->nb_scheduled_listeners);
251 103190 }
252
253 97541 static void pixel_done(VVCProgressListener *l)
254 {
255 97541 progress_done(l, VVC_TASK_STAGE_INTER);
256 97541 }
257
258 5649 static void mv_done(VVCProgressListener *l)
259 {
260 5649 progress_done(l, VVC_TASK_STAGE_PARSE);
261 5649 }
262
263 103190 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264 {
265 103190 const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267 103190 l->task = t;
268 103190 l->s = s;
269 103190 l->l.vp = vp;
270 103190 l->l.y = y;
271
2/2
✓ Branch 0 taken 97541 times.
✓ Branch 1 taken 5649 times.
103190 l->l.progress_done = is_inter ? pixel_done : mv_done;
272
2/2
✓ Branch 0 taken 97541 times.
✓ Branch 1 taken 5649 times.
103190 if (is_inter)
273 97541 atomic_fetch_add(&t->target_inter_score, 1);
274 103190 }
275
276 103190 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278 {
279 103190 VVCFrameThread *ft = t->fc->ft;
280
281 103190 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282 103190 listener_init(l, t, s, vp, y);
283 103190 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284 103190 }
285
286 153 static void ep_init_wpp(EntryPoint *next, const EntryPoint *ep, const VVCSPS *sps)
287 {
288 153 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
289 153 memcpy(next->pp, ep->pp, sizeof(next->pp));
290 153 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
291 153 }
292
293 55785 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
294 {
295 55785 VVCFrameThread *ft = fc->ft;
296 55785 EntryPoint *ep = t->ep;
297 55785 const VVCSPS *sps = fc->ps.sps;
298
299
2/2
✓ Branch 0 taken 1436 times.
✓ Branch 1 taken 54349 times.
55785 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
300
2/2
✓ Branch 0 taken 212 times.
✓ Branch 1 taken 1224 times.
1436 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
301 212 EntryPoint *next = ep + 1;
302
4/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 53 times.
✓ Branch 3 taken 153 times.
✓ Branch 4 taken 6 times.
212 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1))
303 153 ep_init_wpp(next, ep, sps);
304 }
305
4/4
✓ Branch 0 taken 1075 times.
✓ Branch 1 taken 361 times.
✓ Branch 3 taken 1071 times.
✓ Branch 4 taken 4 times.
1436 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
306 1071 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
307 }
308
309
2/2
✓ Branch 0 taken 53511 times.
✓ Branch 1 taken 2274 times.
55785 if (t->ctu_idx + 1 < t->ep->ctu_end) {
310 53511 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
311 53511 const int next_rx = next_rs % ft->ctu_width;
312 53511 const int next_ry = next_rs / ft->ctu_width;
313 53511 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
314 }
315 55785 }
316
317 55785 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
318 {
319 55785 const VVCSH *sh = &sc->sh;
320
321
2/2
✓ Branch 0 taken 48845 times.
✓ Branch 1 taken 6940 times.
55785 if (!IS_I(sh->r)) {
322 48845 CTU *ctu = fc->tab.ctus + rs;
323
2/2
✓ Branch 0 taken 97690 times.
✓ Branch 1 taken 48845 times.
146535 for (int lx = 0; lx < 2; lx++) {
324
2/2
✓ Branch 0 taken 195448 times.
✓ Branch 1 taken 97690 times.
293138 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
325 195448 int y = ctu->max_y[lx][i];
326 195448 VVCRefPic *refp = sc->rpl[lx].refs + i;
327 195448 VVCFrame *ref = refp->ref;
328
3/4
✓ Branch 0 taken 195448 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 97541 times.
✓ Branch 3 taken 97907 times.
195448 if (ref && y >= 0) {
329
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 97327 times.
97541 if (refp->is_scaled)
330 214 y = y * refp->scale[1] >> 14;
331 97541 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
332 }
333 }
334 }
335 }
336 55785 }
337
338 55785 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
339 {
340 55785 VVCFrameThread *ft = fc->ft;
341 55785 const int rs = ry * ft->ctu_width + rx;
342 55785 const int slice_idx = fc->tab.slice_idx[rs];
343 55785 VVCTask *t = ft->tasks + rs;
344 55785 const SliceContext *sc = fc->slices[slice_idx];
345
346 55785 schedule_next_parse(s, fc, sc, t);
347 55785 schedule_inter(s, fc, sc, t, rs);
348 55785 }
349
350 793309 static void task_stage_done(const VVCTask *t, VVCContext *s)
351 {
352 793309 VVCFrameContext *fc = t->fc;
353 793309 VVCFrameThread *ft = fc->ft;
354 793309 const VVCTaskStage stage = t->stage;
355
356 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
357
358 //this is a reserve map of ready_score, ordered by zigzag
359
2/2
✓ Branch 0 taken 88023 times.
✓ Branch 1 taken 705286 times.
793309 if (stage == VVC_TASK_STAGE_PARSE) {
360 88023 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS);
361 88023 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS);
362
8/8
✓ Branch 0 taken 80731 times.
✓ Branch 1 taken 7292 times.
✓ Branch 2 taken 73439 times.
✓ Branch 3 taken 7292 times.
✓ Branch 4 taken 64612 times.
✓ Branch 5 taken 8827 times.
✓ Branch 6 taken 8827 times.
✓ Branch 7 taken 55785 times.
88023 if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
363 32238 return;
364 55785 parse_task_done(s, fc, t->rx, t->ry);
365
2/2
✓ Branch 0 taken 88023 times.
✓ Branch 1 taken 617263 times.
705286 } else if (stage == VVC_TASK_STAGE_RECON) {
366 88023 ADD(-1, 1, VVC_TASK_STAGE_RECON);
367 88023 ADD( 1, 0, VVC_TASK_STAGE_RECON);
368 88023 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
369 88023 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
370 88023 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
371
2/2
✓ Branch 0 taken 88023 times.
✓ Branch 1 taken 529240 times.
617263 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
372 88023 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
373 88023 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
374
2/2
✓ Branch 0 taken 88023 times.
✓ Branch 1 taken 441217 times.
529240 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
375 88023 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
376 88023 ADD(-1, -1, VVC_TASK_STAGE_SAO);
377 88023 ADD( 0, -1, VVC_TASK_STAGE_SAO);
378 88023 ADD(-1, 0, VVC_TASK_STAGE_SAO);
379 88023 ADD( 1, -1, VVC_TASK_STAGE_SAO);
380 88023 ADD( 1, 0, VVC_TASK_STAGE_SAO);
381
2/2
✓ Branch 0 taken 88023 times.
✓ Branch 1 taken 353194 times.
441217 } else if (stage == VVC_TASK_STAGE_SAO) {
382 88023 ADD(-1, -1, VVC_TASK_STAGE_ALF);
383 88023 ADD( 0, -1, VVC_TASK_STAGE_ALF);
384 88023 ADD(-1, 0, VVC_TASK_STAGE_ALF);
385 88023 ADD( 1, -1, VVC_TASK_STAGE_ALF);
386 88023 ADD(-1, 1, VVC_TASK_STAGE_ALF);
387 88023 ADD( 1, 0, VVC_TASK_STAGE_ALF);
388 88023 ADD( 0, 1, VVC_TASK_STAGE_ALF);
389 88023 ADD( 1, 1, VVC_TASK_STAGE_ALF);
390 }
391 }
392
393 503167 static int task_is_stage_ready(VVCTask *t, int add)
394 {
395 503167 const VVCTaskStage stage = t->stage;
396 uint8_t score;
397
2/2
✓ Branch 0 taken 55785 times.
✓ Branch 1 taken 447382 times.
503167 if (stage > VVC_TASK_STAGE_ALF)
398 55785 return 0;
399 447382 score = task_get_score(t, stage) + add;
400 447382 return task_has_target_score(t, stage, score);
401 }
402
403 55785 static void check_colocation(VVCContext *s, VVCTask *t)
404 {
405 55785 const VVCFrameContext *fc = t->fc;
406
407
4/4
✓ Branch 0 taken 7824 times.
✓ Branch 1 taken 47961 times.
✓ Branch 2 taken 6164 times.
✓ Branch 3 taken 1660 times.
55785 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
408 54125 VVCFrame *col = fc->ref->collocated_ref;
409 54125 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
410
4/4
✓ Branch 0 taken 47961 times.
✓ Branch 1 taken 6164 times.
✓ Branch 2 taken 5649 times.
✓ Branch 3 taken 42312 times.
54125 if (col && first_col) {
411 //we depend on bottom and right boundary, do not - 1 for y
412 5649 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
413 5649 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
414 5649 return;
415 }
416 }
417 50136 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
418 }
419
420 2274 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
421 {
422 2274 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
423 2274 VVCTask *t = ft->tasks + rs;
424
425 2274 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
426 2274 }
427
428 1102 static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
429 {
430 1102 VVCFrameContext *fc = lc->fc;
431 1102 VVCFrameThread *ft = fc->ft;
432 1102 const int ret = ff_vvc_per_frame_init(fc);
433
434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1102 times.
1102 if (ret < 0)
435 return ret;
436
437
2/2
✓ Branch 0 taken 1867 times.
✓ Branch 1 taken 1102 times.
2969 for (int i = 0; i < fc->nb_slices; i++) {
438 1867 SliceContext *sc = fc->slices[i];
439
2/2
✓ Branch 0 taken 2274 times.
✓ Branch 1 taken 1867 times.
4141 for (int j = 0; j < sc->nb_eps; j++) {
440 2274 EntryPoint *ep = sc->eps + j;
441
2/2
✓ Branch 0 taken 55785 times.
✓ Branch 1 taken 2274 times.
58059 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
442 55785 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
443 55785 VVCTask *t = ft->tasks + rs;
444 55785 check_colocation(s, t);
445 }
446 2274 submit_entry_point(s, ft, sc, ep);
447 }
448 }
449 1102 return 0;
450 }
451
452 111570 static void report_frame_progress(VVCFrameContext *fc,
453 const int ry, const VVCProgress idx)
454 {
455 111570 VVCFrameThread *ft = fc->ft;
456 111570 const int ctu_size = ft->ctu_size;
457 int old;
458
459
2/2
✓ Branch 0 taken 10176 times.
✓ Branch 1 taken 101394 times.
111570 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
460 int y;
461 10176 ff_mutex_lock(&ft->lock);
462 10176 y = old = ft->row_progress[idx];
463
4/4
✓ Branch 0 taken 18148 times.
✓ Branch 1 taken 2204 times.
✓ Branch 2 taken 10176 times.
✓ Branch 3 taken 7972 times.
20352 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
464 10176 y++;
465
2/2
✓ Branch 0 taken 10103 times.
✓ Branch 1 taken 73 times.
10176 if (old != y)
466 10103 ft->row_progress[idx] = y;
467 // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
468 // We need to unlock ft->lock first
469 10176 ff_mutex_unlock(&ft->lock);
470
471
2/2
✓ Branch 0 taken 10103 times.
✓ Branch 1 taken 73 times.
10176 if (old != y) {
472
2/2
✓ Branch 0 taken 7899 times.
✓ Branch 1 taken 2204 times.
10103 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
473 10103 ff_vvc_report_progress(fc->ref, idx, progress);
474 }
475 }
476 111570 }
477
478 55785 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
479 {
480 int ret;
481 55785 VVCFrameContext *fc = lc->fc;
482 55785 const int rs = t->rs;
483 55785 const CTU *ctu = fc->tab.ctus + rs;
484
485 55785 lc->ep = t->ep;
486
487 55785 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
488
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55785 times.
55785 if (ret < 0)
489 return ret;
490
491
2/2
✓ Branch 0 taken 32774 times.
✓ Branch 1 taken 23011 times.
55785 if (!ctu->has_dmvr)
492 32774 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
493
494 55785 return 0;
495 }
496
497 55785 static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
498 {
499
2/2
✓ Branch 0 taken 55061 times.
✓ Branch 1 taken 724 times.
55785 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
500 55061 ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
501
502 55785 return 0;
503 }
504
505 55785 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
506 {
507 55785 VVCFrameContext *fc = lc->fc;
508 55785 const CTU *ctu = fc->tab.ctus + t->rs;
509 int ret;
510
511 55785 ret = ff_vvc_predict_inter(lc, t->rs);
512
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55785 times.
55785 if (ret < 0)
513 return ret;
514
515
2/2
✓ Branch 0 taken 23011 times.
✓ Branch 1 taken 32774 times.
55785 if (ctu->has_dmvr)
516 23011 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
517
518 55785 return 0;
519 }
520
521 55785 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522 {
523 55785 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
524 }
525
526 55785 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
527 {
528 55785 VVCFrameContext *fc = lc->fc;
529 55785 VVCFrameThread *ft = fc->ft;
530 55785 const int ctu_size = ft->ctu_size;
531 55785 const int x0 = t->rx * ctu_size;
532 55785 const int y0 = t->ry * ctu_size;
533
534 55785 ff_vvc_lmcs_filter(lc, x0, y0);
535
536 55785 return 0;
537 }
538
539 55785 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
540 {
541 55785 VVCFrameContext *fc = lc->fc;
542 55785 VVCFrameThread *ft = fc->ft;
543 55785 const int ctb_size = ft->ctu_size;
544 55785 const int x0 = t->rx * ctb_size;
545 55785 const int y0 = t->ry * ctb_size;
546
547
2/2
✓ Branch 0 taken 55061 times.
✓ Branch 1 taken 724 times.
55785 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
548 55061 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
549 55061 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
550 }
551
552 55785 return 0;
553 }
554
555 55785 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
556 {
557 55785 VVCFrameContext *fc = lc->fc;
558 55785 VVCFrameThread *ft = fc->ft;
559 55785 const int ctb_size = ft->ctu_size;
560 55785 const int x0 = t->rx * ctb_size;
561 55785 const int y0 = t->ry * ctb_size;
562
563
2/2
✓ Branch 0 taken 55061 times.
✓ Branch 1 taken 724 times.
55785 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
564 55061 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
565 55061 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
566 }
567
2/2
✓ Branch 0 taken 53325 times.
✓ Branch 1 taken 2460 times.
55785 if (fc->ps.sps->r->sps_sao_enabled_flag)
568 53325 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
569
570 55785 return 0;
571 }
572
573 55785 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
574 {
575 55785 VVCFrameContext *fc = lc->fc;
576 55785 VVCFrameThread *ft = fc->ft;
577 55785 const int ctb_size = ft->ctu_size;
578 55785 const int x0 = t->rx * ctb_size;
579 55785 const int y0 = t->ry * ctb_size;
580
581
2/2
✓ Branch 0 taken 53325 times.
✓ Branch 1 taken 2460 times.
55785 if (fc->ps.sps->r->sps_sao_enabled_flag) {
582 53325 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
583 53325 ff_vvc_sao_filter(lc, x0, y0);
584 }
585
586
2/2
✓ Branch 0 taken 43707 times.
✓ Branch 1 taken 12078 times.
55785 if (fc->ps.sps->r->sps_alf_enabled_flag)
587 43707 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
588
589 55785 return 0;
590 }
591
592 55785 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
593 {
594 55785 VVCFrameContext *fc = lc->fc;
595 55785 VVCFrameThread *ft = fc->ft;
596 55785 const int ctu_size = ft->ctu_size;
597 55785 const int x0 = t->rx * ctu_size;
598 55785 const int y0 = t->ry * ctu_size;
599
600
2/2
✓ Branch 0 taken 43707 times.
✓ Branch 1 taken 12078 times.
55785 if (fc->ps.sps->r->sps_alf_enabled_flag) {
601 43707 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
602 43707 ff_vvc_alf_filter(lc, x0, y0);
603 }
604 55785 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
605
606 55785 return 0;
607 }
608
// Short stage names used in log messages, indexed by VVCTaskStage.
// Storage class comes first (the other order is obsolescent in C11), and the
// pointers themselves are const so the whole table can live in read-only data.
static const char *const task_name[] = {
    "INIT",
    "P",
    "B",
    "I",
    "R",
    "L",
    "V",
    "H",
    "S",
    "A"
};
621
622 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
623
624 503167 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
625 {
626 int ret;
627 503167 VVCFrameContext *fc = t->fc;
628 503167 VVCFrameThread *ft = fc->ft;
629 503167 const VVCTaskStage stage = t->stage;
630 static const run_func run[] = {
631 run_init,
632 run_parse,
633 run_deblock_bs,
634 run_inter,
635 run_recon,
636 run_lmcs,
637 run_deblock_v,
638 run_deblock_h,
639 run_sao,
640 run_alf,
641 };
642
643 ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
644
645 503167 lc->sc = t->sc;
646
647
1/2
✓ Branch 0 taken 503167 times.
✗ Branch 1 not taken.
503167 if (!atomic_load(&ft->ret)) {
648
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 503167 times.
503167 if ((ret = run[stage](s, lc, t)) < 0) {
649 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
650 intptr_t zero = 0;
651 #else
652 int zero = 0;
653 #endif
654 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
655 av_log(s->avctx, AV_LOG_ERROR,
656 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
657 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
658 }
659
1/2
✓ Branch 0 taken 503167 times.
✗ Branch 1 not taken.
503167 if (!ret)
660 503167 task_stage_done(t, s);
661 }
662 503167 return;
663 }
664
665 279394 static int task_run(FFTask *_t, void *local_context, void *user_data)
666 {
667 279394 VVCTask *t = (VVCTask*)_t;
668 279394 VVCContext *s = (VVCContext *)user_data;
669 279394 VVCLocalContext *lc = local_context;
670 279394 VVCFrameThread *ft = t->fc->ft;
671
672 279394 lc->fc = t->fc;
673
674 do {
675 503167 task_run_stage(t, s, lc);
676 503167 t->stage++;
677
2/2
✓ Branch 1 taken 223773 times.
✓ Branch 2 taken 279394 times.
503167 } while (task_is_stage_ready(t, 1));
678
679
2/2
✓ Branch 0 taken 223609 times.
✓ Branch 1 taken 55785 times.
279394 if (t->stage != VVC_TASK_STAGE_LAST)
680 223609 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
681
682 279394 sheduled_done(ft, &ft->nb_scheduled_tasks);
683
684 279394 return 0;
685 }
686
687 102 av_cold FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
688 {
689 102 FFTaskCallbacks callbacks = {
690 s,
691 sizeof(VVCLocalContext),
692 PRIORITY_LOWEST + 1,
693 task_run,
694 };
695 102 return ff_executor_alloc(&callbacks, thread_count);
696 }
697
698 102 av_cold void ff_vvc_executor_free(FFExecutor **e)
699 {
700 102 ff_executor_free(e);
701 102 }
702
703 1116 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
704 {
705 1116 VVCFrameThread *ft = fc->ft;
706
707
2/2
✓ Branch 0 taken 816 times.
✓ Branch 1 taken 300 times.
1116 if (!ft)
708 816 return;
709
710 300 ff_mutex_destroy(&ft->lock);
711 300 ff_cond_destroy(&ft->cond);
712 300 av_freep(&ft->rows);
713 300 av_freep(&ft->tasks);
714 300 av_freep(&ft);
715 }
716
717 1102 static void frame_thread_init_score(VVCFrameContext *fc)
718 {
719 1102 const VVCFrameThread *ft = fc->ft;
720 VVCTask task;
721
722 1102 task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
723
724
2/2
✓ Branch 0 taken 9918 times.
✓ Branch 1 taken 1102 times.
11020 for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
725 9918 task.stage = i;
726
727
2/2
✓ Branch 0 taken 99279 times.
✓ Branch 1 taken 9918 times.
109197 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
728 99279 task.ry = -1; //top
729 99279 task_stage_done(&task, NULL);
730 99279 task.ry = ft->ctu_height; //bottom
731 99279 task_stage_done(&task, NULL);
732 }
733
734
2/2
✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 9918 times.
55710 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
735 45792 task.rx = -1; //left
736 45792 task_stage_done(&task, NULL);
737 45792 task.rx = ft->ctu_width; //right
738 45792 task_stage_done(&task, NULL);
739 }
740 }
741 1102 }
742
743 1102 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
744 {
745 1102 const VVCSPS *sps = fc->ps.sps;
746 1102 const VVCPPS *pps = fc->ps.pps;
747 1102 VVCFrameThread *ft = fc->ft;
748 int ret;
749
750
3/4
✓ Branch 0 taken 802 times.
✓ Branch 1 taken 300 times.
✓ Branch 2 taken 802 times.
✗ Branch 3 not taken.
1102 if (!ft || ft->ctu_width != pps->ctb_width ||
751
1/2
✓ Branch 0 taken 802 times.
✗ Branch 1 not taken.
802 ft->ctu_height != pps->ctb_height ||
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 802 times.
802 ft->ctu_size != sps->ctb_size_y) {
753
754 300 ff_vvc_frame_thread_free(fc);
755 300 ft = av_calloc(1, sizeof(*fc->ft));
756
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 300 times.
300 if (!ft)
757 return AVERROR(ENOMEM);
758
759 300 ft->ctu_width = fc->ps.pps->ctb_width;
760 300 ft->ctu_height = fc->ps.pps->ctb_height;
761 300 ft->ctu_count = fc->ps.pps->ctb_count;
762 300 ft->ctu_size = fc->ps.sps->ctb_size_y;
763
764 300 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
765
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 300 times.
300 if (!ft->rows)
766 goto fail;
767
768 300 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
769
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 300 times.
300 if (!ft->tasks)
770 goto fail;
771
772
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 300 times.
300 if ((ret = ff_cond_init(&ft->cond, NULL)))
773 goto fail;
774
775
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 300 times.
300 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
776 ff_cond_destroy(&ft->cond);
777 goto fail;
778 }
779 }
780 1102 fc->ft = ft;
781 1102 ft->ret = 0;
782
2/2
✓ Branch 0 taken 5088 times.
✓ Branch 1 taken 1102 times.
6190 for (int y = 0; y < ft->ctu_height; y++) {
783 5088 VVCRowThread *row = ft->rows + y;
784 5088 memset(row->col_progress, 0, sizeof(row->col_progress));
785 }
786
787
2/2
✓ Branch 0 taken 55785 times.
✓ Branch 1 taken 1102 times.
56887 for (int rs = 0; rs < ft->ctu_count; rs++) {
788 55785 VVCTask *t = ft->tasks + rs;
789 55785 task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
790 }
791
792 1102 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
793
794 1102 frame_thread_init_score(fc);
795
796 1102 return 0;
797
798 fail:
799 if (ft) {
800 av_freep(&ft->rows);
801 av_freep(&ft->tasks);
802 av_freep(&ft);
803 }
804
805 return AVERROR(ENOMEM);
806 }
807
808 1102 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
809 {
810 1102 VVCFrameThread *ft = fc->ft;
811
812
2/2
✓ Branch 0 taken 1867 times.
✓ Branch 1 taken 1102 times.
2969 for (int i = 0; i < fc->nb_slices; i++) {
813 1867 SliceContext *sc = fc->slices[i];
814
2/2
✓ Branch 0 taken 2274 times.
✓ Branch 1 taken 1867 times.
4141 for (int j = 0; j < sc->nb_eps; j++) {
815 2274 EntryPoint *ep = sc->eps + j;
816
2/2
✓ Branch 0 taken 55785 times.
✓ Branch 1 taken 2274 times.
58059 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
817 55785 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
818 55785 VVCTask *t = ft->tasks + rs;
819 55785 const int ret = task_init_parse(t, sc, ep, k);
820
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55785 times.
55785 if (ret < 0)
821 return ret;
822 }
823 }
824 }
825
2/2
✓ Branch 0 taken 55785 times.
✓ Branch 1 taken 1102 times.
56887 for (int rs = 0; rs < ft->ctu_count; rs++) {
826 55785 const VVCTask *t = ft->tasks + rs;
827
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55785 times.
55785 if (!t->sc) {
828 av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry);
829 return AVERROR_INVALIDDATA;
830 }
831 }
832 1102 frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
833
834 1102 return 0;
835 }
836
837 1102 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
838 {
839 1102 VVCFrameThread *ft = fc->ft;
840
841 1102 ff_mutex_lock(&ft->lock);
842
843
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1102 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1102 times.
1102 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
844 ff_cond_wait(&ft->cond, &ft->lock);
845
846 1102 ff_mutex_unlock(&ft->lock);
847 1102 ff_vvc_report_frame_finished(fc->ref);
848
849 ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order);
850 1102 return ft->ret;
851 }
852