FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2026-01-23 19:11:46
Exec Total Coverage
Lines: 411 435 94.5%
Functions: 42 42 100.0%
Branches: 167 192 87.0%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavcodec/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
/**
 * A progress listener bound to the task that is waiting for it.
 *
 * progress_done() receives a VVCProgressListener pointer and casts it back
 * to ProgressListener, so `l` has to stay the first member.
 */
typedef struct ProgressListener {
    VVCProgressListener l;  // base listener handed to ff_vvc_add_progress_listener()
    struct VVCTask *task;   // task whose score is raised when the listener fires
    VVCContext *s;          // decoder context forwarded to frame_thread_add_score()
} ProgressListener;
41
/**
 * Per-CTU processing stages, in the order task_run() walks through them
 * (it increments VVCTask.stage after each stage).
 *
 * The numeric values index the priorities[] table in add_task(), the run[]
 * table in task_run_stage() and task_name[], so the order must stay in sync
 * with those tables.
 */
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_INIT,                    // for CTU(0, 0) only
    VVC_TASK_STAGE_PARSE,
    VVC_TASK_STAGE_DEBLOCK_BS,
    VVC_TASK_STAGE_INTER,
    VVC_TASK_STAGE_RECON,
    VVC_TASK_STAGE_LMCS,
    VVC_TASK_STAGE_DEBLOCK_V,
    VVC_TASK_STAGE_DEBLOCK_H,
    VVC_TASK_STAGE_SAO,
    VVC_TASK_STAGE_ALF,
    VVC_TASK_STAGE_LAST                     // number of stages, not a real stage
} VVCTaskStage;
55
/**
 * One task per CTU. The same object is rescheduled for each stage of the CTU.
 */
typedef struct VVCTask {
    // must stay the first member: task_run() casts the FFTask pointer it
    // receives from the executor back to VVCTask
    union {
        struct VVCTask *next;                   //for executor debug only
        FFTask task;
    } u;

    // stage to run next; advanced by task_run() after every stage
    VVCTaskStage stage;

    // ctu x, y, and raster scan order
    int rx, ry, rs;
    VVCFrameContext *fc;

    // listener on the collocated picture (VVC_PROGRESS_MV), see check_colocation()
    ProgressListener col_listener;
    // listeners on reference pictures (VVC_PROGRESS_PIXEL), indexed by
    // [reference list][reference index], see schedule_inter()
    ProgressListener listener[2][VVC_MAX_REF_ENTRIES];

    // for parse task only
    SliceContext *sc;
    EntryPoint *ep;
    int ctu_idx;                                //ctu idx in the current slice

    // tasks with target scores met are ready for scheduling
    atomic_uchar score[VVC_TASK_STAGE_LAST];
    // number of pixel listeners registered by listener_init(); used as the
    // target score of the INTER stage
    atomic_uchar target_inter_score;
} VVCTask;
80
/**
 * Per CTU row state.
 */
typedef struct VVCRowThread {
    // number of CTUs of this row that reported each progress type;
    // the row is complete once the counter reaches ctu_width
    atomic_int col_progress[VVC_PROGRESS_LAST];
} VVCRowThread;
84
/**
 * Per frame scheduling state.
 */
typedef struct VVCFrameThread {
    // error return for tasks
    atomic_int ret;

    VVCRowThread *rows;     // ctu_height entries
    VVCTask *tasks;         // ctu_count entries, in raster scan order

    int ctu_size;
    int ctu_width;
    int ctu_height;
    int ctu_count;

    // outstanding work; ff_vvc_frame_wait() sleeps on cond (holding lock)
    // until both counters are zero
    atomic_int nb_scheduled_tasks;
    atomic_int nb_scheduled_listeners;

    // per progress type: number of leading rows already completed;
    // read and written with lock held, see report_frame_progress()
    int row_progress[VVC_PROGRESS_LAST];

    AVMutex lock;
    AVCond  cond;
} VVCFrameThread;
106
107 #define PRIORITY_LOWEST 2
108 266474 static void add_task(VVCContext *s, VVCTask *t)
109 {
110 266474 VVCFrameThread *ft = t->fc->ft;
111 266474 FFTask *task = &t->u.task;
112 266474 const int priorities[] = {
113 0, // VVC_TASK_STAGE_INIT,
114 0, // VVC_TASK_STAGE_PARSE,
115 1, // VVC_TASK_STAGE_DEBLOCK_BS
116 // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks.
117 // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks.
118 PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER
119 1, // VVC_TASK_STAGE_RECON,
120 1, // VVC_TASK_STAGE_LMCS,
121 1, // VVC_TASK_STAGE_DEBLOCK_V,
122 1, // VVC_TASK_STAGE_DEBLOCK_H,
123 1, // VVC_TASK_STAGE_SAO,
124 1, // VVC_TASK_STAGE_ALF,
125 };
126
127 266474 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128 266474 task->priority = priorities[t->stage];
129 266474 ff_executor_execute(s->executor, task);
130 266474 }
131
132 54541 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133 {
134 54541 memset(t, 0, sizeof(*t));
135 54541 t->stage = stage;
136 54541 t->fc = fc;
137 54541 t->rx = rx;
138 54541 t->ry = ry;
139 54541 t->rs = ry * fc->ft->ctu_width + rx;
140
2/2
✓ Branch 0 taken 545410 times.
✓ Branch 1 taken 54541 times.
599951 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141 545410 atomic_store(t->score + i, 0);
142 54541 atomic_store(&t->target_inter_score, 0);
143 54541 }
144
145 53475 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146 {
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
53475 if (t->sc) {
148 // the task already inited, error bitstream
149 return AVERROR_INVALIDDATA;
150 }
151 53475 t->sc = sc;
152 53475 t->ep = ep;
153 53475 t->ctu_idx = ctu_idx;
154
155 53475 return 0;
156 }
157
158 1647128 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159 {
160 1647128 return atomic_fetch_add(&t->score[stage], 1) + 1;
161 }
162
163 428866 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164 {
165 428866 return atomic_load(&t->score[stage]);
166 }
167
168 //first row in tile or slice
169 5230 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170 {
171 5230 const VVCFrameThread *ft = fc->ft;
172 5230 const VVCPPS *pps = fc->ps.pps;
173
174
2/2
✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 793 times.
5230 if (ry != pps->ctb_to_row_bd[ry]) {
175 4437 const int rs = ry * ft->ctu_width + rx;
176 4437 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177 }
178 793 return 1;
179 }
180
181 2075994 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182 {
183 // l:left, r:right, t: top, b: bottom
184 static const uint8_t target_score[] =
185 {
186 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187 0, //VVC_TASK_STAGE_INTER, not used
188 2, //VVC_TASK_STAGE_RECON, need l + rt recon
189 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
190 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
193 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
194 };
195 2075994 uint8_t target = 0;
196 2075994 VVCFrameContext *fc = t->fc;
197
198
2/2
✓ Branch 0 taken 1066 times.
✓ Branch 1 taken 2074928 times.
2075994 if (stage == VVC_TASK_STAGE_INIT)
199 1066 return 1;
200
201
2/2
✓ Branch 0 taken 109087 times.
✓ Branch 1 taken 1965841 times.
2074928 if (stage == VVC_TASK_STAGE_PARSE) {
202 109087 const H266RawSPS *rsps = fc->ps.sps->r;
203
4/4
✓ Branch 0 taken 3996 times.
✓ Branch 1 taken 105091 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 783 times.
109087 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204 109087 const int no_prev_stage = t->rs > 0;
205 109087 target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
206
2/2
✓ Branch 0 taken 148592 times.
✓ Branch 1 taken 1817249 times.
1965841 } else if (stage == VVC_TASK_STAGE_INTER) {
207 148592 target = atomic_load(&t->target_inter_score);
208 } else {
209 1817249 target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210 }
211
212 //+1 for previous stage
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2074928 times.
2074928 av_assert0(score <= target + 1);
214 2074928 return score == target + 1;
215 }
216
217 2358058 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218 const int rx, const int ry, const VVCTaskStage stage)
219 {
220 2358058 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221 uint8_t score;
222
223
8/8
✓ Branch 0 taken 2190106 times.
✓ Branch 1 taken 167952 times.
✓ Branch 2 taken 2036150 times.
✓ Branch 3 taken 153956 times.
✓ Branch 4 taken 1824725 times.
✓ Branch 5 taken 211425 times.
✓ Branch 6 taken 177597 times.
✓ Branch 7 taken 1647128 times.
2358058 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224 710930 return;
225
226 1647128 score = task_add_score(t, stage);
227
2/2
✓ Branch 1 taken 266474 times.
✓ Branch 2 taken 1380654 times.
1647128 if (task_has_target_score(t, stage, score)) {
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 266474 times.
266474 av_assert0(s);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 266474 times.
266474 av_assert0(stage == t->stage);
230 266474 add_task(s, t);
231 }
232 }
233
234 367048 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235 {
236
2/2
✓ Branch 0 taken 101640 times.
✓ Branch 1 taken 265408 times.
367048 if (atomic_fetch_sub(scheduled, 1) == 1) {
237 101640 ff_mutex_lock(&ft->lock);
238 101640 ff_cond_signal(&ft->cond);
239 101640 ff_mutex_unlock(&ft->lock);
240 }
241 367048 }
242
243 100574 static void progress_done(VVCProgressListener *_l, const int type)
244 {
245 100574 const ProgressListener *l = (ProgressListener *)_l;
246 100574 const VVCTask *t = l->task;
247 100574 VVCFrameThread *ft = t->fc->ft;
248
249 100574 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250 100574 sheduled_done(ft, &ft->nb_scheduled_listeners);
251 100574 }
252
253 95117 static void pixel_done(VVCProgressListener *l)
254 {
255 95117 progress_done(l, VVC_TASK_STAGE_INTER);
256 95117 }
257
258 5457 static void mv_done(VVCProgressListener *l)
259 {
260 5457 progress_done(l, VVC_TASK_STAGE_PARSE);
261 5457 }
262
263 100574 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264 {
265 100574 const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267 100574 l->task = t;
268 100574 l->s = s;
269 100574 l->l.vp = vp;
270 100574 l->l.y = y;
271
2/2
✓ Branch 0 taken 95117 times.
✓ Branch 1 taken 5457 times.
100574 l->l.progress_done = is_inter ? pixel_done : mv_done;
272
2/2
✓ Branch 0 taken 95117 times.
✓ Branch 1 taken 5457 times.
100574 if (is_inter)
273 95117 atomic_fetch_add(&t->target_inter_score, 1);
274 100574 }
275
276 100574 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278 {
279 100574 VVCFrameThread *ft = t->fc->ft;
280
281 100574 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282 100574 listener_init(l, t, s, vp, y);
283 100574 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284 100574 }
285
286 153 static void ep_init_wpp(EntryPoint *next, const EntryPoint *ep, const VVCSPS *sps)
287 {
288 153 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
289 153 memcpy(next->pp, ep->pp, sizeof(next->pp));
290 153 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
291 153 }
292
293 53475 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
294 {
295 53475 VVCFrameThread *ft = fc->ft;
296 53475 EntryPoint *ep = t->ep;
297 53475 const VVCSPS *sps = fc->ps.sps;
298
299
2/2
✓ Branch 0 taken 1436 times.
✓ Branch 1 taken 52039 times.
53475 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
300
2/2
✓ Branch 0 taken 212 times.
✓ Branch 1 taken 1224 times.
1436 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
301 212 EntryPoint *next = ep + 1;
302
4/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 53 times.
✓ Branch 3 taken 153 times.
✓ Branch 4 taken 6 times.
212 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1))
303 153 ep_init_wpp(next, ep, sps);
304 }
305
4/4
✓ Branch 0 taken 1075 times.
✓ Branch 1 taken 361 times.
✓ Branch 3 taken 1071 times.
✓ Branch 4 taken 4 times.
1436 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
306 1071 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
307 }
308
309
2/2
✓ Branch 0 taken 51271 times.
✓ Branch 1 taken 2204 times.
53475 if (t->ctu_idx + 1 < t->ep->ctu_end) {
310 51271 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
311 51271 const int next_rx = next_rs % ft->ctu_width;
312 51271 const int next_ry = next_rs / ft->ctu_width;
313 51271 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
314 }
315 53475 }
316
317 53475 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
318 {
319 53475 const VVCSH *sh = &sc->sh;
320
321
2/2
✓ Branch 0 taken 47105 times.
✓ Branch 1 taken 6370 times.
53475 if (!IS_I(sh->r)) {
322 47105 CTU *ctu = fc->tab.ctus + rs;
323
2/2
✓ Branch 0 taken 94210 times.
✓ Branch 1 taken 47105 times.
141315 for (int lx = 0; lx < 2; lx++) {
324
2/2
✓ Branch 0 taken 192208 times.
✓ Branch 1 taken 94210 times.
286418 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
325 192208 int y = ctu->max_y[lx][i];
326 192208 VVCRefPic *refp = sc->rpl[lx].refs + i;
327 192208 VVCFrame *ref = refp->ref;
328
3/4
✓ Branch 0 taken 192208 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 95117 times.
✓ Branch 3 taken 97091 times.
192208 if (ref && y >= 0) {
329
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 94903 times.
95117 if (refp->is_scaled)
330 214 y = y * refp->scale[1] >> 14;
331 95117 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
332 }
333 }
334 }
335 }
336 53475 }
337
338 53475 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
339 {
340 53475 VVCFrameThread *ft = fc->ft;
341 53475 const int rs = ry * ft->ctu_width + rx;
342 53475 const int slice_idx = fc->tab.slice_idx[rs];
343 53475 VVCTask *t = ft->tasks + rs;
344 53475 const SliceContext *sc = fc->slices[slice_idx];
345
346 53475 schedule_next_parse(s, fc, sc, t);
347 53475 schedule_inter(s, fc, sc, t, rs);
348 53475 }
349
350 760531 static void task_stage_done(const VVCTask *t, VVCContext *s)
351 {
352 760531 VVCFrameContext *fc = t->fc;
353 760531 VVCFrameThread *ft = fc->ft;
354 760531 const VVCTaskStage stage = t->stage;
355
356 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
357
358 //this is a reserve map of ready_score, ordered by zigzag
359
2/2
✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 676146 times.
760531 if (stage == VVC_TASK_STAGE_PARSE) {
360 84385 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS);
361 84385 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS);
362
8/8
✓ Branch 0 taken 77387 times.
✓ Branch 1 taken 6998 times.
✓ Branch 2 taken 70389 times.
✓ Branch 3 taken 6998 times.
✓ Branch 4 taken 61932 times.
✓ Branch 5 taken 8457 times.
✓ Branch 6 taken 8457 times.
✓ Branch 7 taken 53475 times.
84385 if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
363 30910 return;
364 53475 parse_task_done(s, fc, t->rx, t->ry);
365
2/2
✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 591761 times.
676146 } else if (stage == VVC_TASK_STAGE_RECON) {
366 84385 ADD(-1, 1, VVC_TASK_STAGE_RECON);
367 84385 ADD( 1, 0, VVC_TASK_STAGE_RECON);
368 84385 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
369 84385 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
370 84385 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
371
2/2
✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 507376 times.
591761 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
372 84385 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
373 84385 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
374
2/2
✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 422991 times.
507376 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
375 84385 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
376 84385 ADD(-1, -1, VVC_TASK_STAGE_SAO);
377 84385 ADD( 0, -1, VVC_TASK_STAGE_SAO);
378 84385 ADD(-1, 0, VVC_TASK_STAGE_SAO);
379 84385 ADD( 1, -1, VVC_TASK_STAGE_SAO);
380 84385 ADD( 1, 0, VVC_TASK_STAGE_SAO);
381
2/2
✓ Branch 0 taken 84385 times.
✓ Branch 1 taken 338606 times.
422991 } else if (stage == VVC_TASK_STAGE_SAO) {
382 84385 ADD(-1, -1, VVC_TASK_STAGE_ALF);
383 84385 ADD( 0, -1, VVC_TASK_STAGE_ALF);
384 84385 ADD(-1, 0, VVC_TASK_STAGE_ALF);
385 84385 ADD( 1, -1, VVC_TASK_STAGE_ALF);
386 84385 ADD(-1, 1, VVC_TASK_STAGE_ALF);
387 84385 ADD( 1, 0, VVC_TASK_STAGE_ALF);
388 84385 ADD( 0, 1, VVC_TASK_STAGE_ALF);
389 84385 ADD( 1, 1, VVC_TASK_STAGE_ALF);
390 }
391 }
392
393 482341 static int task_is_stage_ready(VVCTask *t, int add)
394 {
395 482341 const VVCTaskStage stage = t->stage;
396 uint8_t score;
397
2/2
✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 428866 times.
482341 if (stage > VVC_TASK_STAGE_ALF)
398 53475 return 0;
399 428866 score = task_get_score(t, stage) + add;
400 428866 return task_has_target_score(t, stage, score);
401 }
402
403 53475 static void check_colocation(VVCContext *s, VVCTask *t)
404 {
405 53475 const VVCFrameContext *fc = t->fc;
406
407
4/4
✓ Branch 0 taken 6474 times.
✓ Branch 1 taken 47001 times.
✓ Branch 2 taken 5894 times.
✓ Branch 3 taken 580 times.
53475 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
408 52895 VVCFrame *col = fc->ref->collocated_ref;
409 52895 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
410
4/4
✓ Branch 0 taken 47001 times.
✓ Branch 1 taken 5894 times.
✓ Branch 2 taken 5457 times.
✓ Branch 3 taken 41544 times.
52895 if (col && first_col) {
411 //we depend on bottom and right boundary, do not - 1 for y
412 5457 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
413 5457 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
414 5457 return;
415 }
416 }
417 48018 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
418 }
419
420 2204 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
421 {
422 2204 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
423 2204 VVCTask *t = ft->tasks + rs;
424
425 2204 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
426 2204 }
427
428 1066 static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
429 {
430 1066 VVCFrameContext *fc = lc->fc;
431 1066 VVCFrameThread *ft = fc->ft;
432 1066 const int ret = ff_vvc_per_frame_init(fc);
433
434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1066 times.
1066 if (ret < 0)
435 return ret;
436
437
2/2
✓ Branch 0 taken 1831 times.
✓ Branch 1 taken 1066 times.
2897 for (int i = 0; i < fc->nb_slices; i++) {
438 1831 SliceContext *sc = fc->slices[i];
439
2/2
✓ Branch 0 taken 2204 times.
✓ Branch 1 taken 1831 times.
4035 for (int j = 0; j < sc->nb_eps; j++) {
440 2204 EntryPoint *ep = sc->eps + j;
441
2/2
✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 2204 times.
55679 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
442 53475 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
443 53475 VVCTask *t = ft->tasks + rs;
444 53475 check_colocation(s, t);
445 }
446 2204 submit_entry_point(s, ft, sc, ep);
447 }
448 }
449 1066 return 0;
450 }
451
452 106950 static void report_frame_progress(VVCFrameContext *fc,
453 const int ry, const VVCProgress idx)
454 {
455 106950 VVCFrameThread *ft = fc->ft;
456 106950 const int ctu_size = ft->ctu_size;
457 int old;
458
459
2/2
✓ Branch 0 taken 9732 times.
✓ Branch 1 taken 97218 times.
106950 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
460 int y;
461 9732 ff_mutex_lock(&ft->lock);
462 9732 y = old = ft->row_progress[idx];
463
4/4
✓ Branch 0 taken 17332 times.
✓ Branch 1 taken 2132 times.
✓ Branch 2 taken 9732 times.
✓ Branch 3 taken 7600 times.
19464 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
464 9732 y++;
465
2/2
✓ Branch 0 taken 9659 times.
✓ Branch 1 taken 73 times.
9732 if (old != y)
466 9659 ft->row_progress[idx] = y;
467 // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
468 // We need to unlock ft->lock first
469 9732 ff_mutex_unlock(&ft->lock);
470
471
2/2
✓ Branch 0 taken 9659 times.
✓ Branch 1 taken 73 times.
9732 if (old != y) {
472
2/2
✓ Branch 0 taken 7527 times.
✓ Branch 1 taken 2132 times.
9659 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
473 9659 ff_vvc_report_progress(fc->ref, idx, progress);
474 }
475 }
476 106950 }
477
478 53475 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
479 {
480 int ret;
481 53475 VVCFrameContext *fc = lc->fc;
482 53475 const int rs = t->rs;
483 53475 const CTU *ctu = fc->tab.ctus + rs;
484
485 53475 lc->ep = t->ep;
486
487 53475 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
488
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
53475 if (ret < 0)
489 return ret;
490
491
2/2
✓ Branch 0 taken 30464 times.
✓ Branch 1 taken 23011 times.
53475 if (!ctu->has_dmvr)
492 30464 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
493
494 53475 return 0;
495 }
496
497 53475 static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
498 {
499
2/2
✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
53475 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
500 53471 ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
501
502 53475 return 0;
503 }
504
505 53475 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
506 {
507 53475 VVCFrameContext *fc = lc->fc;
508 53475 const CTU *ctu = fc->tab.ctus + t->rs;
509 int ret;
510
511 53475 ret = ff_vvc_predict_inter(lc, t->rs);
512
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
53475 if (ret < 0)
513 return ret;
514
515
2/2
✓ Branch 0 taken 23011 times.
✓ Branch 1 taken 30464 times.
53475 if (ctu->has_dmvr)
516 23011 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
517
518 53475 return 0;
519 }
520
521 53475 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522 {
523 53475 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
524 }
525
526 53475 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
527 {
528 53475 VVCFrameContext *fc = lc->fc;
529 53475 VVCFrameThread *ft = fc->ft;
530 53475 const int ctu_size = ft->ctu_size;
531 53475 const int x0 = t->rx * ctu_size;
532 53475 const int y0 = t->ry * ctu_size;
533
534 53475 ff_vvc_lmcs_filter(lc, x0, y0);
535
536 53475 return 0;
537 }
538
539 53475 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
540 {
541 53475 VVCFrameContext *fc = lc->fc;
542 53475 VVCFrameThread *ft = fc->ft;
543 53475 const int ctb_size = ft->ctu_size;
544 53475 const int x0 = t->rx * ctb_size;
545 53475 const int y0 = t->ry * ctb_size;
546
547
2/2
✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
53475 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
548 53471 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
549 53471 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
550 }
551
552 53475 return 0;
553 }
554
555 53475 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
556 {
557 53475 VVCFrameContext *fc = lc->fc;
558 53475 VVCFrameThread *ft = fc->ft;
559 53475 const int ctb_size = ft->ctu_size;
560 53475 const int x0 = t->rx * ctb_size;
561 53475 const int y0 = t->ry * ctb_size;
562
563
2/2
✓ Branch 0 taken 53471 times.
✓ Branch 1 taken 4 times.
53475 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
564 53471 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
565 53471 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
566 }
567
2/2
✓ Branch 0 taken 53055 times.
✓ Branch 1 taken 420 times.
53475 if (fc->ps.sps->r->sps_sao_enabled_flag)
568 53055 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
569
570 53475 return 0;
571 }
572
573 53475 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
574 {
575 53475 VVCFrameContext *fc = lc->fc;
576 53475 VVCFrameThread *ft = fc->ft;
577 53475 const int ctb_size = ft->ctu_size;
578 53475 const int x0 = t->rx * ctb_size;
579 53475 const int y0 = t->ry * ctb_size;
580
581
2/2
✓ Branch 0 taken 53055 times.
✓ Branch 1 taken 420 times.
53475 if (fc->ps.sps->r->sps_sao_enabled_flag) {
582 53055 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
583 53055 ff_vvc_sao_filter(lc, x0, y0);
584 }
585
586
2/2
✓ Branch 0 taken 43437 times.
✓ Branch 1 taken 10038 times.
53475 if (fc->ps.sps->r->sps_alf_enabled_flag)
587 43437 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
588
589 53475 return 0;
590 }
591
592 53475 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
593 {
594 53475 VVCFrameContext *fc = lc->fc;
595 53475 VVCFrameThread *ft = fc->ft;
596 53475 const int ctu_size = ft->ctu_size;
597 53475 const int x0 = t->rx * ctu_size;
598 53475 const int y0 = t->ry * ctu_size;
599
600
2/2
✓ Branch 0 taken 43437 times.
✓ Branch 1 taken 10038 times.
53475 if (fc->ps.sps->r->sps_alf_enabled_flag) {
601 43437 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
602 43437 ff_vvc_alf_filter(lc, x0, y0);
603 }
604 53475 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
605
606 53475 return 0;
607 }
608
// short stage names for log messages, indexed by VVCTaskStage;
// both the strings and the table itself are read-only
static const char *const task_name[] = {
    "INIT",     // VVC_TASK_STAGE_INIT
    "P",        // VVC_TASK_STAGE_PARSE
    "B",        // VVC_TASK_STAGE_DEBLOCK_BS
    "I",        // VVC_TASK_STAGE_INTER
    "R",        // VVC_TASK_STAGE_RECON
    "L",        // VVC_TASK_STAGE_LMCS
    "V",        // VVC_TASK_STAGE_DEBLOCK_V
    "H",        // VVC_TASK_STAGE_DEBLOCK_H
    "S",        // VVC_TASK_STAGE_SAO
    "A"         // VVC_TASK_STAGE_ALF
};
621
622 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
623
624 482341 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
625 {
626 int ret;
627 482341 VVCFrameContext *fc = t->fc;
628 482341 VVCFrameThread *ft = fc->ft;
629 482341 const VVCTaskStage stage = t->stage;
630 static const run_func run[] = {
631 run_init,
632 run_parse,
633 run_deblock_bs,
634 run_inter,
635 run_recon,
636 run_lmcs,
637 run_deblock_v,
638 run_deblock_h,
639 run_sao,
640 run_alf,
641 };
642
643 ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
644
645 482341 lc->sc = t->sc;
646
647
1/2
✓ Branch 0 taken 482341 times.
✗ Branch 1 not taken.
482341 if (!atomic_load(&ft->ret)) {
648
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 482341 times.
482341 if ((ret = run[stage](s, lc, t)) < 0) {
649 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
650 intptr_t zero = 0;
651 #else
652 int zero = 0;
653 #endif
654 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
655 av_log(s->avctx, AV_LOG_ERROR,
656 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
657 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
658 }
659
1/2
✓ Branch 0 taken 482341 times.
✗ Branch 1 not taken.
482341 if (!ret)
660 482341 task_stage_done(t, s);
661 }
662 482341 return;
663 }
664
665 266474 static int task_run(FFTask *_t, void *local_context, void *user_data)
666 {
667 266474 VVCTask *t = (VVCTask*)_t;
668 266474 VVCContext *s = (VVCContext *)user_data;
669 266474 VVCLocalContext *lc = local_context;
670 266474 VVCFrameThread *ft = t->fc->ft;
671
672 266474 lc->fc = t->fc;
673
674 do {
675 482341 task_run_stage(t, s, lc);
676 482341 t->stage++;
677
2/2
✓ Branch 1 taken 215867 times.
✓ Branch 2 taken 266474 times.
482341 } while (task_is_stage_ready(t, 1));
678
679
2/2
✓ Branch 0 taken 212999 times.
✓ Branch 1 taken 53475 times.
266474 if (t->stage != VVC_TASK_STAGE_LAST)
680 212999 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
681
682 266474 sheduled_done(ft, &ft->nb_scheduled_tasks);
683
684 266474 return 0;
685 }
686
687 94 av_cold FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
688 {
689 94 FFTaskCallbacks callbacks = {
690 s,
691 sizeof(VVCLocalContext),
692 PRIORITY_LOWEST + 1,
693 task_run,
694 };
695 94 return ff_executor_alloc(&callbacks, thread_count);
696 }
697
698 94 av_cold void ff_vvc_executor_free(FFExecutor **e)
699 {
700 94 ff_executor_free(e);
701 94 }
702
703 1038 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
704 {
705 1038 VVCFrameThread *ft = fc->ft;
706
707
2/2
✓ Branch 0 taken 752 times.
✓ Branch 1 taken 286 times.
1038 if (!ft)
708 752 return;
709
710 286 ff_mutex_destroy(&ft->lock);
711 286 ff_cond_destroy(&ft->cond);
712 286 av_freep(&ft->rows);
713 286 av_freep(&ft->tasks);
714 286 av_freep(&ft);
715 }
716
717 1066 static void frame_thread_init_score(VVCFrameContext *fc)
718 {
719 1066 const VVCFrameThread *ft = fc->ft;
720 VVCTask task;
721
722 1066 task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
723
724
2/2
✓ Branch 0 taken 9594 times.
✓ Branch 1 taken 1066 times.
10660 for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
725 9594 task.stage = i;
726
727
2/2
✓ Branch 0 taken 95301 times.
✓ Branch 1 taken 9594 times.
104895 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
728 95301 task.ry = -1; //top
729 95301 task_stage_done(&task, NULL);
730 95301 task.ry = ft->ctu_height; //bottom
731 95301 task_stage_done(&task, NULL);
732 }
733
734
2/2
✓ Branch 0 taken 43794 times.
✓ Branch 1 taken 9594 times.
53388 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
735 43794 task.rx = -1; //left
736 43794 task_stage_done(&task, NULL);
737 43794 task.rx = ft->ctu_width; //right
738 43794 task_stage_done(&task, NULL);
739 }
740 }
741 1066 }
742
743 1066 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
744 {
745 1066 const VVCSPS *sps = fc->ps.sps;
746 1066 const VVCPPS *pps = fc->ps.pps;
747 1066 VVCFrameThread *ft = fc->ft;
748 int ret;
749
750
3/4
✓ Branch 0 taken 780 times.
✓ Branch 1 taken 286 times.
✓ Branch 2 taken 780 times.
✗ Branch 3 not taken.
1066 if (!ft || ft->ctu_width != pps->ctb_width ||
751
1/2
✓ Branch 0 taken 780 times.
✗ Branch 1 not taken.
780 ft->ctu_height != pps->ctb_height ||
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 780 times.
780 ft->ctu_size != sps->ctb_size_y) {
753
754 286 ff_vvc_frame_thread_free(fc);
755 286 ft = av_calloc(1, sizeof(*fc->ft));
756
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
286 if (!ft)
757 return AVERROR(ENOMEM);
758
759 286 ft->ctu_width = fc->ps.pps->ctb_width;
760 286 ft->ctu_height = fc->ps.pps->ctb_height;
761 286 ft->ctu_count = fc->ps.pps->ctb_count;
762 286 ft->ctu_size = fc->ps.sps->ctb_size_y;
763
764 286 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
765
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
286 if (!ft->rows)
766 goto fail;
767
768 286 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
769
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 286 times.
286 if (!ft->tasks)
770 goto fail;
771
772
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 286 times.
286 if ((ret = ff_cond_init(&ft->cond, NULL)))
773 goto fail;
774
775
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 286 times.
286 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
776 ff_cond_destroy(&ft->cond);
777 goto fail;
778 }
779 }
780 1066 fc->ft = ft;
781 1066 ft->ret = 0;
782
2/2
✓ Branch 0 taken 4866 times.
✓ Branch 1 taken 1066 times.
5932 for (int y = 0; y < ft->ctu_height; y++) {
783 4866 VVCRowThread *row = ft->rows + y;
784 4866 memset(row->col_progress, 0, sizeof(row->col_progress));
785 }
786
787
2/2
✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 1066 times.
54541 for (int rs = 0; rs < ft->ctu_count; rs++) {
788 53475 VVCTask *t = ft->tasks + rs;
789 53475 task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
790 }
791
792 1066 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
793
794 1066 frame_thread_init_score(fc);
795
796 1066 return 0;
797
798 fail:
799 if (ft) {
800 av_freep(&ft->rows);
801 av_freep(&ft->tasks);
802 av_freep(&ft);
803 }
804
805 return AVERROR(ENOMEM);
806 }
807
808 1066 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
809 {
810 1066 VVCFrameThread *ft = fc->ft;
811
812
2/2
✓ Branch 0 taken 1831 times.
✓ Branch 1 taken 1066 times.
2897 for (int i = 0; i < fc->nb_slices; i++) {
813 1831 SliceContext *sc = fc->slices[i];
814
2/2
✓ Branch 0 taken 2204 times.
✓ Branch 1 taken 1831 times.
4035 for (int j = 0; j < sc->nb_eps; j++) {
815 2204 EntryPoint *ep = sc->eps + j;
816
2/2
✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 2204 times.
55679 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
817 53475 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
818 53475 VVCTask *t = ft->tasks + rs;
819 53475 const int ret = task_init_parse(t, sc, ep, k);
820
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
53475 if (ret < 0)
821 return ret;
822 }
823 }
824 }
825
2/2
✓ Branch 0 taken 53475 times.
✓ Branch 1 taken 1066 times.
54541 for (int rs = 0; rs < ft->ctu_count; rs++) {
826 53475 const VVCTask *t = ft->tasks + rs;
827
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 53475 times.
53475 if (!t->sc) {
828 av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry);
829 return AVERROR_INVALIDDATA;
830 }
831 }
832 1066 frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
833
834 1066 return 0;
835 }
836
837 1066 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
838 {
839 1066 VVCFrameThread *ft = fc->ft;
840
841 1066 ff_mutex_lock(&ft->lock);
842
843
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1066 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1066 times.
1066 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
844 ff_cond_wait(&ft->cond, &ft->lock);
845
846 1066 ff_mutex_unlock(&ft->lock);
847 1066 ff_vvc_report_frame_finished(fc->ref);
848
849 ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order);
850 1066 return ft->ret;
851 }
852