FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2025-04-25 22:50:00
Exec Total Coverage
Lines: 407 431 94.4%
Functions: 41 41 100.0%
Branches: 167 192 87.0%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavcodec/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
42 typedef enum VVCTaskStage {
43 VVC_TASK_STAGE_INIT, // for CTU(0, 0) only
44 VVC_TASK_STAGE_PARSE,
45 VVC_TASK_STAGE_DEBLOCK_BS,
46 VVC_TASK_STAGE_INTER,
47 VVC_TASK_STAGE_RECON,
48 VVC_TASK_STAGE_LMCS,
49 VVC_TASK_STAGE_DEBLOCK_V,
50 VVC_TASK_STAGE_DEBLOCK_H,
51 VVC_TASK_STAGE_SAO,
52 VVC_TASK_STAGE_ALF,
53 VVC_TASK_STAGE_LAST
54 } VVCTaskStage;
55
56 typedef struct VVCTask {
57 union {
58 struct VVCTask *next; //for executor debug only
59 FFTask task;
60 } u;
61
62 VVCTaskStage stage;
63
64 // ctu x, y, and raster scan order
65 int rx, ry, rs;
66 VVCFrameContext *fc;
67
68 ProgressListener col_listener;
69 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
70
71 // for parse task only
72 SliceContext *sc;
73 EntryPoint *ep;
74 int ctu_idx; //ctu idx in the current slice
75
76 // tasks with target scores met are ready for scheduling
77 atomic_uchar score[VVC_TASK_STAGE_LAST];
78 atomic_uchar target_inter_score;
79 } VVCTask;
80
81 typedef struct VVCRowThread {
82 atomic_int col_progress[VVC_PROGRESS_LAST];
83 } VVCRowThread;
84
85 typedef struct VVCFrameThread {
86 // error return for tasks
87 atomic_int ret;
88
89 VVCRowThread *rows;
90 VVCTask *tasks;
91
92 int ctu_size;
93 int ctu_width;
94 int ctu_height;
95 int ctu_count;
96
97 // atomic counters of outstanding work; lock is only held around the cond signal/wait
98 atomic_int nb_scheduled_tasks;
99 atomic_int nb_scheduled_listeners;
100
101 int row_progress[VVC_PROGRESS_LAST];
102
103 AVMutex lock;
104 AVCond cond;
105 } VVCFrameThread;
106
107 #define PRIORITY_LOWEST 2
108 233654 static void add_task(VVCContext *s, VVCTask *t)
109 {
110 233654 VVCFrameThread *ft = t->fc->ft;
111 233654 FFTask *task = &t->u.task;
112 233654 const int priorities[] = {
113 0, // VVC_TASK_STAGE_INIT,
114 0, // VVC_TASK_STAGE_PARSE,
115 1, // VVC_TASK_STAGE_DEBLOCK_BS
116 // For an 8K clip, a CTU line completed in the reference frame may trigger 64 or more inter tasks.
117 // We assign these tasks the lowest priority to avoid being overwhelmed by inter tasks.
118 PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER
119 1, // VVC_TASK_STAGE_RECON,
120 1, // VVC_TASK_STAGE_LMCS,
121 1, // VVC_TASK_STAGE_DEBLOCK_V,
122 1, // VVC_TASK_STAGE_DEBLOCK_H,
123 1, // VVC_TASK_STAGE_SAO,
124 1, // VVC_TASK_STAGE_ALF,
125 };
126
127 233654 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128 233654 task->priority = priorities[t->stage];
129 233654 ff_executor_execute(s->executor, task);
130 233654 }
131
132 47659 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133 {
134 47659 memset(t, 0, sizeof(*t));
135 47659 t->stage = stage;
136 47659 t->fc = fc;
137 47659 t->rx = rx;
138 47659 t->ry = ry;
139 47659 t->rs = ry * fc->ft->ctu_width + rx;
140
2/2
✓ Branch 0 taken 476590 times.
✓ Branch 1 taken 47659 times.
524249 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141 476590 atomic_store(t->score + i, 0);
142 47659 atomic_store(&t->target_inter_score, 0);
143 47659 }
144
145 46713 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146 {
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46713 times.
46713 if (t->sc) {
148 // the task is already initialized, so the bitstream is invalid
149 return AVERROR_INVALIDDATA;
150 }
151 46713 t->sc = sc;
152 46713 t->ep = ep;
153 46713 t->ctu_idx = ctu_idx;
154
155 46713 return 0;
156 }
157
158 1440200 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159 {
160 1440200 return atomic_fetch_add(&t->score[stage], 1) + 1;
161 }
162
163 374650 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164 {
165 374650 return atomic_load(&t->score[stage]);
166 }
167
168 //first row in tile or slice
169 5230 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170 {
171 5230 const VVCFrameThread *ft = fc->ft;
172 5230 const VVCPPS *pps = fc->ps.pps;
173
174
2/2
✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 793 times.
5230 if (ry != pps->ctb_to_row_bd[ry]) {
175 4437 const int rs = ry * ft->ctu_width + rx;
176 4437 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177 }
178 793 return 1;
179 }
180
181 1814850 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182 {
183 // l:left, r:right, t: top, b: bottom
184 static const uint8_t target_score[] =
185 {
186 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187 0, //VVC_TASK_STAGE_INTER, not used
188 2, //VVC_TASK_STAGE_RECON, need l + rt recon
189 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
190 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
193 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
194 };
195 1814850 uint8_t target = 0;
196 1814850 VVCFrameContext *fc = t->fc;
197
198
2/2
✓ Branch 0 taken 946 times.
✓ Branch 1 taken 1813904 times.
1814850 if (stage == VVC_TASK_STAGE_INIT)
199 946 return 1;
200
201
2/2
✓ Branch 0 taken 95443 times.
✓ Branch 1 taken 1718461 times.
1813904 if (stage == VVC_TASK_STAGE_PARSE) {
202 95443 const H266RawSPS *rsps = fc->ps.sps->r;
203
4/4
✓ Branch 0 taken 3996 times.
✓ Branch 1 taken 91447 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 783 times.
95443 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204 95443 const int no_prev_stage = t->rs > 0;
205 95443 target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
206
2/2
✓ Branch 0 taken 130130 times.
✓ Branch 1 taken 1588331 times.
1718461 } else if (stage == VVC_TASK_STAGE_INTER) {
207 130130 target = atomic_load(&t->target_inter_score);
208 } else {
209 1588331 target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210 }
211
212 //+1 for previous stage
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1813904 times.
1813904 av_assert0(score <= target + 1);
214 1813904 return score == target + 1;
215 }
216
217 2059774 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218 const int rx, const int ry, const VVCTaskStage stage)
219 {
220 2059774 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221 uint8_t score;
222
223
8/8
✓ Branch 0 taken 1912702 times.
✓ Branch 1 taken 147072 times.
✓ Branch 2 taken 1777886 times.
✓ Branch 3 taken 134816 times.
✓ Branch 4 taken 1594361 times.
✓ Branch 5 taken 183525 times.
✓ Branch 6 taken 154161 times.
✓ Branch 7 taken 1440200 times.
2059774 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224 619574 return;
225
226 1440200 score = task_add_score(t, stage);
227
2/2
✓ Branch 1 taken 233654 times.
✓ Branch 2 taken 1206546 times.
1440200 if (task_has_target_score(t, stage, score)) {
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 233654 times.
233654 av_assert0(s);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 233654 times.
233654 av_assert0(stage == t->stage);
230 233654 add_task(s, t);
231 }
232 }
233
234 321938 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235 {
236
2/2
✓ Branch 0 taken 89230 times.
✓ Branch 1 taken 232708 times.
321938 if (atomic_fetch_sub(scheduled, 1) == 1) {
237 89230 ff_mutex_lock(&ft->lock);
238 89230 ff_cond_signal(&ft->cond);
239 89230 ff_mutex_unlock(&ft->lock);
240 }
241 321938 }
242
243 88284 static void progress_done(VVCProgressListener *_l, const int type)
244 {
245 88284 const ProgressListener *l = (ProgressListener *)_l;
246 88284 const VVCTask *t = l->task;
247 88284 VVCFrameThread *ft = t->fc->ft;
248
249 88284 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250 88284 sheduled_done(ft, &ft->nb_scheduled_listeners);
251 88284 }
252
253 83417 static void pixel_done(VVCProgressListener *l)
254 {
255 83417 progress_done(l, VVC_TASK_STAGE_INTER);
256 83417 }
257
258 4867 static void mv_done(VVCProgressListener *l)
259 {
260 4867 progress_done(l, VVC_TASK_STAGE_PARSE);
261 4867 }
262
263 88284 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264 {
265 88284 const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267 88284 l->task = t;
268 88284 l->s = s;
269 88284 l->l.vp = vp;
270 88284 l->l.y = y;
271
2/2
✓ Branch 0 taken 83417 times.
✓ Branch 1 taken 4867 times.
88284 l->l.progress_done = is_inter ? pixel_done : mv_done;
272
2/2
✓ Branch 0 taken 83417 times.
✓ Branch 1 taken 4867 times.
88284 if (is_inter)
273 83417 atomic_fetch_add(&t->target_inter_score, 1);
274 88284 }
275
276 88284 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278 {
279 88284 VVCFrameThread *ft = t->fc->ft;
280
281 88284 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282 88284 listener_init(l, t, s, vp, y);
283 88284 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284 88284 }
285
286 46713 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
287 {
288 46713 VVCFrameThread *ft = fc->ft;
289 46713 EntryPoint *ep = t->ep;
290 46713 const VVCSPS *sps = fc->ps.sps;
291
292
2/2
✓ Branch 0 taken 1436 times.
✓ Branch 1 taken 45277 times.
46713 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
293
2/2
✓ Branch 0 taken 212 times.
✓ Branch 1 taken 1224 times.
1436 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
294 212 EntryPoint *next = ep + 1;
295
4/4
✓ Branch 0 taken 159 times.
✓ Branch 1 taken 53 times.
✓ Branch 3 taken 153 times.
✓ Branch 4 taken 6 times.
212 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) {
296 153 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
297 153 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
298 }
299 }
300
4/4
✓ Branch 0 taken 1075 times.
✓ Branch 1 taken 361 times.
✓ Branch 3 taken 1071 times.
✓ Branch 4 taken 4 times.
1436 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
301 1071 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
302 }
303
304
2/2
✓ Branch 0 taken 44629 times.
✓ Branch 1 taken 2084 times.
46713 if (t->ctu_idx + 1 < t->ep->ctu_end) {
305 44629 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
306 44629 const int next_rx = next_rs % ft->ctu_width;
307 44629 const int next_ry = next_rs / ft->ctu_width;
308 44629 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
309 }
310 46713 }
311
312 46713 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
313 {
314 46713 const VVCSH *sh = &sc->sh;
315
316
2/2
✓ Branch 0 taken 40817 times.
✓ Branch 1 taken 5896 times.
46713 if (!IS_I(sh->r)) {
317 40817 CTU *ctu = fc->tab.ctus + rs;
318
2/2
✓ Branch 0 taken 81634 times.
✓ Branch 1 taken 40817 times.
122451 for (int lx = 0; lx < 2; lx++) {
319
2/2
✓ Branch 0 taken 166678 times.
✓ Branch 1 taken 81634 times.
248312 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
320 166678 int y = ctu->max_y[lx][i];
321 166678 VVCRefPic *refp = sc->rpl[lx].refs + i;
322 166678 VVCFrame *ref = refp->ref;
323
3/4
✓ Branch 0 taken 166678 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 83417 times.
✓ Branch 3 taken 83261 times.
166678 if (ref && y >= 0) {
324
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 83203 times.
83417 if (refp->is_scaled)
325 214 y = y * refp->scale[1] >> 14;
326 83417 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
327 }
328 }
329 }
330 }
331 46713 }
332
333 46713 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
334 {
335 46713 VVCFrameThread *ft = fc->ft;
336 46713 const int rs = ry * ft->ctu_width + rx;
337 46713 const int slice_idx = fc->tab.slice_idx[rs];
338 46713 VVCTask *t = ft->tasks + rs;
339 46713 const SliceContext *sc = fc->slices[slice_idx];
340
341 46713 schedule_next_parse(s, fc, sc, t);
342 46713 schedule_inter(s, fc, sc, t, rs);
343 46713 }
344
345 663805 static void task_stage_done(const VVCTask *t, VVCContext *s)
346 {
347 663805 VVCFrameContext *fc = t->fc;
348 663805 VVCFrameThread *ft = fc->ft;
349 663805 const VVCTaskStage stage = t->stage;
350
351 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
352
353 //this is a reverse map of ready_score, ordered by zigzag
354
2/2
✓ Branch 0 taken 73651 times.
✓ Branch 1 taken 590154 times.
663805 if (stage == VVC_TASK_STAGE_PARSE) {
355 73651 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS);
356 73651 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS);
357
8/8
✓ Branch 0 taken 67523 times.
✓ Branch 1 taken 6128 times.
✓ Branch 2 taken 61395 times.
✓ Branch 3 taken 6128 times.
✓ Branch 4 taken 54054 times.
✓ Branch 5 taken 7341 times.
✓ Branch 6 taken 7341 times.
✓ Branch 7 taken 46713 times.
73651 if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
358 26938 return;
359 46713 parse_task_done(s, fc, t->rx, t->ry);
360
2/2
✓ Branch 0 taken 73651 times.
✓ Branch 1 taken 516503 times.
590154 } else if (stage == VVC_TASK_STAGE_RECON) {
361 73651 ADD(-1, 1, VVC_TASK_STAGE_RECON);
362 73651 ADD( 1, 0, VVC_TASK_STAGE_RECON);
363 73651 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
364 73651 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
365 73651 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
366
2/2
✓ Branch 0 taken 73651 times.
✓ Branch 1 taken 442852 times.
516503 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
367 73651 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
368 73651 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
369
2/2
✓ Branch 0 taken 73651 times.
✓ Branch 1 taken 369201 times.
442852 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
370 73651 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
371 73651 ADD(-1, -1, VVC_TASK_STAGE_SAO);
372 73651 ADD( 0, -1, VVC_TASK_STAGE_SAO);
373 73651 ADD(-1, 0, VVC_TASK_STAGE_SAO);
374 73651 ADD( 1, -1, VVC_TASK_STAGE_SAO);
375 73651 ADD( 1, 0, VVC_TASK_STAGE_SAO);
376
2/2
✓ Branch 0 taken 73651 times.
✓ Branch 1 taken 295550 times.
369201 } else if (stage == VVC_TASK_STAGE_SAO) {
377 73651 ADD(-1, -1, VVC_TASK_STAGE_ALF);
378 73651 ADD( 0, -1, VVC_TASK_STAGE_ALF);
379 73651 ADD(-1, 0, VVC_TASK_STAGE_ALF);
380 73651 ADD( 1, -1, VVC_TASK_STAGE_ALF);
381 73651 ADD(-1, 1, VVC_TASK_STAGE_ALF);
382 73651 ADD( 1, 0, VVC_TASK_STAGE_ALF);
383 73651 ADD( 0, 1, VVC_TASK_STAGE_ALF);
384 73651 ADD( 1, 1, VVC_TASK_STAGE_ALF);
385 }
386 }
387
388 421363 static int task_is_stage_ready(VVCTask *t, int add)
389 {
390 421363 const VVCTaskStage stage = t->stage;
391 uint8_t score;
392
2/2
✓ Branch 0 taken 46713 times.
✓ Branch 1 taken 374650 times.
421363 if (stage > VVC_TASK_STAGE_ALF)
393 46713 return 0;
394 374650 score = task_get_score(t, stage) + add;
395 374650 return task_has_target_score(t, stage, score);
396 }
397
398 46713 static void check_colocation(VVCContext *s, VVCTask *t)
399 {
400 46713 const VVCFrameContext *fc = t->fc;
401
402
4/4
✓ Branch 0 taken 6000 times.
✓ Branch 1 taken 40713 times.
✓ Branch 2 taken 5420 times.
✓ Branch 3 taken 580 times.
46713 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
403 46133 VVCFrame *col = fc->ref->collocated_ref;
404 46133 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
405
4/4
✓ Branch 0 taken 40713 times.
✓ Branch 1 taken 5420 times.
✓ Branch 2 taken 4867 times.
✓ Branch 3 taken 35846 times.
46133 if (col && first_col) {
406 //we depend on the bottom and right boundaries, so do not subtract 1 from y
407 4867 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
408 4867 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
409 4867 return;
410 }
411 }
412 41846 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
413 }
414
415 2084 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
416 {
417 2084 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
418 2084 VVCTask *t = ft->tasks + rs;
419
420 2084 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
421 2084 }
422
423 946 static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
424 {
425 946 VVCFrameContext *fc = lc->fc;
426 946 VVCFrameThread *ft = fc->ft;
427 946 const int ret = ff_vvc_per_frame_init(fc);
428
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 946 times.
946 if (ret < 0)
430 return ret;
431
432
2/2
✓ Branch 0 taken 1711 times.
✓ Branch 1 taken 946 times.
2657 for (int i = 0; i < fc->nb_slices; i++) {
433 1711 SliceContext *sc = fc->slices[i];
434
2/2
✓ Branch 0 taken 2084 times.
✓ Branch 1 taken 1711 times.
3795 for (int j = 0; j < sc->nb_eps; j++) {
435 2084 EntryPoint *ep = sc->eps + j;
436
2/2
✓ Branch 0 taken 46713 times.
✓ Branch 1 taken 2084 times.
48797 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
437 46713 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
438 46713 VVCTask *t = ft->tasks + rs;
439 46713 check_colocation(s, t);
440 }
441 2084 submit_entry_point(s, ft, sc, ep);
442 }
443 }
444 946 return 0;
445 }
446
447 93426 static void report_frame_progress(VVCFrameContext *fc,
448 const int ry, const VVCProgress idx)
449 {
450 93426 VVCFrameThread *ft = fc->ft;
451 93426 const int ctu_size = ft->ctu_size;
452 int old;
453
454
2/2
✓ Branch 0 taken 8472 times.
✓ Branch 1 taken 84954 times.
93426 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
455 int y;
456 8472 ff_mutex_lock(&ft->lock);
457 8472 y = old = ft->row_progress[idx];
458
4/4
✓ Branch 0 taken 15052 times.
✓ Branch 1 taken 1892 times.
✓ Branch 2 taken 8472 times.
✓ Branch 3 taken 6580 times.
16944 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
459 8472 y++;
460
2/2
✓ Branch 0 taken 8399 times.
✓ Branch 1 taken 73 times.
8472 if (old != y)
461 8399 ft->row_progress[idx] = y;
462 // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
463 // We need to unlock ft->lock first
464 8472 ff_mutex_unlock(&ft->lock);
465
466
2/2
✓ Branch 0 taken 8399 times.
✓ Branch 1 taken 73 times.
8472 if (old != y) {
467
2/2
✓ Branch 0 taken 6507 times.
✓ Branch 1 taken 1892 times.
8399 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
468 8399 ff_vvc_report_progress(fc->ref, idx, progress);
469 }
470 }
471 93426 }
472
473 46713 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
474 {
475 int ret;
476 46713 VVCFrameContext *fc = lc->fc;
477 46713 const int rs = t->rs;
478 46713 const CTU *ctu = fc->tab.ctus + rs;
479
480 46713 lc->ep = t->ep;
481
482 46713 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
483
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46713 times.
46713 if (ret < 0)
484 return ret;
485
486
2/2
✓ Branch 0 taken 26265 times.
✓ Branch 1 taken 20448 times.
46713 if (!ctu->has_dmvr)
487 26265 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
488
489 46713 return 0;
490 }
491
492 46713 static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
493 {
494
2/2
✓ Branch 0 taken 46709 times.
✓ Branch 1 taken 4 times.
46713 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
495 46709 ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
496
497 46713 return 0;
498 }
499
500 46713 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
501 {
502 46713 VVCFrameContext *fc = lc->fc;
503 46713 const CTU *ctu = fc->tab.ctus + t->rs;
504 int ret;
505
506 46713 ret = ff_vvc_predict_inter(lc, t->rs);
507
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46713 times.
46713 if (ret < 0)
508 return ret;
509
510
2/2
✓ Branch 0 taken 20448 times.
✓ Branch 1 taken 26265 times.
46713 if (ctu->has_dmvr)
511 20448 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
512
513 46713 return 0;
514 }
515
516 46713 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
517 {
518 46713 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
519 }
520
521 46713 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522 {
523 46713 VVCFrameContext *fc = lc->fc;
524 46713 VVCFrameThread *ft = fc->ft;
525 46713 const int ctu_size = ft->ctu_size;
526 46713 const int x0 = t->rx * ctu_size;
527 46713 const int y0 = t->ry * ctu_size;
528
529 46713 ff_vvc_lmcs_filter(lc, x0, y0);
530
531 46713 return 0;
532 }
533
534 46713 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
535 {
536 46713 VVCFrameContext *fc = lc->fc;
537 46713 VVCFrameThread *ft = fc->ft;
538 46713 const int ctb_size = ft->ctu_size;
539 46713 const int x0 = t->rx * ctb_size;
540 46713 const int y0 = t->ry * ctb_size;
541
542
2/2
✓ Branch 0 taken 46709 times.
✓ Branch 1 taken 4 times.
46713 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
543 46709 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
544 46709 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
545 }
546
547 46713 return 0;
548 }
549
550 46713 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
551 {
552 46713 VVCFrameContext *fc = lc->fc;
553 46713 VVCFrameThread *ft = fc->ft;
554 46713 const int ctb_size = ft->ctu_size;
555 46713 const int x0 = t->rx * ctb_size;
556 46713 const int y0 = t->ry * ctb_size;
557
558
2/2
✓ Branch 0 taken 46709 times.
✓ Branch 1 taken 4 times.
46713 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
559 46709 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
560 46709 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
561 }
562
2/2
✓ Branch 0 taken 46293 times.
✓ Branch 1 taken 420 times.
46713 if (fc->ps.sps->r->sps_sao_enabled_flag)
563 46293 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
564
565 46713 return 0;
566 }
567
568 46713 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
569 {
570 46713 VVCFrameContext *fc = lc->fc;
571 46713 VVCFrameThread *ft = fc->ft;
572 46713 const int ctb_size = ft->ctu_size;
573 46713 const int x0 = t->rx * ctb_size;
574 46713 const int y0 = t->ry * ctb_size;
575
576
2/2
✓ Branch 0 taken 46293 times.
✓ Branch 1 taken 420 times.
46713 if (fc->ps.sps->r->sps_sao_enabled_flag) {
577 46293 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
578 46293 ff_vvc_sao_filter(lc, x0, y0);
579 }
580
581
2/2
✓ Branch 0 taken 36675 times.
✓ Branch 1 taken 10038 times.
46713 if (fc->ps.sps->r->sps_alf_enabled_flag)
582 36675 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
583
584 46713 return 0;
585 }
586
587 46713 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
588 {
589 46713 VVCFrameContext *fc = lc->fc;
590 46713 VVCFrameThread *ft = fc->ft;
591 46713 const int ctu_size = ft->ctu_size;
592 46713 const int x0 = t->rx * ctu_size;
593 46713 const int y0 = t->ry * ctu_size;
594
595
2/2
✓ Branch 0 taken 36675 times.
✓ Branch 1 taken 10038 times.
46713 if (fc->ps.sps->r->sps_alf_enabled_flag) {
596 36675 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
597 36675 ff_vvc_alf_filter(lc, x0, y0);
598 }
599 46713 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
600
601 46713 return 0;
602 }
603
604 const static char* task_name[] = {
605 "INIT",
606 "P",
607 "B",
608 "I",
609 "R",
610 "L",
611 "V",
612 "H",
613 "S",
614 "A"
615 };
616
617 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
618
619 421363 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
620 {
621 int ret;
622 421363 VVCFrameContext *fc = t->fc;
623 421363 VVCFrameThread *ft = fc->ft;
624 421363 const VVCTaskStage stage = t->stage;
625 static const run_func run[] = {
626 run_init,
627 run_parse,
628 run_deblock_bs,
629 run_inter,
630 run_recon,
631 run_lmcs,
632 run_deblock_v,
633 run_deblock_h,
634 run_sao,
635 run_alf,
636 };
637
638 ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
639
640 421363 lc->sc = t->sc;
641
642
1/2
✓ Branch 0 taken 421363 times.
✗ Branch 1 not taken.
421363 if (!atomic_load(&ft->ret)) {
643
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 421363 times.
421363 if ((ret = run[stage](s, lc, t)) < 0) {
644 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
645 intptr_t zero = 0;
646 #else
647 int zero = 0;
648 #endif
649 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
650 av_log(s->avctx, AV_LOG_ERROR,
651 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
652 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
653 }
654
1/2
✓ Branch 0 taken 421363 times.
✗ Branch 1 not taken.
421363 if (!ret)
655 421363 task_stage_done(t, s);
656 }
657 421363 return;
658 }
659
660 233654 static int task_run(FFTask *_t, void *local_context, void *user_data)
661 {
662 233654 VVCTask *t = (VVCTask*)_t;
663 233654 VVCContext *s = (VVCContext *)user_data;
664 233654 VVCLocalContext *lc = local_context;
665 233654 VVCFrameThread *ft = t->fc->ft;
666
667 233654 lc->fc = t->fc;
668
669 do {
670 421363 task_run_stage(t, s, lc);
671 421363 t->stage++;
672
2/2
✓ Branch 1 taken 187709 times.
✓ Branch 2 taken 233654 times.
421363 } while (task_is_stage_ready(t, 1));
673
674
2/2
✓ Branch 0 taken 186941 times.
✓ Branch 1 taken 46713 times.
233654 if (t->stage != VVC_TASK_STAGE_LAST)
675 186941 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
676
677 233654 sheduled_done(ft, &ft->nb_scheduled_tasks);
678
679 233654 return 0;
680 }
681
682 88 FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
683 {
684 88 FFTaskCallbacks callbacks = {
685 s,
686 sizeof(VVCLocalContext),
687 PRIORITY_LOWEST + 1,
688 task_run,
689 };
690 88 return ff_executor_alloc(&callbacks, thread_count);
691 }
692
693 88 void ff_vvc_executor_free(FFExecutor **e)
694 {
695 88 ff_executor_free(e);
696 88 }
697
698 956 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
699 {
700 956 VVCFrameThread *ft = fc->ft;
701
702
2/2
✓ Branch 0 taken 704 times.
✓ Branch 1 taken 252 times.
956 if (!ft)
703 704 return;
704
705 252 ff_mutex_destroy(&ft->lock);
706 252 ff_cond_destroy(&ft->cond);
707 252 av_freep(&ft->rows);
708 252 av_freep(&ft->tasks);
709 252 av_freep(&ft);
710 }
711
712 946 static void frame_thread_init_score(VVCFrameContext *fc)
713 {
714 946 const VVCFrameThread *ft = fc->ft;
715 VVCTask task;
716
717 946 task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
718
719
2/2
✓ Branch 0 taken 8514 times.
✓ Branch 1 taken 946 times.
9460 for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
720 8514 task.stage = i;
721
722
2/2
✓ Branch 0 taken 83097 times.
✓ Branch 1 taken 8514 times.
91611 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
723 83097 task.ry = -1; //top
724 83097 task_stage_done(&task, NULL);
725 83097 task.ry = ft->ctu_height; //bottom
726 83097 task_stage_done(&task, NULL);
727 }
728
729
2/2
✓ Branch 0 taken 38124 times.
✓ Branch 1 taken 8514 times.
46638 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
730 38124 task.rx = -1; //left
731 38124 task_stage_done(&task, NULL);
732 38124 task.rx = ft->ctu_width; //right
733 38124 task_stage_done(&task, NULL);
734 }
735 }
736 946 }
737
738 946 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
739 {
740 946 const VVCSPS *sps = fc->ps.sps;
741 946 const VVCPPS *pps = fc->ps.pps;
742 946 VVCFrameThread *ft = fc->ft;
743 int ret;
744
745
3/4
✓ Branch 0 taken 694 times.
✓ Branch 1 taken 252 times.
✓ Branch 2 taken 694 times.
✗ Branch 3 not taken.
946 if (!ft || ft->ctu_width != pps->ctb_width ||
746
1/2
✓ Branch 0 taken 694 times.
✗ Branch 1 not taken.
694 ft->ctu_height != pps->ctb_height ||
747
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 694 times.
694 ft->ctu_size != sps->ctb_size_y) {
748
749 252 ff_vvc_frame_thread_free(fc);
750 252 ft = av_calloc(1, sizeof(*fc->ft));
751
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 252 times.
252 if (!ft)
752 return AVERROR(ENOMEM);
753
754 252 ft->ctu_width = fc->ps.pps->ctb_width;
755 252 ft->ctu_height = fc->ps.pps->ctb_height;
756 252 ft->ctu_count = fc->ps.pps->ctb_count;
757 252 ft->ctu_size = fc->ps.sps->ctb_size_y;
758
759 252 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
760
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 252 times.
252 if (!ft->rows)
761 goto fail;
762
763 252 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
764
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 252 times.
252 if (!ft->tasks)
765 goto fail;
766
767
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if ((ret = ff_cond_init(&ft->cond, NULL)))
768 goto fail;
769
770
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
771 ff_cond_destroy(&ft->cond);
772 goto fail;
773 }
774 }
775 946 fc->ft = ft;
776 946 ft->ret = 0;
777
2/2
✓ Branch 0 taken 4236 times.
✓ Branch 1 taken 946 times.
5182 for (int y = 0; y < ft->ctu_height; y++) {
778 4236 VVCRowThread *row = ft->rows + y;
779 4236 memset(row->col_progress, 0, sizeof(row->col_progress));
780 }
781
782
2/2
✓ Branch 0 taken 46713 times.
✓ Branch 1 taken 946 times.
47659 for (int rs = 0; rs < ft->ctu_count; rs++) {
783 46713 VVCTask *t = ft->tasks + rs;
784 46713 task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
785 }
786
787 946 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
788
789 946 frame_thread_init_score(fc);
790
791 946 return 0;
792
793 fail:
794 if (ft) {
795 av_freep(&ft->rows);
796 av_freep(&ft->tasks);
797 av_freep(&ft);
798 }
799
800 return AVERROR(ENOMEM);
801 }
802
803 946 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
804 {
805 946 VVCFrameThread *ft = fc->ft;
806
807
2/2
✓ Branch 0 taken 1711 times.
✓ Branch 1 taken 946 times.
2657 for (int i = 0; i < fc->nb_slices; i++) {
808 1711 SliceContext *sc = fc->slices[i];
809
2/2
✓ Branch 0 taken 2084 times.
✓ Branch 1 taken 1711 times.
3795 for (int j = 0; j < sc->nb_eps; j++) {
810 2084 EntryPoint *ep = sc->eps + j;
811
2/2
✓ Branch 0 taken 46713 times.
✓ Branch 1 taken 2084 times.
48797 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
812 46713 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
813 46713 VVCTask *t = ft->tasks + rs;
814 46713 const int ret = task_init_parse(t, sc, ep, k);
815
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46713 times.
46713 if (ret < 0)
816 return ret;
817 }
818 }
819 }
820
2/2
✓ Branch 0 taken 46713 times.
✓ Branch 1 taken 946 times.
47659 for (int rs = 0; rs < ft->ctu_count; rs++) {
821 46713 const VVCTask *t = ft->tasks + rs;
822
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46713 times.
46713 if (!t->sc) {
823 av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry);
824 return AVERROR_INVALIDDATA;
825 }
826 }
827 946 frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
828
829 946 return 0;
830 }
831
832 946 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
833 {
834 946 VVCFrameThread *ft = fc->ft;
835
836 946 ff_mutex_lock(&ft->lock);
837
838
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 946 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 946 times.
946 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
839 ff_cond_wait(&ft->cond, &ft->lock);
840
841 946 ff_mutex_unlock(&ft->lock);
842 946 ff_vvc_report_frame_finished(fc->ref);
843
844 ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order);
845 946 return ft->ret;
846 }
847