FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2024-11-20 23:03:26
Exec Total Coverage
Lines: 405 427 94.8%
Functions: 41 41 100.0%
Branches: 161 186 86.6%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavcodec/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
/**
 * Stages a CTU task walks through, in execution order.
 *
 * The numeric values are used as indices into the per-stage tables of this
 * file (priorities, target scores, run functions, debug names), so the
 * order must not change.
 */
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_INIT,        // for CTU(0, 0) only
    VVC_TASK_STAGE_PARSE,
    VVC_TASK_STAGE_DEBLOCK_BS,
    VVC_TASK_STAGE_INTER,
    VVC_TASK_STAGE_RECON,
    VVC_TASK_STAGE_LMCS,
    VVC_TASK_STAGE_DEBLOCK_V,
    VVC_TASK_STAGE_DEBLOCK_H,
    VVC_TASK_STAGE_SAO,
    VVC_TASK_STAGE_ALF,
    VVC_TASK_STAGE_LAST         // number of stages, not a stage itself
} VVCTaskStage;
55
56 typedef struct VVCTask {
57 union {
58 struct VVCTask *next; //for executor debug only
59 FFTask task;
60 } u;
61
62 VVCTaskStage stage;
63
64 // ctu x, y, and raster scan order
65 int rx, ry, rs;
66 VVCFrameContext *fc;
67
68 ProgressListener col_listener;
69 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
70
71 // for parse task only
72 SliceContext *sc;
73 EntryPoint *ep;
74 int ctu_idx; //ctu idx in the current slice
75
76 // tasks with target scores met are ready for scheduling
77 atomic_uchar score[VVC_TASK_STAGE_LAST];
78 atomic_uchar target_inter_score;
79 } VVCTask;
80
81 typedef struct VVCRowThread {
82 atomic_int col_progress[VVC_PROGRESS_LAST];
83 } VVCRowThread;
84
85 typedef struct VVCFrameThread {
86 // error return for tasks
87 atomic_int ret;
88
89 VVCRowThread *rows;
90 VVCTask *tasks;
91
92 int ctu_size;
93 int ctu_width;
94 int ctu_height;
95 int ctu_count;
96
97 //protected by lock
98 atomic_int nb_scheduled_tasks;
99 atomic_int nb_scheduled_listeners;
100
101 int row_progress[VVC_PROGRESS_LAST];
102
103 AVMutex lock;
104 AVCond cond;
105 } VVCFrameThread;
106
107 #define PRIORITY_LOWEST 2
108 228253 static void add_task(VVCContext *s, VVCTask *t)
109 {
110 228253 VVCFrameThread *ft = t->fc->ft;
111 228253 FFTask *task = &t->u.task;
112 228253 const int priorities[] = {
113 0, // VVC_TASK_STAGE_INIT,
114 0, // VVC_TASK_STAGE_PARSE,
115 1, // VVC_TASK_STAGE_DEBLOCK_BS
116 // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks.
117 // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks.
118 PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER
119 1, // VVC_TASK_STAGE_RECON,
120 1, // VVC_TASK_STAGE_LMCS,
121 1, // VVC_TASK_STAGE_DEBLOCK_V,
122 1, // VVC_TASK_STAGE_DEBLOCK_H,
123 1, // VVC_TASK_STAGE_SAO,
124 1, // VVC_TASK_STAGE_ALF,
125 };
126
127 228253 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128 228253 task->priority = priorities[t->stage];
129 228253 ff_executor_execute(s->executor, task);
130 228253 }
131
132 46728 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133 {
134 46728 memset(t, 0, sizeof(*t));
135 46728 t->stage = stage;
136 46728 t->fc = fc;
137 46728 t->rx = rx;
138 46728 t->ry = ry;
139 46728 t->rs = ry * fc->ft->ctu_width + rx;
140
2/2
✓ Branch 0 taken 467280 times.
✓ Branch 1 taken 46728 times.
514008 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141 467280 atomic_store(t->score + i, 0);
142 46728 atomic_store(&t->target_inter_score, 0);
143 46728 }
144
145 45792 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146 {
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
45792 if (t->sc) {
148 // the task already inited, error bitstream
149 return AVERROR_INVALIDDATA;
150 }
151 45792 t->sc = sc;
152 45792 t->ep = ep;
153 45792 t->ctu_idx = ctu_idx;
154
155 45792 return 0;
156 }
157
158 1407224 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159 {
160 1407224 return atomic_fetch_add(&t->score[stage], 1) + 1;
161 }
162
163 367272 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164 {
165 367272 return atomic_load(&t->score[stage]);
166 }
167
168 //first row in tile or slice
169 5202 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170 {
171 5202 const VVCFrameThread *ft = fc->ft;
172 5202 const VVCPPS *pps = fc->ps.pps;
173
174
2/2
✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 765 times.
5202 if (ry != pps->ctb_to_row_bd[ry]) {
175 4437 const int rs = ry * ft->ctu_width + rx;
176 4437 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177 }
178 765 return 1;
179 }
180
181 1774496 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182 {
183 // l:left, r:right, t: top, b: bottom
184 static const uint8_t target_score[] =
185 {
186 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187 0, //VVC_TASK_STAGE_INTER, not used
188 2, //VVC_TASK_STAGE_RECON, need l + rt recon
189 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
190 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
193 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
194 };
195 1774496 uint8_t target = 0;
196 1774496 VVCFrameContext *fc = t->fc;
197
198
2/2
✓ Branch 0 taken 936 times.
✓ Branch 1 taken 1773560 times.
1774496 if (stage == VVC_TASK_STAGE_INIT)
199 936 return 1;
200
201
2/2
✓ Branch 0 taken 93591 times.
✓ Branch 1 taken 1679969 times.
1773560 if (stage == VVC_TASK_STAGE_PARSE) {
202 93591 const H266RawSPS *rsps = fc->ps.sps->r;
203
4/4
✓ Branch 0 taken 3978 times.
✓ Branch 1 taken 89613 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 765 times.
93591 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204 93591 const int no_prev_stage = t->rs > 0;
205 93591 target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
206
2/2
✓ Branch 0 taken 123748 times.
✓ Branch 1 taken 1556221 times.
1679969 } else if (stage == VVC_TASK_STAGE_INTER) {
207 123748 target = atomic_load(&t->target_inter_score);
208 } else {
209 1556221 target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210 }
211
212 //+1 for previous stage
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1773560 times.
1773560 av_assert0(score <= target + 1);
214 1773560 return score == target + 1;
215 }
216
217 2015850 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218 const int rx, const int ry, const VVCTaskStage stage)
219 {
220 2015850 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221 uint8_t score;
222
223
8/8
✓ Branch 0 taken 1872210 times.
✓ Branch 1 taken 143640 times.
✓ Branch 2 taken 1740540 times.
✓ Branch 3 taken 131670 times.
✓ Branch 4 taken 1559390 times.
✓ Branch 5 taken 181150 times.
✓ Branch 6 taken 152166 times.
✓ Branch 7 taken 1407224 times.
2015850 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224 608626 return;
225
226 1407224 score = task_add_score(t, stage);
227
2/2
✓ Branch 1 taken 228253 times.
✓ Branch 2 taken 1178971 times.
1407224 if (task_has_target_score(t, stage, score)) {
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 228253 times.
228253 av_assert0(s);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 228253 times.
228253 av_assert0(stage == t->stage);
230 228253 add_task(s, t);
231 }
232 }
233
234 310574 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235 {
236
2/2
✓ Branch 0 taken 83257 times.
✓ Branch 1 taken 227317 times.
310574 if (atomic_fetch_sub(scheduled, 1) == 1) {
237 83257 ff_mutex_lock(&ft->lock);
238 83257 ff_cond_signal(&ft->cond);
239 83257 ff_mutex_unlock(&ft->lock);
240 }
241 310574 }
242
243 82321 static void progress_done(VVCProgressListener *_l, const int type)
244 {
245 82321 const ProgressListener *l = (ProgressListener *)_l;
246 82321 const VVCTask *t = l->task;
247 82321 VVCFrameThread *ft = t->fc->ft;
248
249 82321 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250 82321 sheduled_done(ft, &ft->nb_scheduled_listeners);
251 82321 }
252
253 77956 static void pixel_done(VVCProgressListener *l)
254 {
255 77956 progress_done(l, VVC_TASK_STAGE_INTER);
256 77956 }
257
258 4365 static void mv_done(VVCProgressListener *l)
259 {
260 4365 progress_done(l, VVC_TASK_STAGE_PARSE);
261 4365 }
262
263 82321 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264 {
265 82321 const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267 82321 l->task = t;
268 82321 l->s = s;
269 82321 l->l.vp = vp;
270 82321 l->l.y = y;
271
2/2
✓ Branch 0 taken 77956 times.
✓ Branch 1 taken 4365 times.
82321 l->l.progress_done = is_inter ? pixel_done : mv_done;
272
2/2
✓ Branch 0 taken 77956 times.
✓ Branch 1 taken 4365 times.
82321 if (is_inter)
273 77956 atomic_fetch_add(&t->target_inter_score, 1);
274 82321 }
275
276 82321 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278 {
279 82321 VVCFrameThread *ft = t->fc->ft;
280
281 82321 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282 82321 listener_init(l, t, s, vp, y);
283 82321 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284 82321 }
285
286 45792 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
287 {
288 45792 VVCFrameThread *ft = fc->ft;
289 45792 EntryPoint *ep = t->ep;
290 45792 const VVCSPS *sps = fc->ps.sps;
291
292
2/2
✓ Branch 0 taken 1428 times.
✓ Branch 1 taken 44364 times.
45792 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
293
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 1224 times.
1428 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
294 204 EntryPoint *next = ep + 1;
295
3/4
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 51 times.
✓ Branch 3 taken 153 times.
✗ Branch 4 not taken.
204 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) {
296 153 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
297 153 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
298 }
299 }
300
3/4
✓ Branch 0 taken 1071 times.
✓ Branch 1 taken 357 times.
✓ Branch 3 taken 1071 times.
✗ Branch 4 not taken.
1428 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
301 1071 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
302 }
303
304
2/2
✓ Branch 0 taken 43766 times.
✓ Branch 1 taken 2026 times.
45792 if (t->ctu_idx + 1 < t->ep->ctu_end) {
305 43766 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
306 43766 const int next_rx = next_rs % ft->ctu_width;
307 43766 const int next_ry = next_rs / ft->ctu_width;
308 43766 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
309 }
310 45792 }
311
312 45792 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
313 {
314 45792 const VVCSH *sh = &sc->sh;
315
316
2/2
✓ Branch 0 taken 40006 times.
✓ Branch 1 taken 5786 times.
45792 if (!IS_I(sh->r)) {
317 40006 CTU *ctu = fc->tab.ctus + rs;
318
2/2
✓ Branch 0 taken 80012 times.
✓ Branch 1 taken 40006 times.
120018 for (int lx = 0; lx < 2; lx++) {
319
2/2
✓ Branch 0 taken 156332 times.
✓ Branch 1 taken 80012 times.
236344 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
320 156332 int y = ctu->max_y[lx][i];
321 156332 VVCRefPic *refp = sc->rpl[lx].refs + i;
322 156332 VVCFrame *ref = refp->ref;
323
3/4
✓ Branch 0 taken 156332 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 77956 times.
✓ Branch 3 taken 78376 times.
156332 if (ref && y >= 0) {
324
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 77742 times.
77956 if (refp->is_scaled)
325 214 y = y * refp->scale[1] >> 14;
326 77956 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
327 }
328 }
329 }
330 }
331 45792 }
332
333 45792 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
334 {
335 45792 VVCFrameThread *ft = fc->ft;
336 45792 const int rs = ry * ft->ctu_width + rx;
337 45792 const int slice_idx = fc->tab.slice_idx[rs];
338 45792 VVCTask *t = ft->tasks + rs;
339 45792 const SliceContext *sc = fc->slices[slice_idx];
340
341 45792 schedule_next_parse(s, fc, sc, t);
342 45792 schedule_inter(s, fc, sc, t, rs);
343 45792 }
344
345 651222 static void task_stage_done(const VVCTask *t, VVCContext *s)
346 {
347 651222 VVCFrameContext *fc = t->fc;
348 651222 VVCFrameThread *ft = fc->ft;
349 651222 const VVCTaskStage stage = t->stage;
350
351 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
352
353 //this is a reserve map of ready_score, ordered by zigzag
354
2/2
✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 578968 times.
651222 if (stage == VVC_TASK_STAGE_PARSE) {
355 72254 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS);
356 72254 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS);
357
8/8
✓ Branch 0 taken 66269 times.
✓ Branch 1 taken 5985 times.
✓ Branch 2 taken 60284 times.
✓ Branch 3 taken 5985 times.
✓ Branch 4 taken 53038 times.
✓ Branch 5 taken 7246 times.
✓ Branch 6 taken 7246 times.
✓ Branch 7 taken 45792 times.
72254 if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
358 26462 return;
359 45792 parse_task_done(s, fc, t->rx, t->ry);
360
2/2
✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 506714 times.
578968 } else if (stage == VVC_TASK_STAGE_RECON) {
361 72254 ADD(-1, 1, VVC_TASK_STAGE_RECON);
362 72254 ADD( 1, 0, VVC_TASK_STAGE_RECON);
363 72254 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
364 72254 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
365 72254 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
366
2/2
✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 434460 times.
506714 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
367 72254 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
368 72254 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
369
2/2
✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 362206 times.
434460 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
370 72254 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
371 72254 ADD(-1, -1, VVC_TASK_STAGE_SAO);
372 72254 ADD( 0, -1, VVC_TASK_STAGE_SAO);
373 72254 ADD(-1, 0, VVC_TASK_STAGE_SAO);
374 72254 ADD( 1, -1, VVC_TASK_STAGE_SAO);
375 72254 ADD( 1, 0, VVC_TASK_STAGE_SAO);
376
2/2
✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 289952 times.
362206 } else if (stage == VVC_TASK_STAGE_SAO) {
377 72254 ADD(-1, -1, VVC_TASK_STAGE_ALF);
378 72254 ADD( 0, -1, VVC_TASK_STAGE_ALF);
379 72254 ADD(-1, 0, VVC_TASK_STAGE_ALF);
380 72254 ADD( 1, -1, VVC_TASK_STAGE_ALF);
381 72254 ADD(-1, 1, VVC_TASK_STAGE_ALF);
382 72254 ADD( 1, 0, VVC_TASK_STAGE_ALF);
383 72254 ADD( 0, 1, VVC_TASK_STAGE_ALF);
384 72254 ADD( 1, 1, VVC_TASK_STAGE_ALF);
385 }
386 }
387
388 413064 static int task_is_stage_ready(VVCTask *t, int add)
389 {
390 413064 const VVCTaskStage stage = t->stage;
391 uint8_t score;
392
2/2
✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 367272 times.
413064 if (stage > VVC_TASK_STAGE_ALF)
393 45792 return 0;
394 367272 score = task_get_score(t, stage) + add;
395 367272 return task_has_target_score(t, stage, score);
396 }
397
398 45792 static void check_colocation(VVCContext *s, VVCTask *t)
399 {
400 45792 const VVCFrameContext *fc = t->fc;
401
402
4/4
✓ Branch 0 taken 5890 times.
✓ Branch 1 taken 39902 times.
✓ Branch 2 taken 5310 times.
✓ Branch 3 taken 580 times.
45792 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
403 45212 VVCFrame *col = fc->ref->collocated_ref;
404 45212 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
405
4/4
✓ Branch 0 taken 39902 times.
✓ Branch 1 taken 5310 times.
✓ Branch 2 taken 4365 times.
✓ Branch 3 taken 35537 times.
45212 if (col && first_col) {
406 //we depend on bottom and right boundary, do not - 1 for y
407 4365 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
408 4365 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
409 4365 return;
410 }
411 }
412 41427 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
413 }
414
415 2026 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
416 {
417 2026 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
418 2026 VVCTask *t = ft->tasks + rs;
419
420 2026 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
421 2026 }
422
423 936 static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
424 {
425 936 VVCFrameContext *fc = lc->fc;
426 936 VVCFrameThread *ft = fc->ft;
427 936 const int ret = ff_vvc_per_frame_init(fc);
428
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
936 if (ret < 0)
430 return ret;
431
432
2/2
✓ Branch 0 taken 1659 times.
✓ Branch 1 taken 936 times.
2595 for (int i = 0; i < fc->nb_slices; i++) {
433 1659 SliceContext *sc = fc->slices[i];
434
2/2
✓ Branch 0 taken 2026 times.
✓ Branch 1 taken 1659 times.
3685 for (int j = 0; j < sc->nb_eps; j++) {
435 2026 EntryPoint *ep = sc->eps + j;
436
2/2
✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 2026 times.
47818 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
437 45792 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
438 45792 VVCTask *t = ft->tasks + rs;
439 45792 check_colocation(s, t);
440 }
441 2026 submit_entry_point(s, ft, sc, ep);
442 }
443 }
444 936 return 0;
445 }
446
447 91584 static void report_frame_progress(VVCFrameContext *fc,
448 const int ry, const VVCProgress idx)
449 {
450 91584 VVCFrameThread *ft = fc->ft;
451 91584 const int ctu_size = ft->ctu_size;
452 int old;
453
454
2/2
✓ Branch 0 taken 8226 times.
✓ Branch 1 taken 83358 times.
91584 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
455 int y;
456 8226 ff_mutex_lock(&ft->lock);
457 8226 y = old = ft->row_progress[idx];
458
4/4
✓ Branch 0 taken 14580 times.
✓ Branch 1 taken 1872 times.
✓ Branch 2 taken 8226 times.
✓ Branch 3 taken 6354 times.
16452 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
459 8226 y++;
460
2/2
✓ Branch 0 taken 8153 times.
✓ Branch 1 taken 73 times.
8226 if (old != y)
461 8153 ft->row_progress[idx] = y;
462 // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
463 // We need to unlock ft->lock first
464 8226 ff_mutex_unlock(&ft->lock);
465
466
2/2
✓ Branch 0 taken 8153 times.
✓ Branch 1 taken 73 times.
8226 if (old != y) {
467
2/2
✓ Branch 0 taken 6281 times.
✓ Branch 1 taken 1872 times.
8153 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
468 8153 ff_vvc_report_progress(fc->ref, idx, progress);
469 }
470 }
471 91584 }
472
473 45792 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
474 {
475 int ret;
476 45792 VVCFrameContext *fc = lc->fc;
477 45792 const int rs = t->rs;
478 45792 const CTU *ctu = fc->tab.ctus + rs;
479
480 45792 lc->ep = t->ep;
481
482 45792 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
483
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
45792 if (ret < 0)
484 return ret;
485
486
2/2
✓ Branch 0 taken 25273 times.
✓ Branch 1 taken 20519 times.
45792 if (!ctu->has_dmvr)
487 25273 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
488
489 45792 return 0;
490 }
491
492 45792 static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
493 {
494
2/2
✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
45792 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
495 45788 ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
496
497 45792 return 0;
498 }
499
500 45792 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
501 {
502 45792 VVCFrameContext *fc = lc->fc;
503 45792 const CTU *ctu = fc->tab.ctus + t->rs;
504 int ret;
505
506 45792 ret = ff_vvc_predict_inter(lc, t->rs);
507
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
45792 if (ret < 0)
508 return ret;
509
510
2/2
✓ Branch 0 taken 20519 times.
✓ Branch 1 taken 25273 times.
45792 if (ctu->has_dmvr)
511 20519 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
512
513 45792 return 0;
514 }
515
516 45792 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
517 {
518 45792 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
519 }
520
521 45792 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522 {
523 45792 VVCFrameContext *fc = lc->fc;
524 45792 VVCFrameThread *ft = fc->ft;
525 45792 const int ctu_size = ft->ctu_size;
526 45792 const int x0 = t->rx * ctu_size;
527 45792 const int y0 = t->ry * ctu_size;
528
529 45792 ff_vvc_lmcs_filter(lc, x0, y0);
530
531 45792 return 0;
532 }
533
534 45792 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
535 {
536 45792 VVCFrameContext *fc = lc->fc;
537 45792 VVCFrameThread *ft = fc->ft;
538 45792 const int ctb_size = ft->ctu_size;
539 45792 const int x0 = t->rx * ctb_size;
540 45792 const int y0 = t->ry * ctb_size;
541
542
2/2
✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
45792 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
543 45788 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
544 45788 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
545 }
546
547 45792 return 0;
548 }
549
550 45792 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
551 {
552 45792 VVCFrameContext *fc = lc->fc;
553 45792 VVCFrameThread *ft = fc->ft;
554 45792 const int ctb_size = ft->ctu_size;
555 45792 const int x0 = t->rx * ctb_size;
556 45792 const int y0 = t->ry * ctb_size;
557
558
2/2
✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
45792 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
559 45788 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
560 45788 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
561 }
562
2/2
✓ Branch 0 taken 45372 times.
✓ Branch 1 taken 420 times.
45792 if (fc->ps.sps->r->sps_sao_enabled_flag)
563 45372 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
564
565 45792 return 0;
566 }
567
568 45792 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
569 {
570 45792 VVCFrameContext *fc = lc->fc;
571 45792 VVCFrameThread *ft = fc->ft;
572 45792 const int ctb_size = ft->ctu_size;
573 45792 const int x0 = t->rx * ctb_size;
574 45792 const int y0 = t->ry * ctb_size;
575
576
2/2
✓ Branch 0 taken 45372 times.
✓ Branch 1 taken 420 times.
45792 if (fc->ps.sps->r->sps_sao_enabled_flag) {
577 45372 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
578 45372 ff_vvc_sao_filter(lc, x0, y0);
579 }
580
581
2/2
✓ Branch 0 taken 35754 times.
✓ Branch 1 taken 10038 times.
45792 if (fc->ps.sps->r->sps_alf_enabled_flag)
582 35754 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
583
584 45792 return 0;
585 }
586
587 45792 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
588 {
589 45792 VVCFrameContext *fc = lc->fc;
590 45792 VVCFrameThread *ft = fc->ft;
591 45792 const int ctu_size = ft->ctu_size;
592 45792 const int x0 = t->rx * ctu_size;
593 45792 const int y0 = t->ry * ctu_size;
594
595
2/2
✓ Branch 0 taken 35754 times.
✓ Branch 1 taken 10038 times.
45792 if (fc->ps.sps->r->sps_alf_enabled_flag) {
596 35754 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
597 35754 ff_vvc_alf_filter(lc, x0, y0);
598 }
599 45792 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
600
601 45792 return 0;
602 }
603
// enables the per-stage and per-frame AV_LOG_DEBUG traces
#define VVC_THREAD_DEBUG

// Short stage names for log messages, indexed by VVCTaskStage.
// Defined unconditionally: the error message in task_run_stage() uses the
// table even when VVC_THREAD_DEBUG is not defined.
static const char *const task_name[] = {
    "INIT",
    "P",
    "B",
    "I",
    "R",
    "L",
    "V",
    "H",
    "S",
    "A"
};
619
620 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
621
622 413064 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
623 {
624 int ret;
625 413064 VVCFrameContext *fc = t->fc;
626 413064 VVCFrameThread *ft = fc->ft;
627 413064 const VVCTaskStage stage = t->stage;
628 static const run_func run[] = {
629 run_init,
630 run_parse,
631 run_deblock_bs,
632 run_inter,
633 run_recon,
634 run_lmcs,
635 run_deblock_v,
636 run_deblock_h,
637 run_sao,
638 run_alf,
639 };
640
641 #ifdef VVC_THREAD_DEBUG
642 413064 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
643 #endif
644
645 413064 lc->sc = t->sc;
646
647
1/2
✓ Branch 0 taken 413064 times.
✗ Branch 1 not taken.
413064 if (!atomic_load(&ft->ret)) {
648
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 413064 times.
413064 if ((ret = run[stage](s, lc, t)) < 0) {
649 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
650 intptr_t zero = 0;
651 #else
652 int zero = 0;
653 #endif
654 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
655 av_log(s->avctx, AV_LOG_ERROR,
656 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
657 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
658 }
659 }
660
661 413064 task_stage_done(t, s);
662 413064 return;
663 }
664
665 228253 static int task_run(FFTask *_t, void *local_context, void *user_data)
666 {
667 228253 VVCTask *t = (VVCTask*)_t;
668 228253 VVCContext *s = (VVCContext *)user_data;
669 228253 VVCLocalContext *lc = local_context;
670 228253 VVCFrameThread *ft = t->fc->ft;
671
672 228253 lc->fc = t->fc;
673
674 do {
675 413064 task_run_stage(t, s, lc);
676 413064 t->stage++;
677
2/2
✓ Branch 1 taken 184811 times.
✓ Branch 2 taken 228253 times.
413064 } while (task_is_stage_ready(t, 1));
678
679
2/2
✓ Branch 0 taken 182461 times.
✓ Branch 1 taken 45792 times.
228253 if (t->stage != VVC_TASK_STAGE_LAST)
680 182461 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
681
682 228253 sheduled_done(ft, &ft->nb_scheduled_tasks);
683
684 228253 return 0;
685 }
686
687 82 FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
688 {
689 82 FFTaskCallbacks callbacks = {
690 s,
691 sizeof(VVCLocalContext),
692 PRIORITY_LOWEST + 1,
693 task_run,
694 };
695 82 return ff_executor_alloc(&callbacks, thread_count);
696 }
697
698 82 void ff_vvc_executor_free(FFExecutor **e)
699 {
700 82 ff_executor_free(e);
701 82 }
702
703 902 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
704 {
705 902 VVCFrameThread *ft = fc->ft;
706
707
2/2
✓ Branch 0 taken 656 times.
✓ Branch 1 taken 246 times.
902 if (!ft)
708 656 return;
709
710 246 ff_mutex_destroy(&ft->lock);
711 246 ff_cond_destroy(&ft->cond);
712 246 av_freep(&ft->rows);
713 246 av_freep(&ft->tasks);
714 246 av_freep(&ft);
715 }
716
717 936 static void frame_thread_init_score(VVCFrameContext *fc)
718 {
719 936 const VVCFrameThread *ft = fc->ft;
720 VVCTask task;
721
722 936 task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
723
724
2/2
✓ Branch 0 taken 8424 times.
✓ Branch 1 taken 936 times.
9360 for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
725 8424 task.stage = i;
726
727
2/2
✓ Branch 0 taken 82062 times.
✓ Branch 1 taken 8424 times.
90486 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
728 82062 task.ry = -1; //top
729 82062 task_stage_done(&task, NULL);
730 82062 task.ry = ft->ctu_height; //bottom
731 82062 task_stage_done(&task, NULL);
732 }
733
734
2/2
✓ Branch 0 taken 37017 times.
✓ Branch 1 taken 8424 times.
45441 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
735 37017 task.rx = -1; //left
736 37017 task_stage_done(&task, NULL);
737 37017 task.rx = ft->ctu_width; //right
738 37017 task_stage_done(&task, NULL);
739 }
740 }
741 936 }
742
743 936 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
744 {
745 936 const VVCSPS *sps = fc->ps.sps;
746 936 const VVCPPS *pps = fc->ps.pps;
747 936 VVCFrameThread *ft = fc->ft;
748 int ret;
749
750
3/4
✓ Branch 0 taken 690 times.
✓ Branch 1 taken 246 times.
✓ Branch 2 taken 690 times.
✗ Branch 3 not taken.
936 if (!ft || ft->ctu_width != pps->ctb_width ||
751
1/2
✓ Branch 0 taken 690 times.
✗ Branch 1 not taken.
690 ft->ctu_height != pps->ctb_height ||
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 690 times.
690 ft->ctu_size != sps->ctb_size_y) {
753
754 246 ff_vvc_frame_thread_free(fc);
755 246 ft = av_calloc(1, sizeof(*fc->ft));
756
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
246 if (!ft)
757 return AVERROR(ENOMEM);
758
759 246 ft->ctu_width = fc->ps.pps->ctb_width;
760 246 ft->ctu_height = fc->ps.pps->ctb_height;
761 246 ft->ctu_count = fc->ps.pps->ctb_count;
762 246 ft->ctu_size = fc->ps.sps->ctb_size_y;
763
764 246 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
765
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
246 if (!ft->rows)
766 goto fail;
767
768 246 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
769
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
246 if (!ft->tasks)
770 goto fail;
771
772
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 246 times.
246 if ((ret = ff_cond_init(&ft->cond, NULL)))
773 goto fail;
774
775
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 246 times.
246 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
776 ff_cond_destroy(&ft->cond);
777 goto fail;
778 }
779 }
780 936 fc->ft = ft;
781 936 ft->ret = 0;
782
2/2
✓ Branch 0 taken 4113 times.
✓ Branch 1 taken 936 times.
5049 for (int y = 0; y < ft->ctu_height; y++) {
783 4113 VVCRowThread *row = ft->rows + y;
784 4113 memset(row->col_progress, 0, sizeof(row->col_progress));
785 }
786
787
2/2
✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 936 times.
46728 for (int rs = 0; rs < ft->ctu_count; rs++) {
788 45792 VVCTask *t = ft->tasks + rs;
789 45792 task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
790 }
791
792 936 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
793
794 936 frame_thread_init_score(fc);
795
796 936 return 0;
797
798 fail:
799 if (ft) {
800 av_freep(&ft->rows);
801 av_freep(&ft->tasks);
802 av_freep(&ft);
803 }
804
805 return AVERROR(ENOMEM);
806 }
807
808 936 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
809 {
810 936 VVCFrameThread *ft = fc->ft;
811
812
2/2
✓ Branch 0 taken 1659 times.
✓ Branch 1 taken 936 times.
2595 for (int i = 0; i < fc->nb_slices; i++) {
813 1659 SliceContext *sc = fc->slices[i];
814
2/2
✓ Branch 0 taken 2026 times.
✓ Branch 1 taken 1659 times.
3685 for (int j = 0; j < sc->nb_eps; j++) {
815 2026 EntryPoint *ep = sc->eps + j;
816
2/2
✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 2026 times.
47818 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
817 45792 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
818 45792 VVCTask *t = ft->tasks + rs;
819 45792 const int ret = task_init_parse(t, sc, ep, k);
820
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
45792 if (ret < 0)
821 return ret;
822 }
823 }
824 }
825 936 frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
826
827 936 return 0;
828 }
829
830 936 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
831 {
832 936 VVCFrameThread *ft = fc->ft;
833
834 936 ff_mutex_lock(&ft->lock);
835
836
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 936 times.
936 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
837 ff_cond_wait(&ft->cond, &ft->lock);
838
839 936 ff_mutex_unlock(&ft->lock);
840 936 ff_vvc_report_frame_finished(fc->ref);
841
842 #ifdef VVC_THREAD_DEBUG
843 936 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d done\r\n", (int)fc->decode_order);
844 #endif
845 936 return ft->ret;
846 }
847