FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 407 431 94.4%
Functions: 41 41 100.0%
Branches: 165 192 85.9%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavcodec/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
42 typedef enum VVCTaskStage {
43 VVC_TASK_STAGE_INIT, // for CTU(0, 0) only
44 VVC_TASK_STAGE_PARSE,
45 VVC_TASK_STAGE_DEBLOCK_BS,
46 VVC_TASK_STAGE_INTER,
47 VVC_TASK_STAGE_RECON,
48 VVC_TASK_STAGE_LMCS,
49 VVC_TASK_STAGE_DEBLOCK_V,
50 VVC_TASK_STAGE_DEBLOCK_H,
51 VVC_TASK_STAGE_SAO,
52 VVC_TASK_STAGE_ALF,
53 VVC_TASK_STAGE_LAST
54 } VVCTaskStage;
55
56 typedef struct VVCTask {
57 union {
58 struct VVCTask *next; //for executor debug only
59 FFTask task;
60 } u;
61
62 VVCTaskStage stage;
63
64 // ctu x, y, and raster scan order
65 int rx, ry, rs;
66 VVCFrameContext *fc;
67
68 ProgressListener col_listener;
69 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
70
71 // for parse task only
72 SliceContext *sc;
73 EntryPoint *ep;
74 int ctu_idx; //ctu idx in the current slice
75
76 // tasks with target scores met are ready for scheduling
77 atomic_uchar score[VVC_TASK_STAGE_LAST];
78 atomic_uchar target_inter_score;
79 } VVCTask;
80
81 typedef struct VVCRowThread {
82 atomic_int col_progress[VVC_PROGRESS_LAST];
83 } VVCRowThread;
84
85 typedef struct VVCFrameThread {
86 // error return for tasks
87 atomic_int ret;
88
89 VVCRowThread *rows;
90 VVCTask *tasks;
91
92 int ctu_size;
93 int ctu_width;
94 int ctu_height;
95 int ctu_count;
96
97 //protected by lock
98 atomic_int nb_scheduled_tasks;
99 atomic_int nb_scheduled_listeners;
100
101 int row_progress[VVC_PROGRESS_LAST];
102
103 AVMutex lock;
104 AVCond cond;
105 } VVCFrameThread;
106
107 #define PRIORITY_LOWEST 2
108 233622 static void add_task(VVCContext *s, VVCTask *t)
109 {
110 233622 VVCFrameThread *ft = t->fc->ft;
111 233622 FFTask *task = &t->u.task;
112 233622 const int priorities[] = {
113 0, // VVC_TASK_STAGE_INIT,
114 0, // VVC_TASK_STAGE_PARSE,
115 1, // VVC_TASK_STAGE_DEBLOCK_BS
116 // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks.
117 // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks.
118 PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER
119 1, // VVC_TASK_STAGE_RECON,
120 1, // VVC_TASK_STAGE_LMCS,
121 1, // VVC_TASK_STAGE_DEBLOCK_V,
122 1, // VVC_TASK_STAGE_DEBLOCK_H,
123 1, // VVC_TASK_STAGE_SAO,
124 1, // VVC_TASK_STAGE_ALF,
125 };
126
127 233622 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
128 233622 task->priority = priorities[t->stage];
129 233622 ff_executor_execute(s->executor, task);
130 233622 }
131
132 47649 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
133 {
134 47649 memset(t, 0, sizeof(*t));
135 47649 t->stage = stage;
136 47649 t->fc = fc;
137 47649 t->rx = rx;
138 47649 t->ry = ry;
139 47649 t->rs = ry * fc->ft->ctu_width + rx;
140
2/2
✓ Branch 0 taken 476490 times.
✓ Branch 1 taken 47649 times.
524139 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
141 476490 atomic_store(t->score + i, 0);
142 47649 atomic_store(&t->target_inter_score, 0);
143 47649 }
144
145 46705 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
146 {
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46705 times.
46705 if (t->sc) {
148 // the task already inited, error bitstream
149 return AVERROR_INVALIDDATA;
150 }
151 46705 t->sc = sc;
152 46705 t->ep = ep;
153 46705 t->ctu_idx = ctu_idx;
154
155 46705 return 0;
156 }
157
158 1439974 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
159 {
160 1439974 return atomic_fetch_add(&t->score[stage], 1) + 1;
161 }
162
163 374584 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
164 {
165 374584 return atomic_load(&t->score[stage]);
166 }
167
168 //first row in tile or slice
169 5202 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
170 {
171 5202 const VVCFrameThread *ft = fc->ft;
172 5202 const VVCPPS *pps = fc->ps.pps;
173
174
2/2
✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 765 times.
5202 if (ry != pps->ctb_to_row_bd[ry]) {
175 4437 const int rs = ry * ft->ctu_width + rx;
176 4437 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
177 }
178 765 return 1;
179 }
180
181 1814558 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
182 {
183 // l:left, r:right, t: top, b: bottom
184 static const uint8_t target_score[] =
185 {
186 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse
187 0, //VVC_TASK_STAGE_INTER, not used
188 2, //VVC_TASK_STAGE_RECON, need l + rt recon
189 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
190 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
191 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
192 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
193 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
194 };
195 1814558 uint8_t target = 0;
196 1814558 VVCFrameContext *fc = t->fc;
197
198
2/2
✓ Branch 0 taken 944 times.
✓ Branch 1 taken 1813614 times.
1814558 if (stage == VVC_TASK_STAGE_INIT)
199 944 return 1;
200
201
2/2
✓ Branch 0 taken 95425 times.
✓ Branch 1 taken 1718189 times.
1813614 if (stage == VVC_TASK_STAGE_PARSE) {
202 95425 const H266RawSPS *rsps = fc->ps.sps->r;
203
4/4
✓ Branch 0 taken 3978 times.
✓ Branch 1 taken 91447 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 765 times.
95425 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
204 95425 const int no_prev_stage = t->rs > 0;
205 95425 target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage
206
2/2
✓ Branch 0 taken 130122 times.
✓ Branch 1 taken 1588067 times.
1718189 } else if (stage == VVC_TASK_STAGE_INTER) {
207 130122 target = atomic_load(&t->target_inter_score);
208 } else {
209 1588067 target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS];
210 }
211
212 //+1 for previous stage
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1813614 times.
1813614 av_assert0(score <= target + 1);
214 1813614 return score == target + 1;
215 }
216
217 2058996 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
218 const int rx, const int ry, const VVCTaskStage stage)
219 {
220 2058996 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
221 uint8_t score;
222
223
8/8
✓ Branch 0 taken 1912116 times.
✓ Branch 1 taken 146880 times.
✓ Branch 2 taken 1777476 times.
✓ Branch 3 taken 134640 times.
✓ Branch 4 taken 1594051 times.
✓ Branch 5 taken 183425 times.
✓ Branch 6 taken 154077 times.
✓ Branch 7 taken 1439974 times.
2058996 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
224 619022 return;
225
226 1439974 score = task_add_score(t, stage);
227
2/2
✓ Branch 1 taken 233622 times.
✓ Branch 2 taken 1206352 times.
1439974 if (task_has_target_score(t, stage, score)) {
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 233622 times.
233622 av_assert0(s);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 233622 times.
233622 av_assert0(stage == t->stage);
230 233622 add_task(s, t);
231 }
232 }
233
234 321906 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
235 {
236
2/2
✓ Branch 0 taken 89228 times.
✓ Branch 1 taken 232678 times.
321906 if (atomic_fetch_sub(scheduled, 1) == 1) {
237 89228 ff_mutex_lock(&ft->lock);
238 89228 ff_cond_signal(&ft->cond);
239 89228 ff_mutex_unlock(&ft->lock);
240 }
241 321906 }
242
243 88284 static void progress_done(VVCProgressListener *_l, const int type)
244 {
245 88284 const ProgressListener *l = (ProgressListener *)_l;
246 88284 const VVCTask *t = l->task;
247 88284 VVCFrameThread *ft = t->fc->ft;
248
249 88284 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
250 88284 sheduled_done(ft, &ft->nb_scheduled_listeners);
251 88284 }
252
253 83417 static void pixel_done(VVCProgressListener *l)
254 {
255 83417 progress_done(l, VVC_TASK_STAGE_INTER);
256 83417 }
257
258 4867 static void mv_done(VVCProgressListener *l)
259 {
260 4867 progress_done(l, VVC_TASK_STAGE_PARSE);
261 4867 }
262
263 88284 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
264 {
265 88284 const int is_inter = vp == VVC_PROGRESS_PIXEL;
266
267 88284 l->task = t;
268 88284 l->s = s;
269 88284 l->l.vp = vp;
270 88284 l->l.y = y;
271
2/2
✓ Branch 0 taken 83417 times.
✓ Branch 1 taken 4867 times.
88284 l->l.progress_done = is_inter ? pixel_done : mv_done;
272
2/2
✓ Branch 0 taken 83417 times.
✓ Branch 1 taken 4867 times.
88284 if (is_inter)
273 83417 atomic_fetch_add(&t->target_inter_score, 1);
274 88284 }
275
276 88284 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
277 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
278 {
279 88284 VVCFrameThread *ft = t->fc->ft;
280
281 88284 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
282 88284 listener_init(l, t, s, vp, y);
283 88284 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
284 88284 }
285
286 46705 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
287 {
288 46705 VVCFrameThread *ft = fc->ft;
289 46705 EntryPoint *ep = t->ep;
290 46705 const VVCSPS *sps = fc->ps.sps;
291
292
2/2
✓ Branch 0 taken 1428 times.
✓ Branch 1 taken 45277 times.
46705 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
293
2/2
✓ Branch 0 taken 204 times.
✓ Branch 1 taken 1224 times.
1428 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
294 204 EntryPoint *next = ep + 1;
295
3/4
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 51 times.
✓ Branch 3 taken 153 times.
✗ Branch 4 not taken.
204 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) {
296 153 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
297 153 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
298 }
299 }
300
3/4
✓ Branch 0 taken 1071 times.
✓ Branch 1 taken 357 times.
✓ Branch 3 taken 1071 times.
✗ Branch 4 not taken.
1428 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
301 1071 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
302 }
303
304
2/2
✓ Branch 0 taken 44629 times.
✓ Branch 1 taken 2076 times.
46705 if (t->ctu_idx + 1 < t->ep->ctu_end) {
305 44629 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
306 44629 const int next_rx = next_rs % ft->ctu_width;
307 44629 const int next_ry = next_rs / ft->ctu_width;
308 44629 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
309 }
310 46705 }
311
312 46705 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
313 {
314 46705 const VVCSH *sh = &sc->sh;
315
316
2/2
✓ Branch 0 taken 40817 times.
✓ Branch 1 taken 5888 times.
46705 if (!IS_I(sh->r)) {
317 40817 CTU *ctu = fc->tab.ctus + rs;
318
2/2
✓ Branch 0 taken 81634 times.
✓ Branch 1 taken 40817 times.
122451 for (int lx = 0; lx < 2; lx++) {
319
2/2
✓ Branch 0 taken 166678 times.
✓ Branch 1 taken 81634 times.
248312 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
320 166678 int y = ctu->max_y[lx][i];
321 166678 VVCRefPic *refp = sc->rpl[lx].refs + i;
322 166678 VVCFrame *ref = refp->ref;
323
3/4
✓ Branch 0 taken 166678 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 83417 times.
✓ Branch 3 taken 83261 times.
166678 if (ref && y >= 0) {
324
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 83203 times.
83417 if (refp->is_scaled)
325 214 y = y * refp->scale[1] >> 14;
326 83417 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
327 }
328 }
329 }
330 }
331 46705 }
332
333 46705 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
334 {
335 46705 VVCFrameThread *ft = fc->ft;
336 46705 const int rs = ry * ft->ctu_width + rx;
337 46705 const int slice_idx = fc->tab.slice_idx[rs];
338 46705 VVCTask *t = ft->tasks + rs;
339 46705 const SliceContext *sc = fc->slices[slice_idx];
340
341 46705 schedule_next_parse(s, fc, sc, t);
342 46705 schedule_inter(s, fc, sc, t, rs);
343 46705 }
344
345 663515 static void task_stage_done(const VVCTask *t, VVCContext *s)
346 {
347 663515 VVCFrameContext *fc = t->fc;
348 663515 VVCFrameThread *ft = fc->ft;
349 663515 const VVCTaskStage stage = t->stage;
350
351 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
352
353 //this is a reserve map of ready_score, ordered by zigzag
354
2/2
✓ Branch 0 taken 73619 times.
✓ Branch 1 taken 589896 times.
663515 if (stage == VVC_TASK_STAGE_PARSE) {
355 73619 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS);
356 73619 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS);
357
8/8
✓ Branch 0 taken 67499 times.
✓ Branch 1 taken 6120 times.
✓ Branch 2 taken 61379 times.
✓ Branch 3 taken 6120 times.
✓ Branch 4 taken 54042 times.
✓ Branch 5 taken 7337 times.
✓ Branch 6 taken 7337 times.
✓ Branch 7 taken 46705 times.
73619 if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height)
358 26914 return;
359 46705 parse_task_done(s, fc, t->rx, t->ry);
360
2/2
✓ Branch 0 taken 73619 times.
✓ Branch 1 taken 516277 times.
589896 } else if (stage == VVC_TASK_STAGE_RECON) {
361 73619 ADD(-1, 1, VVC_TASK_STAGE_RECON);
362 73619 ADD( 1, 0, VVC_TASK_STAGE_RECON);
363 73619 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
364 73619 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
365 73619 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
366
2/2
✓ Branch 0 taken 73619 times.
✓ Branch 1 taken 442658 times.
516277 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
367 73619 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
368 73619 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
369
2/2
✓ Branch 0 taken 73619 times.
✓ Branch 1 taken 369039 times.
442658 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
370 73619 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
371 73619 ADD(-1, -1, VVC_TASK_STAGE_SAO);
372 73619 ADD( 0, -1, VVC_TASK_STAGE_SAO);
373 73619 ADD(-1, 0, VVC_TASK_STAGE_SAO);
374 73619 ADD( 1, -1, VVC_TASK_STAGE_SAO);
375 73619 ADD( 1, 0, VVC_TASK_STAGE_SAO);
376
2/2
✓ Branch 0 taken 73619 times.
✓ Branch 1 taken 295420 times.
369039 } else if (stage == VVC_TASK_STAGE_SAO) {
377 73619 ADD(-1, -1, VVC_TASK_STAGE_ALF);
378 73619 ADD( 0, -1, VVC_TASK_STAGE_ALF);
379 73619 ADD(-1, 0, VVC_TASK_STAGE_ALF);
380 73619 ADD( 1, -1, VVC_TASK_STAGE_ALF);
381 73619 ADD(-1, 1, VVC_TASK_STAGE_ALF);
382 73619 ADD( 1, 0, VVC_TASK_STAGE_ALF);
383 73619 ADD( 0, 1, VVC_TASK_STAGE_ALF);
384 73619 ADD( 1, 1, VVC_TASK_STAGE_ALF);
385 }
386 }
387
388 421289 static int task_is_stage_ready(VVCTask *t, int add)
389 {
390 421289 const VVCTaskStage stage = t->stage;
391 uint8_t score;
392
2/2
✓ Branch 0 taken 46705 times.
✓ Branch 1 taken 374584 times.
421289 if (stage > VVC_TASK_STAGE_ALF)
393 46705 return 0;
394 374584 score = task_get_score(t, stage) + add;
395 374584 return task_has_target_score(t, stage, score);
396 }
397
398 46705 static void check_colocation(VVCContext *s, VVCTask *t)
399 {
400 46705 const VVCFrameContext *fc = t->fc;
401
402
4/4
✓ Branch 0 taken 5992 times.
✓ Branch 1 taken 40713 times.
✓ Branch 2 taken 5412 times.
✓ Branch 3 taken 580 times.
46705 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
403 46125 VVCFrame *col = fc->ref->collocated_ref;
404 46125 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
405
4/4
✓ Branch 0 taken 40713 times.
✓ Branch 1 taken 5412 times.
✓ Branch 2 taken 4867 times.
✓ Branch 3 taken 35846 times.
46125 if (col && first_col) {
406 //we depend on bottom and right boundary, do not - 1 for y
407 4867 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
408 4867 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
409 4867 return;
410 }
411 }
412 41838 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
413 }
414
415 2076 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
416 {
417 2076 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
418 2076 VVCTask *t = ft->tasks + rs;
419
420 2076 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
421 2076 }
422
423 944 static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
424 {
425 944 VVCFrameContext *fc = lc->fc;
426 944 VVCFrameThread *ft = fc->ft;
427 944 const int ret = ff_vvc_per_frame_init(fc);
428
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 944 times.
944 if (ret < 0)
430 return ret;
431
432
2/2
✓ Branch 0 taken 1709 times.
✓ Branch 1 taken 944 times.
2653 for (int i = 0; i < fc->nb_slices; i++) {
433 1709 SliceContext *sc = fc->slices[i];
434
2/2
✓ Branch 0 taken 2076 times.
✓ Branch 1 taken 1709 times.
3785 for (int j = 0; j < sc->nb_eps; j++) {
435 2076 EntryPoint *ep = sc->eps + j;
436
2/2
✓ Branch 0 taken 46705 times.
✓ Branch 1 taken 2076 times.
48781 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
437 46705 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
438 46705 VVCTask *t = ft->tasks + rs;
439 46705 check_colocation(s, t);
440 }
441 2076 submit_entry_point(s, ft, sc, ep);
442 }
443 }
444 944 return 0;
445 }
446
447 93410 static void report_frame_progress(VVCFrameContext *fc,
448 const int ry, const VVCProgress idx)
449 {
450 93410 VVCFrameThread *ft = fc->ft;
451 93410 const int ctu_size = ft->ctu_size;
452 int old;
453
454
2/2
✓ Branch 0 taken 8464 times.
✓ Branch 1 taken 84946 times.
93410 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
455 int y;
456 8464 ff_mutex_lock(&ft->lock);
457 8464 y = old = ft->row_progress[idx];
458
4/4
✓ Branch 0 taken 15040 times.
✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 8464 times.
✓ Branch 3 taken 6576 times.
16928 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
459 8464 y++;
460
2/2
✓ Branch 0 taken 8391 times.
✓ Branch 1 taken 73 times.
8464 if (old != y)
461 8391 ft->row_progress[idx] = y;
462 // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
463 // We need to unlock ft->lock first
464 8464 ff_mutex_unlock(&ft->lock);
465
466
2/2
✓ Branch 0 taken 8391 times.
✓ Branch 1 taken 73 times.
8464 if (old != y) {
467
2/2
✓ Branch 0 taken 6503 times.
✓ Branch 1 taken 1888 times.
8391 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
468 8391 ff_vvc_report_progress(fc->ref, idx, progress);
469 }
470 }
471 93410 }
472
473 46705 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
474 {
475 int ret;
476 46705 VVCFrameContext *fc = lc->fc;
477 46705 const int rs = t->rs;
478 46705 const CTU *ctu = fc->tab.ctus + rs;
479
480 46705 lc->ep = t->ep;
481
482 46705 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
483
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46705 times.
46705 if (ret < 0)
484 return ret;
485
486
2/2
✓ Branch 0 taken 26257 times.
✓ Branch 1 taken 20448 times.
46705 if (!ctu->has_dmvr)
487 26257 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
488
489 46705 return 0;
490 }
491
492 46705 static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
493 {
494
2/2
✓ Branch 0 taken 46701 times.
✓ Branch 1 taken 4 times.
46705 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag)
495 46701 ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs);
496
497 46705 return 0;
498 }
499
500 46705 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
501 {
502 46705 VVCFrameContext *fc = lc->fc;
503 46705 const CTU *ctu = fc->tab.ctus + t->rs;
504 int ret;
505
506 46705 ret = ff_vvc_predict_inter(lc, t->rs);
507
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46705 times.
46705 if (ret < 0)
508 return ret;
509
510
2/2
✓ Branch 0 taken 20448 times.
✓ Branch 1 taken 26257 times.
46705 if (ctu->has_dmvr)
511 20448 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
512
513 46705 return 0;
514 }
515
516 46705 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
517 {
518 46705 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
519 }
520
521 46705 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
522 {
523 46705 VVCFrameContext *fc = lc->fc;
524 46705 VVCFrameThread *ft = fc->ft;
525 46705 const int ctu_size = ft->ctu_size;
526 46705 const int x0 = t->rx * ctu_size;
527 46705 const int y0 = t->ry * ctu_size;
528
529 46705 ff_vvc_lmcs_filter(lc, x0, y0);
530
531 46705 return 0;
532 }
533
534 46705 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
535 {
536 46705 VVCFrameContext *fc = lc->fc;
537 46705 VVCFrameThread *ft = fc->ft;
538 46705 const int ctb_size = ft->ctu_size;
539 46705 const int x0 = t->rx * ctb_size;
540 46705 const int y0 = t->ry * ctb_size;
541
542
2/2
✓ Branch 0 taken 46701 times.
✓ Branch 1 taken 4 times.
46705 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
543 46701 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
544 46701 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
545 }
546
547 46705 return 0;
548 }
549
550 46705 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
551 {
552 46705 VVCFrameContext *fc = lc->fc;
553 46705 VVCFrameThread *ft = fc->ft;
554 46705 const int ctb_size = ft->ctu_size;
555 46705 const int x0 = t->rx * ctb_size;
556 46705 const int y0 = t->ry * ctb_size;
557
558
2/2
✓ Branch 0 taken 46701 times.
✓ Branch 1 taken 4 times.
46705 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
559 46701 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
560 46701 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
561 }
562
2/2
✓ Branch 0 taken 46285 times.
✓ Branch 1 taken 420 times.
46705 if (fc->ps.sps->r->sps_sao_enabled_flag)
563 46285 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
564
565 46705 return 0;
566 }
567
568 46705 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
569 {
570 46705 VVCFrameContext *fc = lc->fc;
571 46705 VVCFrameThread *ft = fc->ft;
572 46705 const int ctb_size = ft->ctu_size;
573 46705 const int x0 = t->rx * ctb_size;
574 46705 const int y0 = t->ry * ctb_size;
575
576
2/2
✓ Branch 0 taken 46285 times.
✓ Branch 1 taken 420 times.
46705 if (fc->ps.sps->r->sps_sao_enabled_flag) {
577 46285 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
578 46285 ff_vvc_sao_filter(lc, x0, y0);
579 }
580
581
2/2
✓ Branch 0 taken 36667 times.
✓ Branch 1 taken 10038 times.
46705 if (fc->ps.sps->r->sps_alf_enabled_flag)
582 36667 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
583
584 46705 return 0;
585 }
586
587 46705 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
588 {
589 46705 VVCFrameContext *fc = lc->fc;
590 46705 VVCFrameThread *ft = fc->ft;
591 46705 const int ctu_size = ft->ctu_size;
592 46705 const int x0 = t->rx * ctu_size;
593 46705 const int y0 = t->ry * ctu_size;
594
595
2/2
✓ Branch 0 taken 36667 times.
✓ Branch 1 taken 10038 times.
46705 if (fc->ps.sps->r->sps_alf_enabled_flag) {
596 36667 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
597 36667 ff_vvc_alf_filter(lc, x0, y0);
598 }
599 46705 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
600
601 46705 return 0;
602 }
603
604 const static char* task_name[] = {
605 "INIT",
606 "P",
607 "B",
608 "I",
609 "R",
610 "L",
611 "V",
612 "H",
613 "S",
614 "A"
615 };
616
617 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
618
619 421289 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
620 {
621 int ret;
622 421289 VVCFrameContext *fc = t->fc;
623 421289 VVCFrameThread *ft = fc->ft;
624 421289 const VVCTaskStage stage = t->stage;
625 static const run_func run[] = {
626 run_init,
627 run_parse,
628 run_deblock_bs,
629 run_inter,
630 run_recon,
631 run_lmcs,
632 run_deblock_v,
633 run_deblock_h,
634 run_sao,
635 run_alf,
636 };
637
638 ff_dlog(s->avctx, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
639
640 421289 lc->sc = t->sc;
641
642
1/2
✓ Branch 0 taken 421289 times.
✗ Branch 1 not taken.
421289 if (!atomic_load(&ft->ret)) {
643
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 421289 times.
421289 if ((ret = run[stage](s, lc, t)) < 0) {
644 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
645 intptr_t zero = 0;
646 #else
647 int zero = 0;
648 #endif
649 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
650 av_log(s->avctx, AV_LOG_ERROR,
651 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
652 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
653 }
654
1/2
✓ Branch 0 taken 421289 times.
✗ Branch 1 not taken.
421289 if (!ret)
655 421289 task_stage_done(t, s);
656 }
657 421289 return;
658 }
659
660 233622 static int task_run(FFTask *_t, void *local_context, void *user_data)
661 {
662 233622 VVCTask *t = (VVCTask*)_t;
663 233622 VVCContext *s = (VVCContext *)user_data;
664 233622 VVCLocalContext *lc = local_context;
665 233622 VVCFrameThread *ft = t->fc->ft;
666
667 233622 lc->fc = t->fc;
668
669 do {
670 421289 task_run_stage(t, s, lc);
671 421289 t->stage++;
672
2/2
✓ Branch 1 taken 187667 times.
✓ Branch 2 taken 233622 times.
421289 } while (task_is_stage_ready(t, 1));
673
674
2/2
✓ Branch 0 taken 186917 times.
✓ Branch 1 taken 46705 times.
233622 if (t->stage != VVC_TASK_STAGE_LAST)
675 186917 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
676
677 233622 sheduled_done(ft, &ft->nb_scheduled_tasks);
678
679 233622 return 0;
680 }
681
682 86 FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
683 {
684 86 FFTaskCallbacks callbacks = {
685 s,
686 sizeof(VVCLocalContext),
687 PRIORITY_LOWEST + 1,
688 task_run,
689 };
690 86 return ff_executor_alloc(&callbacks, thread_count);
691 }
692
693 86 void ff_vvc_executor_free(FFExecutor **e)
694 {
695 86 ff_executor_free(e);
696 86 }
697
698 938 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
699 {
700 938 VVCFrameThread *ft = fc->ft;
701
702
2/2
✓ Branch 0 taken 688 times.
✓ Branch 1 taken 250 times.
938 if (!ft)
703 688 return;
704
705 250 ff_mutex_destroy(&ft->lock);
706 250 ff_cond_destroy(&ft->cond);
707 250 av_freep(&ft->rows);
708 250 av_freep(&ft->tasks);
709 250 av_freep(&ft);
710 }
711
712 944 static void frame_thread_init_score(VVCFrameContext *fc)
713 {
714 944 const VVCFrameThread *ft = fc->ft;
715 VVCTask task;
716
717 944 task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0);
718
719
2/2
✓ Branch 0 taken 8496 times.
✓ Branch 1 taken 944 times.
9440 for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) {
720 8496 task.stage = i;
721
722
2/2
✓ Branch 0 taken 83025 times.
✓ Branch 1 taken 8496 times.
91521 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
723 83025 task.ry = -1; //top
724 83025 task_stage_done(&task, NULL);
725 83025 task.ry = ft->ctu_height; //bottom
726 83025 task_stage_done(&task, NULL);
727 }
728
729
2/2
✓ Branch 0 taken 38088 times.
✓ Branch 1 taken 8496 times.
46584 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
730 38088 task.rx = -1; //left
731 38088 task_stage_done(&task, NULL);
732 38088 task.rx = ft->ctu_width; //right
733 38088 task_stage_done(&task, NULL);
734 }
735 }
736 944 }
737
738 944 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
739 {
740 944 const VVCSPS *sps = fc->ps.sps;
741 944 const VVCPPS *pps = fc->ps.pps;
742 944 VVCFrameThread *ft = fc->ft;
743 int ret;
744
745
3/4
✓ Branch 0 taken 694 times.
✓ Branch 1 taken 250 times.
✓ Branch 2 taken 694 times.
✗ Branch 3 not taken.
944 if (!ft || ft->ctu_width != pps->ctb_width ||
746
1/2
✓ Branch 0 taken 694 times.
✗ Branch 1 not taken.
694 ft->ctu_height != pps->ctb_height ||
747
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 694 times.
694 ft->ctu_size != sps->ctb_size_y) {
748
749 250 ff_vvc_frame_thread_free(fc);
750 250 ft = av_calloc(1, sizeof(*fc->ft));
751
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 250 times.
250 if (!ft)
752 return AVERROR(ENOMEM);
753
754 250 ft->ctu_width = fc->ps.pps->ctb_width;
755 250 ft->ctu_height = fc->ps.pps->ctb_height;
756 250 ft->ctu_count = fc->ps.pps->ctb_count;
757 250 ft->ctu_size = fc->ps.sps->ctb_size_y;
758
759 250 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
760
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 250 times.
250 if (!ft->rows)
761 goto fail;
762
763 250 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
764
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 250 times.
250 if (!ft->tasks)
765 goto fail;
766
767
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 250 times.
250 if ((ret = ff_cond_init(&ft->cond, NULL)))
768 goto fail;
769
770
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 250 times.
250 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
771 ff_cond_destroy(&ft->cond);
772 goto fail;
773 }
774 }
775 944 fc->ft = ft;
776 944 ft->ret = 0;
777
2/2
✓ Branch 0 taken 4232 times.
✓ Branch 1 taken 944 times.
5176 for (int y = 0; y < ft->ctu_height; y++) {
778 4232 VVCRowThread *row = ft->rows + y;
779 4232 memset(row->col_progress, 0, sizeof(row->col_progress));
780 }
781
782
2/2
✓ Branch 0 taken 46705 times.
✓ Branch 1 taken 944 times.
47649 for (int rs = 0; rs < ft->ctu_count; rs++) {
783 46705 VVCTask *t = ft->tasks + rs;
784 46705 task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width);
785 }
786
787 944 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
788
789 944 frame_thread_init_score(fc);
790
791 944 return 0;
792
793 fail:
794 if (ft) {
795 av_freep(&ft->rows);
796 av_freep(&ft->tasks);
797 av_freep(&ft);
798 }
799
800 return AVERROR(ENOMEM);
801 }
802
803 944 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
804 {
805 944 VVCFrameThread *ft = fc->ft;
806
807
2/2
✓ Branch 0 taken 1709 times.
✓ Branch 1 taken 944 times.
2653 for (int i = 0; i < fc->nb_slices; i++) {
808 1709 SliceContext *sc = fc->slices[i];
809
2/2
✓ Branch 0 taken 2076 times.
✓ Branch 1 taken 1709 times.
3785 for (int j = 0; j < sc->nb_eps; j++) {
810 2076 EntryPoint *ep = sc->eps + j;
811
2/2
✓ Branch 0 taken 46705 times.
✓ Branch 1 taken 2076 times.
48781 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
812 46705 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
813 46705 VVCTask *t = ft->tasks + rs;
814 46705 const int ret = task_init_parse(t, sc, ep, k);
815
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46705 times.
46705 if (ret < 0)
816 return ret;
817 }
818 }
819 }
820
2/2
✓ Branch 0 taken 46705 times.
✓ Branch 1 taken 944 times.
47649 for (int rs = 0; rs < ft->ctu_count; rs++) {
821 46705 const VVCTask *t = ft->tasks + rs;
822
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 46705 times.
46705 if (!t->sc) {
823 av_log(s->avctx, AV_LOG_ERROR, "frame %5d, CTU(%d, %d) not belong to any slice\r\n", (int)fc->decode_order, t->rx, t->ry);
824 return AVERROR_INVALIDDATA;
825 }
826 }
827 944 frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT);
828
829 944 return 0;
830 }
831
832 944 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
833 {
834 944 VVCFrameThread *ft = fc->ft;
835
836 944 ff_mutex_lock(&ft->lock);
837
838
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 944 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 944 times.
944 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
839 ff_cond_wait(&ft->cond, &ft->lock);
840
841 944 ff_mutex_unlock(&ft->lock);
842 944 ff_vvc_report_frame_finished(fc->ref);
843
844 ff_dlog(s->avctx, "frame %5d done\r\n", (int)fc->decode_order);
845 944 return ft->ret;
846 }
847