FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2024-07-16 12:46:59
Exec Total Coverage
Lines: 392 415 94.5%
Functions: 41 41 100.0%
Branches: 153 184 83.2%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavutil/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
/*
 * Stages a task walks through, in execution order.
 * The numeric order matters: a task advances with stage++ and the values
 * index per-stage tables in this file.
 */
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_PARSE,
    VVC_TASK_STAGE_INTER,
    VVC_TASK_STAGE_RECON,
    VVC_TASK_STAGE_LMCS,
    VVC_TASK_STAGE_DEBLOCK_V,
    VVC_TASK_STAGE_DEBLOCK_H,
    VVC_TASK_STAGE_SAO,
    VVC_TASK_STAGE_ALF,
    VVC_TASK_STAGE_LAST     // number of stages, not a real stage
} VVCTaskStage;
53
54 typedef struct VVCTask {
55 union {
56 struct VVCTask *next; //for executor debug only
57 AVTask task;
58 } u;
59
60 VVCTaskStage stage;
61
62 // ctu x, y, and raster scan order
63 int rx, ry, rs;
64 VVCFrameContext *fc;
65
66 ProgressListener col_listener;
67 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
68
69 // for parse task only
70 SliceContext *sc;
71 EntryPoint *ep;
72 int ctu_idx; //ctu idx in the current slice
73
74 // tasks with target scores met are ready for scheduling
75 atomic_uchar score[VVC_TASK_STAGE_LAST];
76 atomic_uchar target_inter_score;
77 } VVCTask;
78
79 typedef struct VVCRowThread {
80 atomic_int col_progress[VVC_PROGRESS_LAST];
81 } VVCRowThread;
82
83 typedef struct VVCFrameThread {
84 // error return for tasks
85 atomic_int ret;
86
87 VVCRowThread *rows;
88 VVCTask *tasks;
89
90 int ctu_size;
91 int ctu_width;
92 int ctu_height;
93 int ctu_count;
94
95 //protected by lock
96 atomic_int nb_scheduled_tasks;
97 atomic_int nb_scheduled_listeners;
98
99 int row_progress[VVC_PROGRESS_LAST];
100
101 AVMutex lock;
102 AVCond cond;
103 } VVCFrameThread;
104
105 211207 static void add_task(VVCContext *s, VVCTask *t)
106 {
107 211207 VVCFrameThread *ft = t->fc->ft;
108
109 211207 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
110
111 211207 av_executor_execute(s->executor, &t->u.task);
112 211207 }
113
114 44060 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
115 {
116 44060 memset(t, 0, sizeof(*t));
117 44060 t->stage = stage;
118 44060 t->fc = fc;
119 44060 t->rx = rx;
120 44060 t->ry = ry;
121 44060 t->rs = ry * fc->ft->ctu_width + rx;
122
2/2
✓ Branch 0 taken 352480 times.
✓ Branch 1 taken 44060 times.
396540 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
123 352480 atomic_store(t->score + i, 0);
124 44060 atomic_store(&t->target_inter_score, 0);
125 44060 }
126
127 43183 static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
128 {
129
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43183 times.
43183 if (t->sc) {
130 // the task already inited, error bitstream
131 return AVERROR_INVALIDDATA;
132 }
133 43183 t->sc = sc;
134 43183 t->ep = ep;
135 43183 t->ctu_idx = ctu_idx;
136
137 43183 return 0;
138 }
139
140 1237983 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
141 {
142 1237983 return atomic_fetch_add(&t->score[stage], 1) + 1;
143 }
144
145 513488 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
146 {
147 513488 return atomic_load(&t->score[stage]);
148 }
149
150 //first row in tile or slice
151 6450 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
152 {
153 6450 const VVCFrameThread *ft = fc->ft;
154 6450 const VVCPPS *pps = fc->ps.pps;
155
156
2/2
✓ Branch 0 taken 5400 times.
✓ Branch 1 taken 1050 times.
6450 if (ry != pps->ctb_to_row_bd[ry]) {
157 5400 const int rs = ry * ft->ctu_width + rx;
158 5400 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
159 }
160 1050 return 1;
161 }
162
163 1751471 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
164 {
165 // l:left, r:right, t: top, b: bottom
166 static const uint8_t target_score[] =
167 {
168 2, //VVC_TASK_STAGE_RECON, need l + rt recon
169 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
170 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
171 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
172 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
173 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
174 };
175 1751471 uint8_t target = 0;
176 1751471 VVCFrameContext *fc = t->fc;
177
178
2/2
✓ Branch 0 taken 130599 times.
✓ Branch 1 taken 1620872 times.
1751471 if (stage == VVC_TASK_STAGE_PARSE) {
179 130599 const H266RawSPS *rsps = fc->ps.sps->r;
180
4/4
✓ Branch 0 taken 5250 times.
✓ Branch 1 taken 125349 times.
✓ Branch 3 taken 4200 times.
✓ Branch 4 taken 1050 times.
130599 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
181 130599 target = 2 + wpp - 1; //left parse + colocation + wpp - no previous stage
182
2/2
✓ Branch 0 taken 118883 times.
✓ Branch 1 taken 1501989 times.
1620872 } else if (stage == VVC_TASK_STAGE_INTER) {
183 118883 target = atomic_load(&t->target_inter_score);
184 } else {
185 1501989 target = target_score[stage - VVC_TASK_STAGE_RECON];
186 }
187
188 //+1 for previous stage
189
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1751471 times.
1751471 av_assert0(score <= target + 1);
190 1751471 return score == target + 1;
191 }
192
193 1757817 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
194 const int rx, const int ry, const VVCTaskStage stage)
195 {
196 1757817 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
197 uint8_t score;
198
199
8/8
✓ Branch 0 taken 1629224 times.
✓ Branch 1 taken 128593 times.
✓ Branch 2 taken 1522995 times.
✓ Branch 3 taken 106229 times.
✓ Branch 4 taken 1360131 times.
✓ Branch 5 taken 162864 times.
✓ Branch 6 taken 122148 times.
✓ Branch 7 taken 1237983 times.
1757817 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
200 519834 return;
201
202 1237983 score = task_add_score(t, stage);
203
2/2
✓ Branch 1 taken 211207 times.
✓ Branch 2 taken 1026776 times.
1237983 if (task_has_target_score(t, stage, score)) {
204
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 211207 times.
211207 av_assert0(s);
205
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 211207 times.
211207 av_assert0(stage == t->stage);
206 211207 add_task(s, t);
207 }
208 }
209
210 291137 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
211 {
212
2/2
✓ Branch 0 taken 81857 times.
✓ Branch 1 taken 209280 times.
291137 if (atomic_fetch_sub(scheduled, 1) == 1) {
213 81857 ff_mutex_lock(&ft->lock);
214 81857 ff_cond_signal(&ft->cond);
215 81857 ff_mutex_unlock(&ft->lock);
216 }
217 291137 }
218
219 79930 static void progress_done(VVCProgressListener *_l, const int type)
220 {
221 79930 const ProgressListener *l = (ProgressListener *)_l;
222 79930 const VVCTask *t = l->task;
223 79930 VVCFrameThread *ft = t->fc->ft;
224
225 79930 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
226 79930 sheduled_done(ft, &ft->nb_scheduled_listeners);
227 79930 }
228
229 75700 static void pixel_done(VVCProgressListener *l)
230 {
231 75700 progress_done(l, VVC_TASK_STAGE_INTER);
232 75700 }
233
234 4230 static void mv_done(VVCProgressListener *l)
235 {
236 4230 progress_done(l, VVC_TASK_STAGE_PARSE);
237 4230 }
238
239 79930 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
240 {
241 79930 const int is_inter = vp == VVC_PROGRESS_PIXEL;
242
243 79930 l->task = t;
244 79930 l->s = s;
245 79930 l->l.vp = vp;
246 79930 l->l.y = y;
247
2/2
✓ Branch 0 taken 75700 times.
✓ Branch 1 taken 4230 times.
79930 l->l.progress_done = is_inter ? pixel_done : mv_done;
248
2/2
✓ Branch 0 taken 75700 times.
✓ Branch 1 taken 4230 times.
79930 if (is_inter)
249 75700 atomic_fetch_add(&t->target_inter_score, 1);
250 79930 }
251
252 79930 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
253 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
254 {
255 79930 VVCFrameThread *ft = t->fc->ft;
256
257 79930 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
258 79930 listener_init(l, t, s, vp, y);
259 79930 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
260 79930 }
261
262 43183 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
263 {
264 43183 VVCFrameThread *ft = fc->ft;
265 43183 EntryPoint *ep = t->ep;
266 43183 const VVCSPS *sps = fc->ps.sps;
267
268
2/2
✓ Branch 0 taken 1400 times.
✓ Branch 1 taken 41783 times.
43183 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
269
2/2
✓ Branch 0 taken 200 times.
✓ Branch 1 taken 1200 times.
1400 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
270 200 EntryPoint *next = ep + 1;
271
3/4
✓ Branch 0 taken 150 times.
✓ Branch 1 taken 50 times.
✓ Branch 3 taken 150 times.
✗ Branch 4 not taken.
200 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) {
272 150 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
273 150 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
274 }
275 }
276
3/4
✓ Branch 0 taken 1050 times.
✓ Branch 1 taken 350 times.
✓ Branch 3 taken 1050 times.
✗ Branch 4 not taken.
1400 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
277 1050 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
278 }
279
280
2/2
✓ Branch 0 taken 41256 times.
✓ Branch 1 taken 1927 times.
43183 if (t->ctu_idx + 1 < t->ep->ctu_end) {
281 41256 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
282 41256 const int next_rx = next_rs % ft->ctu_width;
283 41256 const int next_ry = next_rs / ft->ctu_width;
284 41256 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
285 }
286 43183 }
287
288 43183 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
289 {
290 43183 const VVCSH *sh = &sc->sh;
291
292
2/2
✓ Branch 0 taken 38926 times.
✓ Branch 1 taken 4257 times.
43183 if (!IS_I(sh->r)) {
293 38926 CTU *ctu = fc->tab.ctus + rs;
294
2/2
✓ Branch 0 taken 77852 times.
✓ Branch 1 taken 38926 times.
116778 for (int lx = 0; lx < 2; lx++) {
295
2/2
✓ Branch 0 taken 150332 times.
✓ Branch 1 taken 77852 times.
228184 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
296 150332 int y = ctu->max_y[lx][i];
297 150332 VVCRefPic *refp = sc->rpl[lx].refs + i;
298 150332 VVCFrame *ref = refp->ref;
299
3/4
✓ Branch 0 taken 150332 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75700 times.
✓ Branch 3 taken 74632 times.
150332 if (ref && y >= 0) {
300
2/2
✓ Branch 0 taken 214 times.
✓ Branch 1 taken 75486 times.
75700 if (refp->is_scaled)
301 214 y = y * refp->scale[1] >> 14;
302 75700 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
303 }
304 }
305 }
306 }
307 43183 }
308
309 43183 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
310 {
311 43183 VVCFrameThread *ft = fc->ft;
312 43183 const int rs = ry * ft->ctu_width + rx;
313 43183 const int slice_idx = fc->tab.slice_idx[rs];
314 43183 VVCTask *t = ft->tasks + rs;
315 43183 const SliceContext *sc = fc->slices[slice_idx];
316
317 43183 schedule_next_parse(s, fc, sc, t);
318 43183 schedule_inter(s, fc, sc, t, rs);
319 43183 }
320
321 493988 static void task_stage_done(const VVCTask *t, VVCContext *s)
322 {
323 493988 VVCFrameContext *fc = t->fc;
324 493988 VVCFrameThread *ft = fc->ft;
325 493988 const VVCTaskStage stage = t->stage;
326
327 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
328
329 //this is a reserve map of ready_score, ordered by zigzag
330
2/2
✓ Branch 0 taken 43183 times.
✓ Branch 1 taken 450805 times.
493988 if (stage == VVC_TASK_STAGE_PARSE) {
331 43183 parse_task_done(s, fc, t->rx, t->ry);
332
2/2
✓ Branch 0 taken 67937 times.
✓ Branch 1 taken 382868 times.
450805 } else if (stage == VVC_TASK_STAGE_RECON) {
333 67937 ADD(-1, 1, VVC_TASK_STAGE_RECON);
334 67937 ADD( 1, 0, VVC_TASK_STAGE_RECON);
335 67937 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
336 67937 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
337 67937 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
338
2/2
✓ Branch 0 taken 67937 times.
✓ Branch 1 taken 314931 times.
382868 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
339 67937 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
340 67937 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
341
2/2
✓ Branch 0 taken 67937 times.
✓ Branch 1 taken 246994 times.
314931 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
342 67937 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
343 67937 ADD(-1, -1, VVC_TASK_STAGE_SAO);
344 67937 ADD( 0, -1, VVC_TASK_STAGE_SAO);
345 67937 ADD(-1, 0, VVC_TASK_STAGE_SAO);
346 67937 ADD( 1, -1, VVC_TASK_STAGE_SAO);
347 67937 ADD( 1, 0, VVC_TASK_STAGE_SAO);
348
2/2
✓ Branch 0 taken 67937 times.
✓ Branch 1 taken 179057 times.
246994 } else if (stage == VVC_TASK_STAGE_SAO) {
349 67937 ADD(-1, -1, VVC_TASK_STAGE_ALF);
350 67937 ADD( 0, -1, VVC_TASK_STAGE_ALF);
351 67937 ADD(-1, 0, VVC_TASK_STAGE_ALF);
352 67937 ADD( 1, -1, VVC_TASK_STAGE_ALF);
353 67937 ADD(-1, 1, VVC_TASK_STAGE_ALF);
354 67937 ADD( 1, 0, VVC_TASK_STAGE_ALF);
355 67937 ADD( 0, 1, VVC_TASK_STAGE_ALF);
356 67937 ADD( 1, 1, VVC_TASK_STAGE_ALF);
357 }
358 493988 }
359
360 556671 static int task_is_stage_ready(VVCTask *t, int add)
361 {
362 556671 const VVCTaskStage stage = t->stage;
363 uint8_t score;
364
2/2
✓ Branch 0 taken 43183 times.
✓ Branch 1 taken 513488 times.
556671 if (stage > VVC_TASK_STAGE_ALF)
365 43183 return 0;
366 513488 score = task_get_score(t, stage) + add;
367 513488 return task_has_target_score(t, stage, score);
368 }
369
370 211207 static int task_ready(const AVTask *_t, void *user_data)
371 {
372 211207 VVCTask *t = (VVCTask*)_t;
373
374 211207 return task_is_stage_ready(t, 0);
375 }
376
377 #define CHECK(a, b) \
378 do { \
379 if ((a) != (b)) \
380 return (a) < (b); \
381 } while (0)
382
383 2104358 static int task_priority_higher(const AVTask *_a, const AVTask *_b)
384 {
385 2104358 const VVCTask *a = (const VVCTask*)_a;
386 2104358 const VVCTask *b = (const VVCTask*)_b;
387
388
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2104358 times.
2104358 CHECK(a->fc->decode_order, b->fc->decode_order); //decode order
389
390
4/4
✓ Branch 0 taken 2066699 times.
✓ Branch 1 taken 37659 times.
✓ Branch 2 taken 32243 times.
✓ Branch 3 taken 2034456 times.
2104358 if (a->stage == VVC_TASK_STAGE_PARSE || b->stage == VVC_TASK_STAGE_PARSE) {
391
1/2
✓ Branch 0 taken 69902 times.
✗ Branch 1 not taken.
69902 CHECK(a->stage, b->stage);
392 CHECK(a->ry, b->ry);
393 return a->rx < b->rx;
394 }
395
396
2/2
✓ Branch 0 taken 1776960 times.
✓ Branch 1 taken 257496 times.
2034456 CHECK(a->rx + a->ry + a->stage, b->rx + b->ry + b->stage); //zigzag with type
397
2/2
✓ Branch 0 taken 54572 times.
✓ Branch 1 taken 202924 times.
257496 CHECK(a->rx + a->ry, b->rx + b->ry); //zigzag
398 202924 return a->ry < b->ry;
399 }
400
401 86366 static void report_frame_progress(VVCFrameContext *fc,
402 const int ry, const VVCProgress idx)
403 {
404 86366 VVCFrameThread *ft = fc->ft;
405 86366 const int ctu_size = ft->ctu_size;
406 int old;
407
408
2/2
✓ Branch 0 taken 7674 times.
✓ Branch 1 taken 78692 times.
86366 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
409 int y;
410 7674 ff_mutex_lock(&ft->lock);
411 7674 y = old = ft->row_progress[idx];
412
4/4
✓ Branch 0 taken 13594 times.
✓ Branch 1 taken 1754 times.
✓ Branch 2 taken 7674 times.
✓ Branch 3 taken 5920 times.
15348 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
413 7674 y++;
414
1/2
✓ Branch 0 taken 7674 times.
✗ Branch 1 not taken.
7674 if (old != y) {
415
2/2
✓ Branch 0 taken 5920 times.
✓ Branch 1 taken 1754 times.
7674 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
416 7674 ft->row_progress[idx] = y;
417 7674 ff_vvc_report_progress(fc->ref, idx, progress);
418 }
419 7674 ff_mutex_unlock(&ft->lock);
420 }
421 86366 }
422
423 43183 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
424 {
425 int ret;
426 43183 VVCFrameContext *fc = lc->fc;
427 43183 const int rs = t->rs;
428 43183 const CTU *ctu = fc->tab.ctus + rs;
429
430 43183 lc->ep = t->ep;
431
432 43183 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
433
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43183 times.
43183 if (ret < 0)
434 return ret;
435
436
2/2
✓ Branch 0 taken 22664 times.
✓ Branch 1 taken 20519 times.
43183 if (!ctu->has_dmvr)
437 22664 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
438
439 43183 return 0;
440 }
441
442 43183 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
443 {
444 43183 VVCFrameContext *fc = lc->fc;
445 43183 const CTU *ctu = fc->tab.ctus + t->rs;
446 int ret;
447
448 43183 ret = ff_vvc_predict_inter(lc, t->rs);
449
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43183 times.
43183 if (ret < 0)
450 return ret;
451
452
2/2
✓ Branch 0 taken 20519 times.
✓ Branch 1 taken 22664 times.
43183 if (ctu->has_dmvr)
453 20519 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
454
455 43183 return 0;
456 }
457
458 43183 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
459 {
460 43183 return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
461 }
462
463 43183 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
464 {
465 43183 VVCFrameContext *fc = lc->fc;
466 43183 VVCFrameThread *ft = fc->ft;
467 43183 const int ctu_size = ft->ctu_size;
468 43183 const int x0 = t->rx * ctu_size;
469 43183 const int y0 = t->ry * ctu_size;
470
471 43183 ff_vvc_lmcs_filter(lc, x0, y0);
472
473 43183 return 0;
474 }
475
476 43183 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
477 {
478 43183 VVCFrameContext *fc = lc->fc;
479 43183 VVCFrameThread *ft = fc->ft;
480 43183 const int ctb_size = ft->ctu_size;
481 43183 const int x0 = t->rx * ctb_size;
482 43183 const int y0 = t->ry * ctb_size;
483
484
1/2
✓ Branch 0 taken 43183 times.
✗ Branch 1 not taken.
43183 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
485 43183 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
486 43183 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
487 }
488
489 43183 return 0;
490 }
491
492 43183 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
493 {
494 43183 VVCFrameContext *fc = lc->fc;
495 43183 VVCFrameThread *ft = fc->ft;
496 43183 const int ctb_size = ft->ctu_size;
497 43183 const int x0 = t->rx * ctb_size;
498 43183 const int y0 = t->ry * ctb_size;
499
500
1/2
✓ Branch 0 taken 43183 times.
✗ Branch 1 not taken.
43183 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
501 43183 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
502 43183 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
503 }
504
2/2
✓ Branch 0 taken 42871 times.
✓ Branch 1 taken 312 times.
43183 if (fc->ps.sps->r->sps_sao_enabled_flag)
505 42871 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
506
507 43183 return 0;
508 }
509
510 43183 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
511 {
512 43183 VVCFrameContext *fc = lc->fc;
513 43183 VVCFrameThread *ft = fc->ft;
514 43183 const int ctb_size = ft->ctu_size;
515 43183 const int x0 = t->rx * ctb_size;
516 43183 const int y0 = t->ry * ctb_size;
517
518
2/2
✓ Branch 0 taken 42871 times.
✓ Branch 1 taken 312 times.
43183 if (fc->ps.sps->r->sps_sao_enabled_flag) {
519 42871 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
520 42871 ff_vvc_sao_filter(lc, x0, y0);
521 }
522
523
2/2
✓ Branch 0 taken 34636 times.
✓ Branch 1 taken 8547 times.
43183 if (fc->ps.sps->r->sps_alf_enabled_flag)
524 34636 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
525
526 43183 return 0;
527 }
528
529 43183 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
530 {
531 43183 VVCFrameContext *fc = lc->fc;
532 43183 VVCFrameThread *ft = fc->ft;
533 43183 const int ctu_size = ft->ctu_size;
534 43183 const int x0 = t->rx * ctu_size;
535 43183 const int y0 = t->ry * ctu_size;
536
537
2/2
✓ Branch 0 taken 34636 times.
✓ Branch 1 taken 8547 times.
43183 if (fc->ps.sps->r->sps_alf_enabled_flag) {
538 34636 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
539 34636 ff_vvc_alf_filter(lc, x0, y0);
540 }
541 43183 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
542
543 43183 return 0;
544 }
545
// enables the per-stage debug trace
#define VVC_THREAD_DEBUG

/*
 * One-letter tag per VVCTaskStage, indexed by stage, used in log messages.
 * Defined unconditionally: the error log in task_run_stage() needs it even
 * when VVC_THREAD_DEBUG is not defined.
 */
static const char *const task_name[] = {
    "P",    // parse
    "I",    // inter
    "R",    // recon
    "L",    // lmcs
    "V",    // deblock v
    "H",    // deblock h
    "S",    // sao
    "A"     // alf
};
559
560 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
561
562 345464 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
563 {
564 int ret;
565 345464 VVCFrameContext *fc = t->fc;
566 345464 VVCFrameThread *ft = fc->ft;
567 345464 const VVCTaskStage stage = t->stage;
568 static const run_func run[] = {
569 run_parse,
570 run_inter,
571 run_recon,
572 run_lmcs,
573 run_deblock_v,
574 run_deblock_h,
575 run_sao,
576 run_alf,
577 };
578
579 #ifdef VVC_THREAD_DEBUG
580 345464 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
581 #endif
582
583 345464 lc->sc = t->sc;
584
585
1/2
✓ Branch 0 taken 345464 times.
✗ Branch 1 not taken.
345464 if (!atomic_load(&ft->ret)) {
586
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 345464 times.
345464 if ((ret = run[stage](s, lc, t)) < 0) {
587 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
588 intptr_t zero = 0;
589 #else
590 int zero = 0;
591 #endif
592 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
593 av_log(s->avctx, AV_LOG_ERROR,
594 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
595 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
596 }
597 }
598
599 345464 task_stage_done(t, s);
600 345464 return;
601 }
602
603 211207 static int task_run(AVTask *_t, void *local_context, void *user_data)
604 {
605 211207 VVCTask *t = (VVCTask*)_t;
606 211207 VVCContext *s = (VVCContext *)user_data;
607 211207 VVCLocalContext *lc = local_context;
608 211207 VVCFrameThread *ft = t->fc->ft;
609
610 211207 lc->fc = t->fc;
611
612 do {
613 345464 task_run_stage(t, s, lc);
614 345464 t->stage++;
615
2/2
✓ Branch 1 taken 134257 times.
✓ Branch 2 taken 211207 times.
345464 } while (task_is_stage_ready(t, 1));
616
617
2/2
✓ Branch 0 taken 168024 times.
✓ Branch 1 taken 43183 times.
211207 if (t->stage != VVC_TASK_STAGE_LAST)
618 168024 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
619
620 211207 sheduled_done(ft, &ft->nb_scheduled_tasks);
621
622 211207 return 0;
623 }
624
625 52 AVExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
626 {
627 52 AVTaskCallbacks callbacks = {
628 s,
629 sizeof(VVCLocalContext),
630 task_priority_higher,
631 task_ready,
632 task_run,
633 };
634 52 return av_executor_alloc(&callbacks, thread_count);
635 }
636
637 52 void ff_vvc_executor_free(AVExecutor **e)
638 {
639 52 av_executor_free(e);
640 52 }
641
642 625 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
643 {
644 625 VVCFrameThread *ft = fc->ft;
645
646
2/2
✓ Branch 0 taken 416 times.
✓ Branch 1 taken 209 times.
625 if (!ft)
647 416 return;
648
649 209 ff_mutex_destroy(&ft->lock);
650 209 ff_cond_destroy(&ft->cond);
651 209 av_freep(&ft->rows);
652 209 av_freep(&ft->tasks);
653 209 av_freep(&ft);
654 }
655
656 877 static void frame_thread_init_score(VVCFrameContext *fc)
657 {
658 877 const VVCFrameThread *ft = fc->ft;
659 VVCTask task;
660
661 877 task_init(&task, VVC_TASK_STAGE_RECON, fc, 0, 0);
662
663
2/2
✓ Branch 0 taken 5262 times.
✓ Branch 1 taken 877 times.
6139 for (int i = VVC_TASK_STAGE_RECON; i < VVC_TASK_STAGE_LAST; i++) {
664 5262 task.stage = i;
665
666
2/2
✓ Branch 0 taken 51240 times.
✓ Branch 1 taken 5262 times.
56502 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
667 51240 task.ry = -1; //top
668 51240 task_stage_done(&task, NULL);
669 51240 task.ry = ft->ctu_height; //bottom
670 51240 task_stage_done(&task, NULL);
671 }
672
673
2/2
✓ Branch 0 taken 23022 times.
✓ Branch 1 taken 5262 times.
28284 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
674 23022 task.rx = -1; //left
675 23022 task_stage_done(&task, NULL);
676 23022 task.rx = ft->ctu_width; //right
677 23022 task_stage_done(&task, NULL);
678 }
679 }
680 877 }
681
682 877 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
683 {
684 877 const VVCSPS *sps = fc->ps.sps;
685 877 const VVCPPS *pps = fc->ps.pps;
686 877 VVCFrameThread *ft = fc->ft;
687 int ret;
688
689
3/4
✓ Branch 0 taken 668 times.
✓ Branch 1 taken 209 times.
✓ Branch 2 taken 668 times.
✗ Branch 3 not taken.
877 if (!ft || ft->ctu_width != pps->ctb_width ||
690
1/2
✓ Branch 0 taken 668 times.
✗ Branch 1 not taken.
668 ft->ctu_height != pps->ctb_height ||
691
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 668 times.
668 ft->ctu_size != sps->ctb_size_y) {
692
693 209 ff_vvc_frame_thread_free(fc);
694 209 ft = av_calloc(1, sizeof(*fc->ft));
695
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 209 times.
209 if (!ft)
696 return AVERROR(ENOMEM);
697
698 209 ft->ctu_width = fc->ps.pps->ctb_width;
699 209 ft->ctu_height = fc->ps.pps->ctb_height;
700 209 ft->ctu_count = fc->ps.pps->ctb_count;
701 209 ft->ctu_size = fc->ps.sps->ctb_size_y;
702
703 209 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
704
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 209 times.
209 if (!ft->rows)
705 goto fail;
706
707 209 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
708
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 209 times.
209 if (!ft->tasks)
709 goto fail;
710
711
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 209 times.
209 if ((ret = ff_cond_init(&ft->cond, NULL)))
712 goto fail;
713
714
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 209 times.
209 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
715 ff_cond_destroy(&ft->cond);
716 goto fail;
717 }
718 }
719 877 fc->ft = ft;
720 877 ft->ret = 0;
721
2/2
✓ Branch 0 taken 3837 times.
✓ Branch 1 taken 877 times.
4714 for (int y = 0; y < ft->ctu_height; y++) {
722 3837 VVCRowThread *row = ft->rows + y;
723 3837 memset(row->col_progress, 0, sizeof(row->col_progress));
724 }
725
726
2/2
✓ Branch 0 taken 43183 times.
✓ Branch 1 taken 877 times.
44060 for (int rs = 0; rs < ft->ctu_count; rs++) {
727 43183 VVCTask *t = ft->tasks + rs;
728 43183 task_init(t, VVC_TASK_STAGE_PARSE, fc, rs % ft->ctu_width, rs / ft->ctu_width);
729 }
730
731 877 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
732
733 877 frame_thread_init_score(fc);
734
735 877 return 0;
736
737 fail:
738 if (ft) {
739 av_freep(&ft->rows);
740 av_freep(&ft->tasks);
741 av_freep(&ft);
742 }
743
744 return AVERROR(ENOMEM);
745 }
746
747 43183 static void check_colocation(VVCContext *s, VVCTask *t)
748 {
749 43183 const VVCFrameContext *fc = t->fc;
750
751
4/4
✓ Branch 0 taken 4361 times.
✓ Branch 1 taken 38822 times.
✓ Branch 2 taken 4049 times.
✓ Branch 3 taken 312 times.
43183 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
752 42871 VVCFrame *col = fc->ref->collocated_ref;
753 42871 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
754
4/4
✓ Branch 0 taken 38822 times.
✓ Branch 1 taken 4049 times.
✓ Branch 2 taken 4230 times.
✓ Branch 3 taken 34592 times.
42871 if (col && first_col) {
755 //we depend on bottom and right boundary, do not - 1 for y
756 4230 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
757 4230 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
758 4230 return;
759 }
760 }
761 38953 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
762 }
763
764 1927 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
765 {
766 1927 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
767 1927 VVCTask *t = ft->tasks + rs;
768
769 1927 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
770 1927 }
771
772 877 int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
773 {
774 877 VVCFrameThread *ft = fc->ft;
775
776 // We'll handle this in two passes:
777 // Pass 0 to initialize tasks with parser, this will help detect bit stream error
778 // Pass 1 to shedule location check and submit the entry point
779
2/2
✓ Branch 0 taken 1754 times.
✓ Branch 1 taken 877 times.
2631 for (int pass = 0; pass < 2; pass++) {
780
2/2
✓ Branch 0 taken 3166 times.
✓ Branch 1 taken 1754 times.
4920 for (int i = 0; i < fc->nb_slices; i++) {
781 3166 SliceContext *sc = fc->slices[i];
782
2/2
✓ Branch 0 taken 3854 times.
✓ Branch 1 taken 3166 times.
7020 for (int j = 0; j < sc->nb_eps; j++) {
783 3854 EntryPoint *ep = sc->eps + j;
784
2/2
✓ Branch 0 taken 86366 times.
✓ Branch 1 taken 3854 times.
90220 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
785 86366 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
786 86366 VVCTask *t = ft->tasks + rs;
787
2/2
✓ Branch 0 taken 43183 times.
✓ Branch 1 taken 43183 times.
86366 if (pass) {
788 43183 check_colocation(s, t);
789 } else {
790 43183 const int ret = task_init_parse(t, sc, ep, k);
791
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43183 times.
43183 if (ret < 0)
792 return ret;
793 }
794 }
795
2/2
✓ Branch 0 taken 1927 times.
✓ Branch 1 taken 1927 times.
3854 if (pass)
796 1927 submit_entry_point(s, ft, sc, ep);
797 }
798 }
799 }
800 877 return 0;
801 }
802
803 877 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
804 {
805 877 VVCFrameThread *ft = fc->ft;
806
807 877 ff_mutex_lock(&ft->lock);
808
809
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 877 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 877 times.
877 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
810 ff_cond_wait(&ft->cond, &ft->lock);
811
812 877 ff_mutex_unlock(&ft->lock);
813 877 ff_vvc_report_frame_finished(fc->ref);
814
815 #ifdef VVC_THREAD_DEBUG
816 877 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d done\r\n", (int)fc->decode_order);
817 #endif
818 877 return ft->ret;
819 }
820