FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/vvc/thread.c
Date: 2024-05-03 15:42:48
Exec Total Coverage
Lines: 388 405 95.8%
Functions: 41 41 100.0%
Branches: 147 170 86.5%

Line Branch Exec Source
1 /*
2 * VVC thread logic
3 *
4 * Copyright (C) 2023 Nuo Mi
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <stdatomic.h>
24
25 #include "libavutil/executor.h"
26 #include "libavutil/mem.h"
27 #include "libavutil/thread.h"
28
29 #include "thread.h"
30 #include "ctu.h"
31 #include "filter.h"
32 #include "inter.h"
33 #include "intra.h"
34 #include "refs.h"
35
36 typedef struct ProgressListener {
37 VVCProgressListener l;
38 struct VVCTask *task;
39 VVCContext *s;
40 } ProgressListener;
41
42 typedef enum VVCTaskStage {
43 VVC_TASK_STAGE_PARSE,
44 VVC_TASK_STAGE_INTER,
45 VVC_TASK_STAGE_RECON,
46 VVC_TASK_STAGE_LMCS,
47 VVC_TASK_STAGE_DEBLOCK_V,
48 VVC_TASK_STAGE_DEBLOCK_H,
49 VVC_TASK_STAGE_SAO,
50 VVC_TASK_STAGE_ALF,
51 VVC_TASK_STAGE_LAST
52 } VVCTaskStage;
53
54 typedef struct VVCTask {
55 union {
56 struct VVCTask *next; //for executor debug only
57 AVTask task;
58 } u;
59
60 VVCTaskStage stage;
61
62 // ctu x, y, and raster scan order
63 int rx, ry, rs;
64 VVCFrameContext *fc;
65
66 ProgressListener col_listener;
67 ProgressListener listener[2][VVC_MAX_REF_ENTRIES];
68
69 // for parse task only
70 SliceContext *sc;
71 EntryPoint *ep;
72 int ctu_idx; //ctu idx in the current slice
73
74 // tasks with target scores met are ready for scheduling
75 atomic_uchar score[VVC_TASK_STAGE_LAST];
76 atomic_uchar target_inter_score;
77 } VVCTask;
78
79 typedef struct VVCRowThread {
80 atomic_int col_progress[VVC_PROGRESS_LAST];
81 } VVCRowThread;
82
83 typedef struct VVCFrameThread {
84 // error return for tasks
85 atomic_int ret;
86
87 VVCRowThread *rows;
88 VVCTask *tasks;
89
90 int ctu_size;
91 int ctu_width;
92 int ctu_height;
93 int ctu_count;
94
95 //protected by lock
96 atomic_int nb_scheduled_tasks;
97 atomic_int nb_scheduled_listeners;
98
99 int row_progress[VVC_PROGRESS_LAST];
100
101 AVMutex lock;
102 AVCond cond;
103 } VVCFrameThread;
104
105 208030 static void add_task(VVCContext *s, VVCTask *t)
106 {
107 208030 VVCFrameThread *ft = t->fc->ft;
108
109 208030 atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
110
111 208030 av_executor_execute(s->executor, &t->u.task);
112 208030 }
113
114 43763 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
115 {
116 43763 memset(t, 0, sizeof(*t));
117 43763 t->stage = stage;
118 43763 t->fc = fc;
119 43763 t->rx = rx;
120 43763 t->ry = ry;
121 43763 t->rs = ry * fc->ft->ctu_width + rx;
122
2/2
✓ Branch 0 taken 350104 times.
✓ Branch 1 taken 43763 times.
393867 for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++)
123 350104 atomic_store(t->score + i, 0);
124 43763 atomic_store(&t->target_inter_score, 0);
125 43763 }
126
127 42891 static void task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx)
128 {
129 42891 t->sc = sc;
130 42891 t->ep = ep;
131 42891 t->ctu_idx = ctu_idx;
132 42891 }
133
134 1227948 static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage)
135 {
136 1227948 return atomic_fetch_add(&t->score[stage], 1) + 1;
137 }
138
139 508267 static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage)
140 {
141 508267 return atomic_load(&t->score[stage]);
142 }
143
144 //first row in tile or slice
145 6450 static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry)
146 {
147 6450 const VVCFrameThread *ft = fc->ft;
148 6450 const VVCPPS *pps = fc->ps.pps;
149
150
2/2
✓ Branch 0 taken 5400 times.
✓ Branch 1 taken 1050 times.
6450 if (ry != pps->ctb_to_row_bd[ry]) {
151 5400 const int rs = ry * ft->ctu_width + rx;
152 5400 return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width];
153 }
154 1050 return 1;
155 }
156
157 1736215 static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score)
158 {
159 // l:left, r:right, t: top, b: bottom
160 static const uint8_t target_score[] =
161 {
162 2, //VVC_TASK_STAGE_RECON, need l + rt recon
163 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon
164 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v
165 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h
166 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h
167 8, //VVC_TASK_STAGE_ALF, need sao around the ctu
168 };
169 1736215 uint8_t target = 0;
170 1736215 VVCFrameContext *fc = t->fc;
171
172
2/2
✓ Branch 0 taken 129723 times.
✓ Branch 1 taken 1606492 times.
1736215 if (stage == VVC_TASK_STAGE_PARSE) {
173 129723 const H266RawSPS *rsps = fc->ps.sps->r;
174
4/4
✓ Branch 0 taken 5250 times.
✓ Branch 1 taken 124473 times.
✓ Branch 3 taken 4200 times.
✓ Branch 4 taken 1050 times.
129723 const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry);
175 129723 target = 2 + wpp - 1; //left parse + colocation + wpp - no previous stage
176
2/2
✓ Branch 0 taken 118157 times.
✓ Branch 1 taken 1488335 times.
1606492 } else if (stage == VVC_TASK_STAGE_INTER) {
177 118157 target = atomic_load(&t->target_inter_score);
178 } else {
179 1488335 target = target_score[stage - VVC_TASK_STAGE_RECON];
180 }
181
182 //+1 for previous stage
183
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1736215 times.
1736215 av_assert0(score <= target + 1);
184 1736215 return score == target + 1;
185 }
186
187 1744212 static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft,
188 const int rx, const int ry, const VVCTaskStage stage)
189 {
190 1744212 VVCTask *t = ft->tasks + ft->ctu_width * ry + rx;
191 uint8_t score;
192
193
8/8
✓ Branch 0 taken 1616493 times.
✓ Branch 1 taken 127719 times.
✓ Branch 2 taken 1510986 times.
✓ Branch 3 taken 105507 times.
✓ Branch 4 taken 1349250 times.
✓ Branch 5 taken 161736 times.
✓ Branch 6 taken 121302 times.
✓ Branch 7 taken 1227948 times.
1744212 if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height)
194 516264 return;
195
196 1227948 score = task_add_score(t, stage);
197
2/2
✓ Branch 1 taken 208030 times.
✓ Branch 2 taken 1019918 times.
1227948 if (task_has_target_score(t, stage, score)) {
198
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 208030 times.
208030 av_assert0(s);
199
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 208030 times.
208030 av_assert0(stage == t->stage);
200 208030 add_task(s, t);
201 }
202 }
203
204 287561 static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled)
205 {
206
2/2
✓ Branch 0 taken 77246 times.
✓ Branch 1 taken 210315 times.
287561 if (atomic_fetch_sub(scheduled, 1) == 1) {
207 77246 ff_mutex_lock(&ft->lock);
208 77246 ff_cond_signal(&ft->cond);
209 77246 ff_mutex_unlock(&ft->lock);
210 }
211 287561 }
212
213 79531 static void progress_done(VVCProgressListener *_l, const int type)
214 {
215 79531 const ProgressListener *l = (ProgressListener *)_l;
216 79531 const VVCTask *t = l->task;
217 79531 VVCFrameThread *ft = t->fc->ft;
218
219 79531 frame_thread_add_score(l->s, ft, t->rx, t->ry, type);
220 79531 sheduled_done(ft, &ft->nb_scheduled_listeners);
221 79531 }
222
223 75266 static void pixel_done(VVCProgressListener *l)
224 {
225 75266 progress_done(l, VVC_TASK_STAGE_INTER);
226 75266 }
227
228 4265 static void mv_done(VVCProgressListener *l)
229 {
230 4265 progress_done(l, VVC_TASK_STAGE_PARSE);
231 4265 }
232
233 79531 static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
234 {
235 79531 const int is_inter = vp == VVC_PROGRESS_PIXEL;
236
237 79531 l->task = t;
238 79531 l->s = s;
239 79531 l->l.vp = vp;
240 79531 l->l.y = y;
241
2/2
✓ Branch 0 taken 75266 times.
✓ Branch 1 taken 4265 times.
79531 l->l.progress_done = is_inter ? pixel_done : mv_done;
242
2/2
✓ Branch 0 taken 75266 times.
✓ Branch 1 taken 4265 times.
79531 if (is_inter)
243 75266 atomic_fetch_add(&t->target_inter_score, 1);
244 79531 }
245
246 79531 static void add_progress_listener(VVCFrame *ref, ProgressListener *l,
247 VVCTask *t, VVCContext *s, const VVCProgress vp, const int y)
248 {
249 79531 VVCFrameThread *ft = t->fc->ft;
250
251 79531 atomic_fetch_add(&ft->nb_scheduled_listeners, 1);
252 79531 listener_init(l, t, s, vp, y);
253 79531 ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l);
254 79531 }
255
256 42891 static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t)
257 {
258 42891 VVCFrameThread *ft = fc->ft;
259 42891 EntryPoint *ep = t->ep;
260 42891 const VVCSPS *sps = fc->ps.sps;
261
262
2/2
✓ Branch 0 taken 1400 times.
✓ Branch 1 taken 41491 times.
42891 if (sps->r->sps_entropy_coding_sync_enabled_flag) {
263
2/2
✓ Branch 0 taken 200 times.
✓ Branch 1 taken 1200 times.
1400 if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) {
264 200 EntryPoint *next = ep + 1;
265
3/4
✓ Branch 0 taken 150 times.
✓ Branch 1 taken 50 times.
✓ Branch 3 taken 150 times.
✗ Branch 4 not taken.
200 if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) {
266 150 memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state));
267 150 ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag);
268 }
269 }
270
3/4
✓ Branch 0 taken 1050 times.
✓ Branch 1 taken 350 times.
✓ Branch 3 taken 1050 times.
✗ Branch 4 not taken.
1400 if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1))
271 1050 frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE);
272 }
273
274
2/2
✓ Branch 0 taken 40969 times.
✓ Branch 1 taken 1922 times.
42891 if (t->ctu_idx + 1 < t->ep->ctu_end) {
275 40969 const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1];
276 40969 const int next_rx = next_rs % ft->ctu_width;
277 40969 const int next_ry = next_rs / ft->ctu_width;
278 40969 frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE);
279 }
280 42891 }
281
282 42891 static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs)
283 {
284 42891 const VVCSH *sh = &sc->sh;
285
286
2/2
✓ Branch 0 taken 38690 times.
✓ Branch 1 taken 4201 times.
42891 if (!IS_I(sh->r)) {
287 38690 CTU *ctu = fc->tab.ctus + rs;
288
2/2
✓ Branch 0 taken 77380 times.
✓ Branch 1 taken 38690 times.
116070 for (int lx = 0; lx < 2; lx++) {
289
2/2
✓ Branch 0 taken 149236 times.
✓ Branch 1 taken 77380 times.
226616 for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) {
290 149236 const int y = ctu->max_y[lx][i];
291 149236 VVCFrame *ref = sc->rpl[lx].ref[i];
292
3/4
✓ Branch 0 taken 149236 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 75266 times.
✓ Branch 3 taken 73970 times.
149236 if (ref && y >= 0)
293 75266 add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER);
294 }
295 }
296 }
297 42891 }
298
299 42891 static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry)
300 {
301 42891 VVCFrameThread *ft = fc->ft;
302 42891 const int rs = ry * ft->ctu_width + rx;
303 42891 const int slice_idx = fc->tab.slice_idx[rs];
304 42891 VVCTask *t = ft->tasks + rs;
305 42891 const SliceContext *sc = fc->slices[slice_idx];
306
307 42891 schedule_next_parse(s, fc, sc, t);
308 42891 schedule_inter(s, fc, sc, t, rs);
309 42891 }
310
311 490632 static void task_stage_done(const VVCTask *t, VVCContext *s)
312 {
313 490632 VVCFrameContext *fc = t->fc;
314 490632 VVCFrameThread *ft = fc->ft;
315 490632 const VVCTaskStage stage = t->stage;
316
317 #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage)
318
319 //this is a reserve map of ready_score, ordered by zigzag
320
2/2
✓ Branch 0 taken 42891 times.
✓ Branch 1 taken 447741 times.
490632 if (stage == VVC_TASK_STAGE_PARSE) {
321 42891 parse_task_done(s, fc, t->rx, t->ry);
322
2/2
✓ Branch 0 taken 67475 times.
✓ Branch 1 taken 380266 times.
447741 } else if (stage == VVC_TASK_STAGE_RECON) {
323 67475 ADD(-1, 1, VVC_TASK_STAGE_RECON);
324 67475 ADD( 1, 0, VVC_TASK_STAGE_RECON);
325 67475 ADD(-1, -1, VVC_TASK_STAGE_LMCS);
326 67475 ADD( 0, -1, VVC_TASK_STAGE_LMCS);
327 67475 ADD(-1, 0, VVC_TASK_STAGE_LMCS);
328
2/2
✓ Branch 0 taken 67475 times.
✓ Branch 1 taken 312791 times.
380266 } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) {
329 67475 ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V);
330 67475 ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H);
331
2/2
✓ Branch 0 taken 67475 times.
✓ Branch 1 taken 245316 times.
312791 } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) {
332 67475 ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H);
333 67475 ADD(-1, -1, VVC_TASK_STAGE_SAO);
334 67475 ADD( 0, -1, VVC_TASK_STAGE_SAO);
335 67475 ADD(-1, 0, VVC_TASK_STAGE_SAO);
336 67475 ADD( 1, -1, VVC_TASK_STAGE_SAO);
337 67475 ADD( 1, 0, VVC_TASK_STAGE_SAO);
338
2/2
✓ Branch 0 taken 67475 times.
✓ Branch 1 taken 177841 times.
245316 } else if (stage == VVC_TASK_STAGE_SAO) {
339 67475 ADD(-1, -1, VVC_TASK_STAGE_ALF);
340 67475 ADD( 0, -1, VVC_TASK_STAGE_ALF);
341 67475 ADD(-1, 0, VVC_TASK_STAGE_ALF);
342 67475 ADD( 1, -1, VVC_TASK_STAGE_ALF);
343 67475 ADD(-1, 1, VVC_TASK_STAGE_ALF);
344 67475 ADD( 1, 0, VVC_TASK_STAGE_ALF);
345 67475 ADD( 0, 1, VVC_TASK_STAGE_ALF);
346 67475 ADD( 1, 1, VVC_TASK_STAGE_ALF);
347 }
348 490632 }
349
350 551158 static int task_is_stage_ready(VVCTask *t, int add)
351 {
352 551158 const VVCTaskStage stage = t->stage;
353 uint8_t score;
354
2/2
✓ Branch 0 taken 42891 times.
✓ Branch 1 taken 508267 times.
551158 if (stage > VVC_TASK_STAGE_ALF)
355 42891 return 0;
356 508267 score = task_get_score(t, stage) + add;
357 508267 return task_has_target_score(t, stage, score);
358 }
359
360 208030 static int task_ready(const AVTask *_t, void *user_data)
361 {
362 208030 VVCTask *t = (VVCTask*)_t;
363
364 208030 return task_is_stage_ready(t, 0);
365 }
366
367 #define CHECK(a, b) \
368 do { \
369 if ((a) != (b)) \
370 return (a) < (b); \
371 } while (0)
372
373 2424530 static int task_priority_higher(const AVTask *_a, const AVTask *_b)
374 {
375 2424530 const VVCTask *a = (const VVCTask*)_a;
376 2424530 const VVCTask *b = (const VVCTask*)_b;
377
378
2/2
✓ Branch 0 taken 74166 times.
✓ Branch 1 taken 2350364 times.
2424530 CHECK(a->fc->decode_order, b->fc->decode_order); //decode order
379
380
4/4
✓ Branch 0 taken 2257248 times.
✓ Branch 1 taken 93116 times.
✓ Branch 2 taken 29986 times.
✓ Branch 3 taken 2227262 times.
2350364 if (a->stage == VVC_TASK_STAGE_PARSE || b->stage == VVC_TASK_STAGE_PARSE) {
381
2/2
✓ Branch 0 taken 107089 times.
✓ Branch 1 taken 16013 times.
123102 CHECK(a->stage, b->stage);
382
2/2
✓ Branch 0 taken 11397 times.
✓ Branch 1 taken 4616 times.
16013 CHECK(a->ry, b->ry);
383 4616 return a->rx < b->rx;
384 }
385
386
2/2
✓ Branch 0 taken 1947214 times.
✓ Branch 1 taken 280048 times.
2227262 CHECK(a->rx + a->ry + a->stage, b->rx + b->ry + b->stage); //zigzag with type
387
2/2
✓ Branch 0 taken 55465 times.
✓ Branch 1 taken 224583 times.
280048 CHECK(a->rx + a->ry, b->rx + b->ry); //zigzag
388 224583 return a->ry < b->ry;
389 }
390
391 85782 static void report_frame_progress(VVCFrameContext *fc,
392 const int ry, const VVCProgress idx)
393 {
394 85782 VVCFrameThread *ft = fc->ft;
395 85782 const int ctu_size = ft->ctu_size;
396 int old;
397
398
2/2
✓ Branch 0 taken 7618 times.
✓ Branch 1 taken 78164 times.
85782 if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) {
399 int y;
400 7618 ff_mutex_lock(&ft->lock);
401 7618 y = old = ft->row_progress[idx];
402
4/4
✓ Branch 0 taken 13492 times.
✓ Branch 1 taken 1744 times.
✓ Branch 2 taken 7618 times.
✓ Branch 3 taken 5874 times.
15236 while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
403 7618 y++;
404
1/2
✓ Branch 0 taken 7618 times.
✗ Branch 1 not taken.
7618 if (old != y) {
405
2/2
✓ Branch 0 taken 5874 times.
✓ Branch 1 taken 1744 times.
7618 const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
406 7618 ft->row_progress[idx] = y;
407 7618 ff_vvc_report_progress(fc->ref, idx, progress);
408 }
409 7618 ff_mutex_unlock(&ft->lock);
410 }
411 85782 }
412
413 42891 static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
414 {
415 int ret;
416 42891 VVCFrameContext *fc = lc->fc;
417 42891 const int rs = t->rs;
418 42891 const CTU *ctu = fc->tab.ctus + rs;
419
420 42891 lc->ep = t->ep;
421
422 42891 ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry);
423
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42891 times.
42891 if (ret < 0)
424 return ret;
425
426
2/2
✓ Branch 0 taken 22372 times.
✓ Branch 1 taken 20519 times.
42891 if (!ctu->has_dmvr)
427 22372 report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV);
428
429 42891 return 0;
430 }
431
432 42891 static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
433 {
434 42891 VVCFrameContext *fc = lc->fc;
435 42891 const CTU *ctu = fc->tab.ctus + t->rs;
436
437 42891 ff_vvc_predict_inter(lc, t->rs);
438
439
2/2
✓ Branch 0 taken 20519 times.
✓ Branch 1 taken 22372 times.
42891 if (ctu->has_dmvr)
440 20519 report_frame_progress(fc, t->ry, VVC_PROGRESS_MV);
441
442 42891 return 0;
443 }
444
445 42891 static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
446 {
447 42891 ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry);
448
449 42891 return 0;
450 }
451
452 42891 static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
453 {
454 42891 VVCFrameContext *fc = lc->fc;
455 42891 VVCFrameThread *ft = fc->ft;
456 42891 const int ctu_size = ft->ctu_size;
457 42891 const int x0 = t->rx * ctu_size;
458 42891 const int y0 = t->ry * ctu_size;
459
460 42891 ff_vvc_lmcs_filter(lc, x0, y0);
461
462 42891 return 0;
463 }
464
465 42891 static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
466 {
467 42891 VVCFrameContext *fc = lc->fc;
468 42891 VVCFrameThread *ft = fc->ft;
469 42891 const int ctb_size = ft->ctu_size;
470 42891 const int x0 = t->rx * ctb_size;
471 42891 const int y0 = t->ry * ctb_size;
472
473
1/2
✓ Branch 0 taken 42891 times.
✗ Branch 1 not taken.
42891 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
474 42891 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
475 42891 ff_vvc_deblock_vertical(lc, x0, y0, t->rs);
476 }
477
478 42891 return 0;
479 }
480
481 42891 static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
482 {
483 42891 VVCFrameContext *fc = lc->fc;
484 42891 VVCFrameThread *ft = fc->ft;
485 42891 const int ctb_size = ft->ctu_size;
486 42891 const int x0 = t->rx * ctb_size;
487 42891 const int y0 = t->ry * ctb_size;
488
489
1/2
✓ Branch 0 taken 42891 times.
✗ Branch 1 not taken.
42891 if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) {
490 42891 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
491 42891 ff_vvc_deblock_horizontal(lc, x0, y0, t->rs);
492 }
493
2/2
✓ Branch 0 taken 42579 times.
✓ Branch 1 taken 312 times.
42891 if (fc->ps.sps->r->sps_sao_enabled_flag)
494 42579 ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1);
495
496 42891 return 0;
497 }
498
499 42891 static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
500 {
501 42891 VVCFrameContext *fc = lc->fc;
502 42891 VVCFrameThread *ft = fc->ft;
503 42891 const int ctb_size = ft->ctu_size;
504 42891 const int x0 = t->rx * ctb_size;
505 42891 const int y0 = t->ry * ctb_size;
506
507
2/2
✓ Branch 0 taken 42579 times.
✓ Branch 1 taken 312 times.
42891 if (fc->ps.sps->r->sps_sao_enabled_flag) {
508 42579 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
509 42579 ff_vvc_sao_filter(lc, x0, y0);
510 }
511
512
2/2
✓ Branch 0 taken 34344 times.
✓ Branch 1 taken 8547 times.
42891 if (fc->ps.sps->r->sps_alf_enabled_flag)
513 34344 ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0);
514
515 42891 return 0;
516 }
517
518 42891 static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t)
519 {
520 42891 VVCFrameContext *fc = lc->fc;
521 42891 VVCFrameThread *ft = fc->ft;
522 42891 const int ctu_size = ft->ctu_size;
523 42891 const int x0 = t->rx * ctu_size;
524 42891 const int y0 = t->ry * ctu_size;
525
526
2/2
✓ Branch 0 taken 34344 times.
✓ Branch 1 taken 8547 times.
42891 if (fc->ps.sps->r->sps_alf_enabled_flag) {
527 34344 ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs);
528 34344 ff_vvc_alf_filter(lc, x0, y0);
529 }
530 42891 report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL);
531
532 42891 return 0;
533 }
534
535 #define VVC_THREAD_DEBUG
536 #ifdef VVC_THREAD_DEBUG
537 const static char* task_name[] = {
538 "P",
539 "I",
540 "R",
541 "L",
542 "V",
543 "H",
544 "S",
545 "A"
546 };
547 #endif
548
549 typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t);
550
551 343128 static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
552 {
553 int ret;
554 343128 VVCFrameContext *fc = t->fc;
555 343128 VVCFrameThread *ft = fc->ft;
556 343128 const VVCTaskStage stage = t->stage;
557 343128 run_func run[] = {
558 run_parse,
559 run_inter,
560 run_recon,
561 run_lmcs,
562 run_deblock_v,
563 run_deblock_h,
564 run_sao,
565 run_alf,
566 };
567
568 #ifdef VVC_THREAD_DEBUG
569 343128 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry);
570 #endif
571
572 343128 lc->sc = t->sc;
573
574
1/2
✓ Branch 0 taken 343128 times.
✗ Branch 1 not taken.
343128 if (!atomic_load(&ft->ret)) {
575
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 343128 times.
343128 if ((ret = run[stage](s, lc, t)) < 0) {
576 #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H
577 intptr_t zero = 0;
578 #else
579 int zero = 0;
580 #endif
581 atomic_compare_exchange_strong(&ft->ret, &zero, ret);
582 av_log(s->avctx, AV_LOG_ERROR,
583 "frame %5d, %s(%3d, %3d) failed with %d\r\n",
584 (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret);
585 }
586 }
587
588 343128 task_stage_done(t, s);
589 343128 return;
590 }
591
592 208030 static int task_run(AVTask *_t, void *local_context, void *user_data)
593 {
594 208030 VVCTask *t = (VVCTask*)_t;
595 208030 VVCContext *s = (VVCContext *)user_data;
596 208030 VVCLocalContext *lc = local_context;
597 208030 VVCFrameThread *ft = t->fc->ft;
598
599 208030 lc->fc = t->fc;
600
601 do {
602 343128 task_run_stage(t, s, lc);
603 343128 t->stage++;
604
2/2
✓ Branch 1 taken 135098 times.
✓ Branch 2 taken 208030 times.
343128 } while (task_is_stage_ready(t, 1));
605
606
2/2
✓ Branch 0 taken 165139 times.
✓ Branch 1 taken 42891 times.
208030 if (t->stage != VVC_TASK_STAGE_LAST)
607 165139 frame_thread_add_score(s, ft, t->rx, t->ry, t->stage);
608
609 208030 sheduled_done(ft, &ft->nb_scheduled_tasks);
610
611 208030 return 0;
612 }
613
614 50 AVExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
615 {
616 50 AVTaskCallbacks callbacks = {
617 s,
618 sizeof(VVCLocalContext),
619 task_priority_higher,
620 task_ready,
621 task_run,
622 };
623 50 return av_executor_alloc(&callbacks, thread_count);
624 }
625
626 50 void ff_vvc_executor_free(AVExecutor **e)
627 {
628 50 av_executor_free(e);
629 50 }
630
631 604 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
632 {
633 604 VVCFrameThread *ft = fc->ft;
634
635
2/2
✓ Branch 0 taken 400 times.
✓ Branch 1 taken 204 times.
604 if (!ft)
636 400 return;
637
638 204 ff_mutex_destroy(&ft->lock);
639 204 ff_cond_destroy(&ft->cond);
640 204 av_freep(&ft->rows);
641 204 av_freep(&ft->tasks);
642 204 av_freep(&ft);
643 }
644
645 872 static void frame_thread_init_score(VVCFrameContext *fc)
646 {
647 872 const VVCFrameThread *ft = fc->ft;
648 VVCTask task;
649
650 872 task_init(&task, VVC_TASK_STAGE_RECON, fc, 0, 0);
651
652
2/2
✓ Branch 0 taken 5232 times.
✓ Branch 1 taken 872 times.
6104 for (int i = VVC_TASK_STAGE_RECON; i < VVC_TASK_STAGE_LAST; i++) {
653 5232 task.stage = i;
654
655
2/2
✓ Branch 0 taken 50898 times.
✓ Branch 1 taken 5232 times.
56130 for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) {
656 50898 task.ry = -1; //top
657 50898 task_stage_done(&task, NULL);
658 50898 task.ry = ft->ctu_height; //bottom
659 50898 task_stage_done(&task, NULL);
660 }
661
662
2/2
✓ Branch 0 taken 22854 times.
✓ Branch 1 taken 5232 times.
28086 for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) {
663 22854 task.rx = -1; //left
664 22854 task_stage_done(&task, NULL);
665 22854 task.rx = ft->ctu_width; //right
666 22854 task_stage_done(&task, NULL);
667 }
668 }
669 872 }
670
671 872 int ff_vvc_frame_thread_init(VVCFrameContext *fc)
672 {
673 872 const VVCSPS *sps = fc->ps.sps;
674 872 const VVCPPS *pps = fc->ps.pps;
675 872 VVCFrameThread *ft = fc->ft;
676 int ret;
677
678
3/4
✓ Branch 0 taken 668 times.
✓ Branch 1 taken 204 times.
✓ Branch 2 taken 668 times.
✗ Branch 3 not taken.
872 if (!ft || ft->ctu_width != pps->ctb_width ||
679
1/2
✓ Branch 0 taken 668 times.
✗ Branch 1 not taken.
668 ft->ctu_height != pps->ctb_height ||
680
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 668 times.
668 ft->ctu_size != sps->ctb_size_y) {
681
682 204 ff_vvc_frame_thread_free(fc);
683 204 ft = av_calloc(1, sizeof(*fc->ft));
684
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204 times.
204 if (!ft)
685 return AVERROR(ENOMEM);
686
687 204 ft->ctu_width = fc->ps.pps->ctb_width;
688 204 ft->ctu_height = fc->ps.pps->ctb_height;
689 204 ft->ctu_count = fc->ps.pps->ctb_count;
690 204 ft->ctu_size = fc->ps.sps->ctb_size_y;
691
692 204 ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows));
693
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204 times.
204 if (!ft->rows)
694 goto fail;
695
696 204 ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks));
697
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 204 times.
204 if (!ft->tasks)
698 goto fail;
699
700
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 204 times.
204 if ((ret = ff_cond_init(&ft->cond, NULL)))
701 goto fail;
702
703
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 204 times.
204 if ((ret = ff_mutex_init(&ft->lock, NULL))) {
704 ff_cond_destroy(&ft->cond);
705 goto fail;
706 }
707 }
708 872 fc->ft = ft;
709 872 ft->ret = 0;
710
2/2
✓ Branch 0 taken 3809 times.
✓ Branch 1 taken 872 times.
4681 for (int y = 0; y < ft->ctu_height; y++) {
711 3809 VVCRowThread *row = ft->rows + y;
712 3809 memset(row->col_progress, 0, sizeof(row->col_progress));
713 }
714
715
2/2
✓ Branch 0 taken 42891 times.
✓ Branch 1 taken 872 times.
43763 for (int rs = 0; rs < ft->ctu_count; rs++) {
716 42891 VVCTask *t = ft->tasks + rs;
717 42891 task_init(t, VVC_TASK_STAGE_PARSE, fc, rs % ft->ctu_width, rs / ft->ctu_width);
718 }
719
720 872 memset(&ft->row_progress[0], 0, sizeof(ft->row_progress));
721
722 872 frame_thread_init_score(fc);
723
724 872 return 0;
725
726 fail:
727 if (ft) {
728 av_freep(&ft->rows);
729 av_freep(&ft->tasks);
730 av_freep(&ft);
731 }
732
733 return AVERROR(ENOMEM);
734 }
735
736 42891 static void check_colocation(VVCContext *s, VVCTask *t)
737 {
738 42891 const VVCFrameContext *fc = t->fc;
739
740
4/4
✓ Branch 0 taken 4201 times.
✓ Branch 1 taken 38690 times.
✓ Branch 2 taken 3889 times.
✓ Branch 3 taken 312 times.
42891 if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) {
741 42579 VVCFrame *col = fc->ref->collocated_ref;
742 42579 const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx];
743
4/4
✓ Branch 0 taken 38965 times.
✓ Branch 1 taken 3614 times.
✓ Branch 2 taken 4265 times.
✓ Branch 3 taken 34700 times.
42579 if (col && first_col) {
744 //we depend on bottom and right boundary, do not - 1 for y
745 4265 const int y = (t->ry << fc->ps.sps->ctb_log2_size_y);
746 4265 add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y);
747 4265 return;
748 }
749 }
750 38626 frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
751 }
752
753 1922 static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep)
754 {
755 1922 const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start];
756 1922 VVCTask *t = ft->tasks + rs;
757
758 1922 frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE);
759 1922 }
760
761 872 void ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc)
762 {
763 872 VVCFrameThread *ft = fc->ft;
764
765
2/2
✓ Branch 0 taken 1578 times.
✓ Branch 1 taken 872 times.
2450 for (int i = 0; i < fc->nb_slices; i++) {
766 1578 SliceContext *sc = fc->slices[i];
767
2/2
✓ Branch 0 taken 1922 times.
✓ Branch 1 taken 1578 times.
3500 for (int j = 0; j < sc->nb_eps; j++) {
768 1922 EntryPoint *ep = sc->eps + j;
769
2/2
✓ Branch 0 taken 42891 times.
✓ Branch 1 taken 1922 times.
44813 for (int k = ep->ctu_start; k < ep->ctu_end; k++) {
770 42891 const int rs = sc->sh.ctb_addr_in_curr_slice[k];
771 42891 VVCTask *t = ft->tasks + rs;
772
773 42891 task_init_parse(t, sc, ep, k);
774 42891 check_colocation(s, t);
775 }
776 1922 submit_entry_point(s, ft, sc, ep);
777 }
778 }
779 872 }
780
781 872 int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc)
782 {
783 872 VVCFrameThread *ft = fc->ft;
784
785 872 ff_mutex_lock(&ft->lock);
786
787
3/4
✓ Branch 0 taken 71290 times.
✓ Branch 1 taken 872 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 872 times.
72162 while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners))
788 71290 ff_cond_wait(&ft->cond, &ft->lock);
789
790 872 ff_mutex_unlock(&ft->lock);
791 872 ff_vvc_report_frame_finished(fc->ref);
792
793 #ifdef VVC_THREAD_DEBUG
794 872 av_log(s->avctx, AV_LOG_DEBUG, "frame %5d done\r\n", (int)fc->decode_order);
795 #endif
796 872 return ft->ret;
797 }
798