Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * VVC thread logic | ||
3 | * | ||
4 | * Copyright (C) 2023 Nuo Mi | ||
5 | * | ||
6 | * This file is part of FFmpeg. | ||
7 | * | ||
8 | * FFmpeg is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU Lesser General Public | ||
10 | * License as published by the Free Software Foundation; either | ||
11 | * version 2.1 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * FFmpeg is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
16 | * Lesser General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU Lesser General Public | ||
19 | * License along with FFmpeg; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
21 | */ | ||
22 | |||
23 | #include <stdatomic.h> | ||
24 | |||
25 | #include "libavcodec/executor.h" | ||
26 | #include "libavutil/mem.h" | ||
27 | #include "libavutil/thread.h" | ||
28 | |||
29 | #include "thread.h" | ||
30 | #include "ctu.h" | ||
31 | #include "filter.h" | ||
32 | #include "inter.h" | ||
33 | #include "intra.h" | ||
34 | #include "refs.h" | ||
35 | |||
// Progress listener tied to the task that waits on another frame's progress.
// The embedded VVCProgressListener has to stay the first member: the
// callbacks in this file cast between the two pointer types.
typedef struct ProgressListener {
    VVCProgressListener l;  // generic listener (vp, y and progress_done are filled by listener_init())
    struct VVCTask *task;   // task whose score is raised once the progress is reached
    VVCContext *s;          // decoder context handed to the scheduler from the callback
} ProgressListener;
41 | |||
// Per-CTU pipeline stages in execution order. A task walks through them by
// incrementing its stage; the numeric values also index the per-stage tables
// in this file (priorities, target scores, run functions, names), so the
// order here and in those tables must stay in sync.
typedef enum VVCTaskStage {
    VVC_TASK_STAGE_INIT,    // for CTU(0, 0) only
    VVC_TASK_STAGE_PARSE,
    VVC_TASK_STAGE_DEBLOCK_BS,
    VVC_TASK_STAGE_INTER,
    VVC_TASK_STAGE_RECON,
    VVC_TASK_STAGE_LMCS,
    VVC_TASK_STAGE_DEBLOCK_V,
    VVC_TASK_STAGE_DEBLOCK_H,
    VVC_TASK_STAGE_SAO,
    VVC_TASK_STAGE_ALF,
    VVC_TASK_STAGE_LAST     // number of stages; also the "all stages done" marker
} VVCTaskStage;
55 | |||
// One task per CTU. The same object is re-queued for each stage it reaches.
typedef struct VVCTask {
    // Must stay the first member: task_run() receives an FFTask pointer and
    // casts it back to VVCTask.
    union {
        struct VVCTask *next;                //for executor debug only
        FFTask task;
    } u;

    // stage the task will run next
    VVCTaskStage stage;

    // ctu x, y, and raster scan order
    int rx, ry, rs;
    VVCFrameContext *fc;

    // listener on the collocated frame (MV progress), see check_colocation()
    ProgressListener col_listener;
    // listeners on reference frames (pixel progress), indexed [list][ref idx]
    ProgressListener listener[2][VVC_MAX_REF_ENTRIES];

    // for parse task only
    SliceContext *sc;
    EntryPoint *ep;
    int ctu_idx;                            //ctu idx in the current slice

    // tasks with target scores met are ready for scheduling
    atomic_uchar score[VVC_TASK_STAGE_LAST];
    // number of pixel-progress listeners registered for this task; used as
    // the target score of the inter stage
    atomic_uchar target_inter_score;
} VVCTask;
80 | |||
// Per CTU row bookkeeping.
typedef struct VVCRowThread {
    // for each progress type, how many CTUs of this row have reported;
    // the row is complete when the counter equals the frame's ctu_width
    atomic_int col_progress[VVC_PROGRESS_LAST];
} VVCRowThread;
84 | |||
// Per-frame scheduling state.
typedef struct VVCFrameThread {
    // error return for tasks
    atomic_int ret;

    VVCRowThread *rows;     // one entry per CTU row
    VVCTask *tasks;         // one entry per CTU, indexed in raster scan order

    int ctu_size;
    int ctu_width;
    int ctu_height;
    int ctu_count;

    // in-flight counters; updated atomically. sheduled_done() signals cond
    // (with lock held) whenever one of them drops to zero.
    atomic_int nb_scheduled_tasks;
    atomic_int nb_scheduled_listeners;

    // number of fully completed CTU rows per progress type;
    // read and written with lock held in report_frame_progress()
    int row_progress[VVC_PROGRESS_LAST];

    AVMutex lock;
    AVCond  cond;
} VVCFrameThread;
106 | |||
107 | #define PRIORITY_LOWEST 2 | ||
108 | 228253 | static void add_task(VVCContext *s, VVCTask *t) | |
109 | { | ||
110 | 228253 | VVCFrameThread *ft = t->fc->ft; | |
111 | 228253 | FFTask *task = &t->u.task; | |
112 | 228253 | const int priorities[] = { | |
113 | 0, // VVC_TASK_STAGE_INIT, | ||
114 | 0, // VVC_TASK_STAGE_PARSE, | ||
115 | 1, // VVC_TASK_STAGE_DEBLOCK_BS | ||
116 | // For an 8K clip, a CTU line completed in the reference frame may trigger 64 and more inter tasks. | ||
117 | // We assign these tasks the lowest priority to avoid being overwhelmed with inter tasks. | ||
118 | PRIORITY_LOWEST, // VVC_TASK_STAGE_INTER | ||
119 | 1, // VVC_TASK_STAGE_RECON, | ||
120 | 1, // VVC_TASK_STAGE_LMCS, | ||
121 | 1, // VVC_TASK_STAGE_DEBLOCK_V, | ||
122 | 1, // VVC_TASK_STAGE_DEBLOCK_H, | ||
123 | 1, // VVC_TASK_STAGE_SAO, | ||
124 | 1, // VVC_TASK_STAGE_ALF, | ||
125 | }; | ||
126 | |||
127 | 228253 | atomic_fetch_add(&ft->nb_scheduled_tasks, 1); | |
128 | 228253 | task->priority = priorities[t->stage]; | |
129 | 228253 | ff_executor_execute(s->executor, task); | |
130 | 228253 | } | |
131 | |||
132 | 46728 | static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry) | |
133 | { | ||
134 | 46728 | memset(t, 0, sizeof(*t)); | |
135 | 46728 | t->stage = stage; | |
136 | 46728 | t->fc = fc; | |
137 | 46728 | t->rx = rx; | |
138 | 46728 | t->ry = ry; | |
139 | 46728 | t->rs = ry * fc->ft->ctu_width + rx; | |
140 |
2/2✓ Branch 0 taken 467280 times.
✓ Branch 1 taken 46728 times.
|
514008 | for (int i = 0; i < FF_ARRAY_ELEMS(t->score); i++) |
141 | 467280 | atomic_store(t->score + i, 0); | |
142 | 46728 | atomic_store(&t->target_inter_score, 0); | |
143 | 46728 | } | |
144 | |||
145 | 45792 | static int task_init_parse(VVCTask *t, SliceContext *sc, EntryPoint *ep, const int ctu_idx) | |
146 | { | ||
147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
|
45792 | if (t->sc) { |
148 | // the task already inited, error bitstream | ||
149 | ✗ | return AVERROR_INVALIDDATA; | |
150 | } | ||
151 | 45792 | t->sc = sc; | |
152 | 45792 | t->ep = ep; | |
153 | 45792 | t->ctu_idx = ctu_idx; | |
154 | |||
155 | 45792 | return 0; | |
156 | } | ||
157 | |||
158 | 1407224 | static uint8_t task_add_score(VVCTask *t, const VVCTaskStage stage) | |
159 | { | ||
160 | 1407224 | return atomic_fetch_add(&t->score[stage], 1) + 1; | |
161 | } | ||
162 | |||
163 | 367272 | static uint8_t task_get_score(VVCTask *t, const VVCTaskStage stage) | |
164 | { | ||
165 | 367272 | return atomic_load(&t->score[stage]); | |
166 | } | ||
167 | |||
168 | //first row in tile or slice | ||
169 | 5202 | static int is_first_row(const VVCFrameContext *fc, const int rx, const int ry) | |
170 | { | ||
171 | 5202 | const VVCFrameThread *ft = fc->ft; | |
172 | 5202 | const VVCPPS *pps = fc->ps.pps; | |
173 | |||
174 |
2/2✓ Branch 0 taken 4437 times.
✓ Branch 1 taken 765 times.
|
5202 | if (ry != pps->ctb_to_row_bd[ry]) { |
175 | 4437 | const int rs = ry * ft->ctu_width + rx; | |
176 | 4437 | return fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - ft->ctu_width]; | |
177 | } | ||
178 | 765 | return 1; | |
179 | } | ||
180 | |||
181 | 1774496 | static int task_has_target_score(VVCTask *t, const VVCTaskStage stage, const uint8_t score) | |
182 | { | ||
183 | // l:left, r:right, t: top, b: bottom | ||
184 | static const uint8_t target_score[] = | ||
185 | { | ||
186 | 2, //VVC_TASK_STAGE_DEBLOCK_BS,need l + t parse | ||
187 | 0, //VVC_TASK_STAGE_INTER, not used | ||
188 | 2, //VVC_TASK_STAGE_RECON, need l + rt recon | ||
189 | 3, //VVC_TASK_STAGE_LMCS, need r + b + rb recon | ||
190 | 1, //VVC_TASK_STAGE_DEBLOCK_V, need l deblock v | ||
191 | 2, //VVC_TASK_STAGE_DEBLOCK_H, need r deblock v + t deblock h | ||
192 | 5, //VVC_TASK_STAGE_SAO, need l + r + lb + b + rb deblock h | ||
193 | 8, //VVC_TASK_STAGE_ALF, need sao around the ctu | ||
194 | }; | ||
195 | 1774496 | uint8_t target = 0; | |
196 | 1774496 | VVCFrameContext *fc = t->fc; | |
197 | |||
198 |
2/2✓ Branch 0 taken 936 times.
✓ Branch 1 taken 1773560 times.
|
1774496 | if (stage == VVC_TASK_STAGE_INIT) |
199 | 936 | return 1; | |
200 | |||
201 |
2/2✓ Branch 0 taken 93591 times.
✓ Branch 1 taken 1679969 times.
|
1773560 | if (stage == VVC_TASK_STAGE_PARSE) { |
202 | 93591 | const H266RawSPS *rsps = fc->ps.sps->r; | |
203 |
4/4✓ Branch 0 taken 3978 times.
✓ Branch 1 taken 89613 times.
✓ Branch 3 taken 3213 times.
✓ Branch 4 taken 765 times.
|
93591 | const int wpp = rsps->sps_entropy_coding_sync_enabled_flag && !is_first_row(fc, t->rx, t->ry); |
204 | 93591 | const int no_prev_stage = t->rs > 0; | |
205 | 93591 | target = 2 + wpp - no_prev_stage; //left parse + colocation + wpp - no_prev_stage | |
206 |
2/2✓ Branch 0 taken 123748 times.
✓ Branch 1 taken 1556221 times.
|
1679969 | } else if (stage == VVC_TASK_STAGE_INTER) { |
207 | 123748 | target = atomic_load(&t->target_inter_score); | |
208 | } else { | ||
209 | 1556221 | target = target_score[stage - VVC_TASK_STAGE_DEBLOCK_BS]; | |
210 | } | ||
211 | |||
212 | //+1 for previous stage | ||
213 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1773560 times.
|
1773560 | av_assert0(score <= target + 1); |
214 | 1773560 | return score == target + 1; | |
215 | } | ||
216 | |||
217 | 2015850 | static void frame_thread_add_score(VVCContext *s, VVCFrameThread *ft, | |
218 | const int rx, const int ry, const VVCTaskStage stage) | ||
219 | { | ||
220 | 2015850 | VVCTask *t = ft->tasks + ft->ctu_width * ry + rx; | |
221 | uint8_t score; | ||
222 | |||
223 |
8/8✓ Branch 0 taken 1872210 times.
✓ Branch 1 taken 143640 times.
✓ Branch 2 taken 1740540 times.
✓ Branch 3 taken 131670 times.
✓ Branch 4 taken 1559390 times.
✓ Branch 5 taken 181150 times.
✓ Branch 6 taken 152166 times.
✓ Branch 7 taken 1407224 times.
|
2015850 | if (rx < 0 || rx >= ft->ctu_width || ry < 0 || ry >= ft->ctu_height) |
224 | 608626 | return; | |
225 | |||
226 | 1407224 | score = task_add_score(t, stage); | |
227 |
2/2✓ Branch 1 taken 228253 times.
✓ Branch 2 taken 1178971 times.
|
1407224 | if (task_has_target_score(t, stage, score)) { |
228 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 228253 times.
|
228253 | av_assert0(s); |
229 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 228253 times.
|
228253 | av_assert0(stage == t->stage); |
230 | 228253 | add_task(s, t); | |
231 | } | ||
232 | } | ||
233 | |||
234 | 310574 | static void sheduled_done(VVCFrameThread *ft, atomic_int *scheduled) | |
235 | { | ||
236 |
2/2✓ Branch 0 taken 83257 times.
✓ Branch 1 taken 227317 times.
|
310574 | if (atomic_fetch_sub(scheduled, 1) == 1) { |
237 | 83257 | ff_mutex_lock(&ft->lock); | |
238 | 83257 | ff_cond_signal(&ft->cond); | |
239 | 83257 | ff_mutex_unlock(&ft->lock); | |
240 | } | ||
241 | 310574 | } | |
242 | |||
243 | 82321 | static void progress_done(VVCProgressListener *_l, const int type) | |
244 | { | ||
245 | 82321 | const ProgressListener *l = (ProgressListener *)_l; | |
246 | 82321 | const VVCTask *t = l->task; | |
247 | 82321 | VVCFrameThread *ft = t->fc->ft; | |
248 | |||
249 | 82321 | frame_thread_add_score(l->s, ft, t->rx, t->ry, type); | |
250 | 82321 | sheduled_done(ft, &ft->nb_scheduled_listeners); | |
251 | 82321 | } | |
252 | |||
// Listener callback for pixel progress: credits the inter stage of the
// waiting task.
static void pixel_done(VVCProgressListener *l)
{
    progress_done(l, VVC_TASK_STAGE_INTER);
}
257 | |||
// Listener callback for MV progress: credits the parse stage of the
// waiting task.
static void mv_done(VVCProgressListener *l)
{
    progress_done(l, VVC_TASK_STAGE_PARSE);
}
262 | |||
263 | 82321 | static void listener_init(ProgressListener *l, VVCTask *t, VVCContext *s, const VVCProgress vp, const int y) | |
264 | { | ||
265 | 82321 | const int is_inter = vp == VVC_PROGRESS_PIXEL; | |
266 | |||
267 | 82321 | l->task = t; | |
268 | 82321 | l->s = s; | |
269 | 82321 | l->l.vp = vp; | |
270 | 82321 | l->l.y = y; | |
271 |
2/2✓ Branch 0 taken 77956 times.
✓ Branch 1 taken 4365 times.
|
82321 | l->l.progress_done = is_inter ? pixel_done : mv_done; |
272 |
2/2✓ Branch 0 taken 77956 times.
✓ Branch 1 taken 4365 times.
|
82321 | if (is_inter) |
273 | 77956 | atomic_fetch_add(&t->target_inter_score, 1); | |
274 | 82321 | } | |
275 | |||
276 | 82321 | static void add_progress_listener(VVCFrame *ref, ProgressListener *l, | |
277 | VVCTask *t, VVCContext *s, const VVCProgress vp, const int y) | ||
278 | { | ||
279 | 82321 | VVCFrameThread *ft = t->fc->ft; | |
280 | |||
281 | 82321 | atomic_fetch_add(&ft->nb_scheduled_listeners, 1); | |
282 | 82321 | listener_init(l, t, s, vp, y); | |
283 | 82321 | ff_vvc_add_progress_listener(ref, (VVCProgressListener*)l); | |
284 | 82321 | } | |
285 | |||
286 | 45792 | static void schedule_next_parse(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, const VVCTask *t) | |
287 | { | ||
288 | 45792 | VVCFrameThread *ft = fc->ft; | |
289 | 45792 | EntryPoint *ep = t->ep; | |
290 | 45792 | const VVCSPS *sps = fc->ps.sps; | |
291 | |||
292 |
2/2✓ Branch 0 taken 1428 times.
✓ Branch 1 taken 44364 times.
|
45792 | if (sps->r->sps_entropy_coding_sync_enabled_flag) { |
293 |
2/2✓ Branch 0 taken 204 times.
✓ Branch 1 taken 1224 times.
|
1428 | if (t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]) { |
294 | 204 | EntryPoint *next = ep + 1; | |
295 |
3/4✓ Branch 0 taken 153 times.
✓ Branch 1 taken 51 times.
✓ Branch 3 taken 153 times.
✗ Branch 4 not taken.
|
204 | if (next < sc->eps + sc->nb_eps && !is_first_row(fc, t->rx, t->ry + 1)) { |
296 | 153 | memcpy(next->cabac_state, ep->cabac_state, sizeof(next->cabac_state)); | |
297 | 153 | ff_vvc_ep_init_stat_coeff(next, sps->bit_depth, sps->r->sps_persistent_rice_adaptation_enabled_flag); | |
298 | } | ||
299 | } | ||
300 |
3/4✓ Branch 0 taken 1071 times.
✓ Branch 1 taken 357 times.
✓ Branch 3 taken 1071 times.
✗ Branch 4 not taken.
|
1428 | if (t->ry + 1 < ft->ctu_height && !is_first_row(fc, t->rx, t->ry + 1)) |
301 | 1071 | frame_thread_add_score(s, ft, t->rx, t->ry + 1, VVC_TASK_STAGE_PARSE); | |
302 | } | ||
303 | |||
304 |
2/2✓ Branch 0 taken 43766 times.
✓ Branch 1 taken 2026 times.
|
45792 | if (t->ctu_idx + 1 < t->ep->ctu_end) { |
305 | 43766 | const int next_rs = sc->sh.ctb_addr_in_curr_slice[t->ctu_idx + 1]; | |
306 | 43766 | const int next_rx = next_rs % ft->ctu_width; | |
307 | 43766 | const int next_ry = next_rs / ft->ctu_width; | |
308 | 43766 | frame_thread_add_score(s, ft, next_rx, next_ry, VVC_TASK_STAGE_PARSE); | |
309 | } | ||
310 | 45792 | } | |
311 | |||
312 | 45792 | static void schedule_inter(VVCContext *s, VVCFrameContext *fc, const SliceContext *sc, VVCTask *t, const int rs) | |
313 | { | ||
314 | 45792 | const VVCSH *sh = &sc->sh; | |
315 | |||
316 |
2/2✓ Branch 0 taken 40006 times.
✓ Branch 1 taken 5786 times.
|
45792 | if (!IS_I(sh->r)) { |
317 | 40006 | CTU *ctu = fc->tab.ctus + rs; | |
318 |
2/2✓ Branch 0 taken 80012 times.
✓ Branch 1 taken 40006 times.
|
120018 | for (int lx = 0; lx < 2; lx++) { |
319 |
2/2✓ Branch 0 taken 156332 times.
✓ Branch 1 taken 80012 times.
|
236344 | for (int i = 0; i < sh->r->num_ref_idx_active[lx]; i++) { |
320 | 156332 | int y = ctu->max_y[lx][i]; | |
321 | 156332 | VVCRefPic *refp = sc->rpl[lx].refs + i; | |
322 | 156332 | VVCFrame *ref = refp->ref; | |
323 |
3/4✓ Branch 0 taken 156332 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 77956 times.
✓ Branch 3 taken 78376 times.
|
156332 | if (ref && y >= 0) { |
324 |
2/2✓ Branch 0 taken 214 times.
✓ Branch 1 taken 77742 times.
|
77956 | if (refp->is_scaled) |
325 | 214 | y = y * refp->scale[1] >> 14; | |
326 | 77956 | add_progress_listener(ref, &t->listener[lx][i], t, s, VVC_PROGRESS_PIXEL, y + LUMA_EXTRA_AFTER); | |
327 | } | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | 45792 | } | |
332 | |||
333 | 45792 | static void parse_task_done(VVCContext *s, VVCFrameContext *fc, const int rx, const int ry) | |
334 | { | ||
335 | 45792 | VVCFrameThread *ft = fc->ft; | |
336 | 45792 | const int rs = ry * ft->ctu_width + rx; | |
337 | 45792 | const int slice_idx = fc->tab.slice_idx[rs]; | |
338 | 45792 | VVCTask *t = ft->tasks + rs; | |
339 | 45792 | const SliceContext *sc = fc->slices[slice_idx]; | |
340 | |||
341 | 45792 | schedule_next_parse(s, fc, sc, t); | |
342 | 45792 | schedule_inter(s, fc, sc, t, rs); | |
343 | 45792 | } | |
344 | |||
345 | 651222 | static void task_stage_done(const VVCTask *t, VVCContext *s) | |
346 | { | ||
347 | 651222 | VVCFrameContext *fc = t->fc; | |
348 | 651222 | VVCFrameThread *ft = fc->ft; | |
349 | 651222 | const VVCTaskStage stage = t->stage; | |
350 | |||
351 | #define ADD(dx, dy, stage) frame_thread_add_score(s, ft, t->rx + (dx), t->ry + (dy), stage) | ||
352 | |||
353 | //this is a reserve map of ready_score, ordered by zigzag | ||
354 |
2/2✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 578968 times.
|
651222 | if (stage == VVC_TASK_STAGE_PARSE) { |
355 | 72254 | ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_BS); | |
356 | 72254 | ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_BS); | |
357 |
8/8✓ Branch 0 taken 66269 times.
✓ Branch 1 taken 5985 times.
✓ Branch 2 taken 60284 times.
✓ Branch 3 taken 5985 times.
✓ Branch 4 taken 53038 times.
✓ Branch 5 taken 7246 times.
✓ Branch 6 taken 7246 times.
✓ Branch 7 taken 45792 times.
|
72254 | if (t->rx < 0 || t->rx >= ft->ctu_width || t->ry < 0 || t->ry >= ft->ctu_height) |
358 | 26462 | return; | |
359 | 45792 | parse_task_done(s, fc, t->rx, t->ry); | |
360 |
2/2✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 506714 times.
|
578968 | } else if (stage == VVC_TASK_STAGE_RECON) { |
361 | 72254 | ADD(-1, 1, VVC_TASK_STAGE_RECON); | |
362 | 72254 | ADD( 1, 0, VVC_TASK_STAGE_RECON); | |
363 | 72254 | ADD(-1, -1, VVC_TASK_STAGE_LMCS); | |
364 | 72254 | ADD( 0, -1, VVC_TASK_STAGE_LMCS); | |
365 | 72254 | ADD(-1, 0, VVC_TASK_STAGE_LMCS); | |
366 |
2/2✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 434460 times.
|
506714 | } else if (stage == VVC_TASK_STAGE_DEBLOCK_V) { |
367 | 72254 | ADD( 1, 0, VVC_TASK_STAGE_DEBLOCK_V); | |
368 | 72254 | ADD(-1, 0, VVC_TASK_STAGE_DEBLOCK_H); | |
369 |
2/2✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 362206 times.
|
434460 | } else if (stage == VVC_TASK_STAGE_DEBLOCK_H) { |
370 | 72254 | ADD( 0, 1, VVC_TASK_STAGE_DEBLOCK_H); | |
371 | 72254 | ADD(-1, -1, VVC_TASK_STAGE_SAO); | |
372 | 72254 | ADD( 0, -1, VVC_TASK_STAGE_SAO); | |
373 | 72254 | ADD(-1, 0, VVC_TASK_STAGE_SAO); | |
374 | 72254 | ADD( 1, -1, VVC_TASK_STAGE_SAO); | |
375 | 72254 | ADD( 1, 0, VVC_TASK_STAGE_SAO); | |
376 |
2/2✓ Branch 0 taken 72254 times.
✓ Branch 1 taken 289952 times.
|
362206 | } else if (stage == VVC_TASK_STAGE_SAO) { |
377 | 72254 | ADD(-1, -1, VVC_TASK_STAGE_ALF); | |
378 | 72254 | ADD( 0, -1, VVC_TASK_STAGE_ALF); | |
379 | 72254 | ADD(-1, 0, VVC_TASK_STAGE_ALF); | |
380 | 72254 | ADD( 1, -1, VVC_TASK_STAGE_ALF); | |
381 | 72254 | ADD(-1, 1, VVC_TASK_STAGE_ALF); | |
382 | 72254 | ADD( 1, 0, VVC_TASK_STAGE_ALF); | |
383 | 72254 | ADD( 0, 1, VVC_TASK_STAGE_ALF); | |
384 | 72254 | ADD( 1, 1, VVC_TASK_STAGE_ALF); | |
385 | } | ||
386 | } | ||
387 | |||
388 | 413064 | static int task_is_stage_ready(VVCTask *t, int add) | |
389 | { | ||
390 | 413064 | const VVCTaskStage stage = t->stage; | |
391 | uint8_t score; | ||
392 |
2/2✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 367272 times.
|
413064 | if (stage > VVC_TASK_STAGE_ALF) |
393 | 45792 | return 0; | |
394 | 367272 | score = task_get_score(t, stage) + add; | |
395 | 367272 | return task_has_target_score(t, stage, score); | |
396 | } | ||
397 | |||
398 | 45792 | static void check_colocation(VVCContext *s, VVCTask *t) | |
399 | { | ||
400 | 45792 | const VVCFrameContext *fc = t->fc; | |
401 | |||
402 |
4/4✓ Branch 0 taken 5890 times.
✓ Branch 1 taken 39902 times.
✓ Branch 2 taken 5310 times.
✓ Branch 3 taken 580 times.
|
45792 | if (fc->ps.ph.r->ph_temporal_mvp_enabled_flag || fc->ps.sps->r->sps_sbtmvp_enabled_flag) { |
403 | 45212 | VVCFrame *col = fc->ref->collocated_ref; | |
404 | 45212 | const int first_col = t->rx == fc->ps.pps->ctb_to_col_bd[t->rx]; | |
405 |
4/4✓ Branch 0 taken 39902 times.
✓ Branch 1 taken 5310 times.
✓ Branch 2 taken 4365 times.
✓ Branch 3 taken 35537 times.
|
45212 | if (col && first_col) { |
406 | //we depend on bottom and right boundary, do not - 1 for y | ||
407 | 4365 | const int y = (t->ry << fc->ps.sps->ctb_log2_size_y); | |
408 | 4365 | add_progress_listener(col, &t->col_listener, t, s, VVC_PROGRESS_MV, y); | |
409 | 4365 | return; | |
410 | } | ||
411 | } | ||
412 | 41427 | frame_thread_add_score(s, fc->ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE); | |
413 | } | ||
414 | |||
415 | 2026 | static void submit_entry_point(VVCContext *s, VVCFrameThread *ft, SliceContext *sc, EntryPoint *ep) | |
416 | { | ||
417 | 2026 | const int rs = sc->sh.ctb_addr_in_curr_slice[ep->ctu_start]; | |
418 | 2026 | VVCTask *t = ft->tasks + rs; | |
419 | |||
420 | 2026 | frame_thread_add_score(s, ft, t->rx, t->ry, VVC_TASK_STAGE_PARSE); | |
421 | 2026 | } | |
422 | |||
423 | 936 | static int run_init(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
424 | { | ||
425 | 936 | VVCFrameContext *fc = lc->fc; | |
426 | 936 | VVCFrameThread *ft = fc->ft; | |
427 | 936 | const int ret = ff_vvc_per_frame_init(fc); | |
428 | |||
429 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
|
936 | if (ret < 0) |
430 | ✗ | return ret; | |
431 | |||
432 |
2/2✓ Branch 0 taken 1659 times.
✓ Branch 1 taken 936 times.
|
2595 | for (int i = 0; i < fc->nb_slices; i++) { |
433 | 1659 | SliceContext *sc = fc->slices[i]; | |
434 |
2/2✓ Branch 0 taken 2026 times.
✓ Branch 1 taken 1659 times.
|
3685 | for (int j = 0; j < sc->nb_eps; j++) { |
435 | 2026 | EntryPoint *ep = sc->eps + j; | |
436 |
2/2✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 2026 times.
|
47818 | for (int k = ep->ctu_start; k < ep->ctu_end; k++) { |
437 | 45792 | const int rs = sc->sh.ctb_addr_in_curr_slice[k]; | |
438 | 45792 | VVCTask *t = ft->tasks + rs; | |
439 | 45792 | check_colocation(s, t); | |
440 | } | ||
441 | 2026 | submit_entry_point(s, ft, sc, ep); | |
442 | } | ||
443 | } | ||
444 | 936 | return 0; | |
445 | } | ||
446 | |||
447 | 91584 | static void report_frame_progress(VVCFrameContext *fc, | |
448 | const int ry, const VVCProgress idx) | ||
449 | { | ||
450 | 91584 | VVCFrameThread *ft = fc->ft; | |
451 | 91584 | const int ctu_size = ft->ctu_size; | |
452 | int old; | ||
453 | |||
454 |
2/2✓ Branch 0 taken 8226 times.
✓ Branch 1 taken 83358 times.
|
91584 | if (atomic_fetch_add(&ft->rows[ry].col_progress[idx], 1) == ft->ctu_width - 1) { |
455 | int y; | ||
456 | 8226 | ff_mutex_lock(&ft->lock); | |
457 | 8226 | y = old = ft->row_progress[idx]; | |
458 |
4/4✓ Branch 0 taken 14580 times.
✓ Branch 1 taken 1872 times.
✓ Branch 2 taken 8226 times.
✓ Branch 3 taken 6354 times.
|
16452 | while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width) |
459 | 8226 | y++; | |
460 |
2/2✓ Branch 0 taken 8153 times.
✓ Branch 1 taken 73 times.
|
8226 | if (old != y) |
461 | 8153 | ft->row_progress[idx] = y; | |
462 | // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock | ||
463 | // We need to unlock ft->lock first | ||
464 | 8226 | ff_mutex_unlock(&ft->lock); | |
465 | |||
466 |
2/2✓ Branch 0 taken 8153 times.
✓ Branch 1 taken 73 times.
|
8226 | if (old != y) { |
467 |
2/2✓ Branch 0 taken 6281 times.
✓ Branch 1 taken 1872 times.
|
8153 | const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size; |
468 | 8153 | ff_vvc_report_progress(fc->ref, idx, progress); | |
469 | } | ||
470 | } | ||
471 | 91584 | } | |
472 | |||
473 | 45792 | static int run_parse(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
474 | { | ||
475 | int ret; | ||
476 | 45792 | VVCFrameContext *fc = lc->fc; | |
477 | 45792 | const int rs = t->rs; | |
478 | 45792 | const CTU *ctu = fc->tab.ctus + rs; | |
479 | |||
480 | 45792 | lc->ep = t->ep; | |
481 | |||
482 | 45792 | ret = ff_vvc_coding_tree_unit(lc, t->ctu_idx, rs, t->rx, t->ry); | |
483 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
|
45792 | if (ret < 0) |
484 | ✗ | return ret; | |
485 | |||
486 |
2/2✓ Branch 0 taken 25273 times.
✓ Branch 1 taken 20519 times.
|
45792 | if (!ctu->has_dmvr) |
487 | 25273 | report_frame_progress(lc->fc, t->ry, VVC_PROGRESS_MV); | |
488 | |||
489 | 45792 | return 0; | |
490 | } | ||
491 | |||
492 | 45792 | static int run_deblock_bs(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
493 | { | ||
494 |
2/2✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
|
45792 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) |
495 | 45788 | ff_vvc_deblock_bs(lc, t->rx, t->ry, t->rs); | |
496 | |||
497 | 45792 | return 0; | |
498 | } | ||
499 | |||
500 | 45792 | static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
501 | { | ||
502 | 45792 | VVCFrameContext *fc = lc->fc; | |
503 | 45792 | const CTU *ctu = fc->tab.ctus + t->rs; | |
504 | int ret; | ||
505 | |||
506 | 45792 | ret = ff_vvc_predict_inter(lc, t->rs); | |
507 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
|
45792 | if (ret < 0) |
508 | ✗ | return ret; | |
509 | |||
510 |
2/2✓ Branch 0 taken 20519 times.
✓ Branch 1 taken 25273 times.
|
45792 | if (ctu->has_dmvr) |
511 | 20519 | report_frame_progress(fc, t->ry, VVC_PROGRESS_MV); | |
512 | |||
513 | 45792 | return 0; | |
514 | } | ||
515 | |||
516 | 45792 | static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
517 | { | ||
518 | 45792 | return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry); | |
519 | } | ||
520 | |||
521 | 45792 | static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
522 | { | ||
523 | 45792 | VVCFrameContext *fc = lc->fc; | |
524 | 45792 | VVCFrameThread *ft = fc->ft; | |
525 | 45792 | const int ctu_size = ft->ctu_size; | |
526 | 45792 | const int x0 = t->rx * ctu_size; | |
527 | 45792 | const int y0 = t->ry * ctu_size; | |
528 | |||
529 | 45792 | ff_vvc_lmcs_filter(lc, x0, y0); | |
530 | |||
531 | 45792 | return 0; | |
532 | } | ||
533 | |||
534 | 45792 | static int run_deblock_v(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
535 | { | ||
536 | 45792 | VVCFrameContext *fc = lc->fc; | |
537 | 45792 | VVCFrameThread *ft = fc->ft; | |
538 | 45792 | const int ctb_size = ft->ctu_size; | |
539 | 45792 | const int x0 = t->rx * ctb_size; | |
540 | 45792 | const int y0 = t->ry * ctb_size; | |
541 | |||
542 |
2/2✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
|
45792 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) { |
543 | 45788 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
544 | 45788 | ff_vvc_deblock_vertical(lc, x0, y0, t->rs); | |
545 | } | ||
546 | |||
547 | 45792 | return 0; | |
548 | } | ||
549 | |||
550 | 45792 | static int run_deblock_h(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
551 | { | ||
552 | 45792 | VVCFrameContext *fc = lc->fc; | |
553 | 45792 | VVCFrameThread *ft = fc->ft; | |
554 | 45792 | const int ctb_size = ft->ctu_size; | |
555 | 45792 | const int x0 = t->rx * ctb_size; | |
556 | 45792 | const int y0 = t->ry * ctb_size; | |
557 | |||
558 |
2/2✓ Branch 0 taken 45788 times.
✓ Branch 1 taken 4 times.
|
45792 | if (!lc->sc->sh.r->sh_deblocking_filter_disabled_flag) { |
559 | 45788 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
560 | 45788 | ff_vvc_deblock_horizontal(lc, x0, y0, t->rs); | |
561 | } | ||
562 |
2/2✓ Branch 0 taken 45372 times.
✓ Branch 1 taken 420 times.
|
45792 | if (fc->ps.sps->r->sps_sao_enabled_flag) |
563 | 45372 | ff_vvc_sao_copy_ctb_to_hv(lc, t->rx, t->ry, t->ry == ft->ctu_height - 1); | |
564 | |||
565 | 45792 | return 0; | |
566 | } | ||
567 | |||
568 | 45792 | static int run_sao(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
569 | { | ||
570 | 45792 | VVCFrameContext *fc = lc->fc; | |
571 | 45792 | VVCFrameThread *ft = fc->ft; | |
572 | 45792 | const int ctb_size = ft->ctu_size; | |
573 | 45792 | const int x0 = t->rx * ctb_size; | |
574 | 45792 | const int y0 = t->ry * ctb_size; | |
575 | |||
576 |
2/2✓ Branch 0 taken 45372 times.
✓ Branch 1 taken 420 times.
|
45792 | if (fc->ps.sps->r->sps_sao_enabled_flag) { |
577 | 45372 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
578 | 45372 | ff_vvc_sao_filter(lc, x0, y0); | |
579 | } | ||
580 | |||
581 |
2/2✓ Branch 0 taken 35754 times.
✓ Branch 1 taken 10038 times.
|
45792 | if (fc->ps.sps->r->sps_alf_enabled_flag) |
582 | 35754 | ff_vvc_alf_copy_ctu_to_hv(lc, x0, y0); | |
583 | |||
584 | 45792 | return 0; | |
585 | } | ||
586 | |||
587 | 45792 | static int run_alf(VVCContext *s, VVCLocalContext *lc, VVCTask *t) | |
588 | { | ||
589 | 45792 | VVCFrameContext *fc = lc->fc; | |
590 | 45792 | VVCFrameThread *ft = fc->ft; | |
591 | 45792 | const int ctu_size = ft->ctu_size; | |
592 | 45792 | const int x0 = t->rx * ctu_size; | |
593 | 45792 | const int y0 = t->ry * ctu_size; | |
594 | |||
595 |
2/2✓ Branch 0 taken 35754 times.
✓ Branch 1 taken 10038 times.
|
45792 | if (fc->ps.sps->r->sps_alf_enabled_flag) { |
596 | 35754 | ff_vvc_decode_neighbour(lc, x0, y0, t->rx, t->ry, t->rs); | |
597 | 35754 | ff_vvc_alf_filter(lc, x0, y0); | |
598 | } | ||
599 | 45792 | report_frame_progress(fc, t->ry, VVC_PROGRESS_PIXEL); | |
600 | |||
601 | 45792 | return 0; | |
602 | } | ||
603 | |||
// Enables the per-stage debug trace in task_run_stage().
#define VVC_THREAD_DEBUG

// Short stage names for log messages, indexed by VVCTaskStage.
// The table is defined unconditionally: besides the debug trace, the error
// path of task_run_stage() prints these names too, and that path is not
// guarded by VVC_THREAD_DEBUG, so the file must keep compiling when the
// macro above is removed.
static const char *const task_name[] = {
    "INIT",
    "P",
    "B",
    "I",
    "R",
    "L",
    "V",
    "H",
    "S",
    "A"
};
619 | |||
620 | typedef int (*run_func)(VVCContext *s, VVCLocalContext *lc, VVCTask *t); | ||
621 | |||
622 | 413064 | static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc) | |
623 | { | ||
624 | int ret; | ||
625 | 413064 | VVCFrameContext *fc = t->fc; | |
626 | 413064 | VVCFrameThread *ft = fc->ft; | |
627 | 413064 | const VVCTaskStage stage = t->stage; | |
628 | static const run_func run[] = { | ||
629 | run_init, | ||
630 | run_parse, | ||
631 | run_deblock_bs, | ||
632 | run_inter, | ||
633 | run_recon, | ||
634 | run_lmcs, | ||
635 | run_deblock_v, | ||
636 | run_deblock_h, | ||
637 | run_sao, | ||
638 | run_alf, | ||
639 | }; | ||
640 | |||
641 | #ifdef VVC_THREAD_DEBUG | ||
642 | 413064 | av_log(s->avctx, AV_LOG_DEBUG, "frame %5d, %s(%3d, %3d)\r\n", (int)t->fc->decode_order, task_name[stage], t->rx, t->ry); | |
643 | #endif | ||
644 | |||
645 | 413064 | lc->sc = t->sc; | |
646 | |||
647 |
1/2✓ Branch 0 taken 413064 times.
✗ Branch 1 not taken.
|
413064 | if (!atomic_load(&ft->ret)) { |
648 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 413064 times.
|
413064 | if ((ret = run[stage](s, lc, t)) < 0) { |
649 | #ifdef COMPAT_ATOMICS_WIN32_STDATOMIC_H | ||
650 | intptr_t zero = 0; | ||
651 | #else | ||
652 | ✗ | int zero = 0; | |
653 | #endif | ||
654 | ✗ | atomic_compare_exchange_strong(&ft->ret, &zero, ret); | |
655 | ✗ | av_log(s->avctx, AV_LOG_ERROR, | |
656 | "frame %5d, %s(%3d, %3d) failed with %d\r\n", | ||
657 | ✗ | (int)fc->decode_order, task_name[stage], t->rx, t->ry, ret); | |
658 | } | ||
659 | } | ||
660 | |||
661 | 413064 | task_stage_done(t, s); | |
662 | 413064 | return; | |
663 | } | ||
664 | |||
665 | 228253 | static int task_run(FFTask *_t, void *local_context, void *user_data) | |
666 | { | ||
667 | 228253 | VVCTask *t = (VVCTask*)_t; | |
668 | 228253 | VVCContext *s = (VVCContext *)user_data; | |
669 | 228253 | VVCLocalContext *lc = local_context; | |
670 | 228253 | VVCFrameThread *ft = t->fc->ft; | |
671 | |||
672 | 228253 | lc->fc = t->fc; | |
673 | |||
674 | do { | ||
675 | 413064 | task_run_stage(t, s, lc); | |
676 | 413064 | t->stage++; | |
677 |
2/2✓ Branch 1 taken 184811 times.
✓ Branch 2 taken 228253 times.
|
413064 | } while (task_is_stage_ready(t, 1)); |
678 | |||
679 |
2/2✓ Branch 0 taken 182461 times.
✓ Branch 1 taken 45792 times.
|
228253 | if (t->stage != VVC_TASK_STAGE_LAST) |
680 | 182461 | frame_thread_add_score(s, ft, t->rx, t->ry, t->stage); | |
681 | |||
682 | 228253 | sheduled_done(ft, &ft->nb_scheduled_tasks); | |
683 | |||
684 | 228253 | return 0; | |
685 | } | ||
686 | |||
// Create the executor that runs VVC tasks on thread_count threads.
// Returns whatever ff_executor_alloc() returns.
FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
{
    // Positional initializer; the field meanings below are inferred from how
    // task_run() uses its arguments - confirm against FFTaskCallbacks.
    FFTaskCallbacks callbacks = {
        s,                          // handed back to task_run() as user_data
        sizeof(VVCLocalContext),    // task_run() treats local_context as a VVCLocalContext
        PRIORITY_LOWEST + 1,        // priorities 0..PRIORITY_LOWEST are used by add_task()
        task_run,
    };
    return ff_executor_alloc(&callbacks, thread_count);
}
697 | |||
// Release an executor created by ff_vvc_executor_alloc(); simply forwards
// to ff_executor_free().
void ff_vvc_executor_free(FFExecutor **e)
{
    ff_executor_free(e);
}
702 | |||
703 | 902 | void ff_vvc_frame_thread_free(VVCFrameContext *fc) | |
704 | { | ||
705 | 902 | VVCFrameThread *ft = fc->ft; | |
706 | |||
707 |
2/2✓ Branch 0 taken 656 times.
✓ Branch 1 taken 246 times.
|
902 | if (!ft) |
708 | 656 | return; | |
709 | |||
710 | 246 | ff_mutex_destroy(&ft->lock); | |
711 | 246 | ff_cond_destroy(&ft->cond); | |
712 | 246 | av_freep(&ft->rows); | |
713 | 246 | av_freep(&ft->tasks); | |
714 | 246 | av_freep(&ft); | |
715 | } | ||
716 | |||
717 | 936 | static void frame_thread_init_score(VVCFrameContext *fc) | |
718 | { | ||
719 | 936 | const VVCFrameThread *ft = fc->ft; | |
720 | VVCTask task; | ||
721 | |||
722 | 936 | task_init(&task, VVC_TASK_STAGE_PARSE, fc, 0, 0); | |
723 | |||
724 |
2/2✓ Branch 0 taken 8424 times.
✓ Branch 1 taken 936 times.
|
9360 | for (int i = VVC_TASK_STAGE_PARSE; i < VVC_TASK_STAGE_LAST; i++) { |
725 | 8424 | task.stage = i; | |
726 | |||
727 |
2/2✓ Branch 0 taken 82062 times.
✓ Branch 1 taken 8424 times.
|
90486 | for (task.rx = -1; task.rx <= ft->ctu_width; task.rx++) { |
728 | 82062 | task.ry = -1; //top | |
729 | 82062 | task_stage_done(&task, NULL); | |
730 | 82062 | task.ry = ft->ctu_height; //bottom | |
731 | 82062 | task_stage_done(&task, NULL); | |
732 | } | ||
733 | |||
734 |
2/2✓ Branch 0 taken 37017 times.
✓ Branch 1 taken 8424 times.
|
45441 | for (task.ry = 0; task.ry < ft->ctu_height; task.ry++) { |
735 | 37017 | task.rx = -1; //left | |
736 | 37017 | task_stage_done(&task, NULL); | |
737 | 37017 | task.rx = ft->ctu_width; //right | |
738 | 37017 | task_stage_done(&task, NULL); | |
739 | } | ||
740 | } | ||
741 | 936 | } | |
742 | |||
743 | 936 | int ff_vvc_frame_thread_init(VVCFrameContext *fc) | |
744 | { | ||
745 | 936 | const VVCSPS *sps = fc->ps.sps; | |
746 | 936 | const VVCPPS *pps = fc->ps.pps; | |
747 | 936 | VVCFrameThread *ft = fc->ft; | |
748 | int ret; | ||
749 | |||
750 |
3/4✓ Branch 0 taken 690 times.
✓ Branch 1 taken 246 times.
✓ Branch 2 taken 690 times.
✗ Branch 3 not taken.
|
936 | if (!ft || ft->ctu_width != pps->ctb_width || |
751 |
1/2✓ Branch 0 taken 690 times.
✗ Branch 1 not taken.
|
690 | ft->ctu_height != pps->ctb_height || |
752 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 690 times.
|
690 | ft->ctu_size != sps->ctb_size_y) { |
753 | |||
754 | 246 | ff_vvc_frame_thread_free(fc); | |
755 | 246 | ft = av_calloc(1, sizeof(*fc->ft)); | |
756 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
|
246 | if (!ft) |
757 | ✗ | return AVERROR(ENOMEM); | |
758 | |||
759 | 246 | ft->ctu_width = fc->ps.pps->ctb_width; | |
760 | 246 | ft->ctu_height = fc->ps.pps->ctb_height; | |
761 | 246 | ft->ctu_count = fc->ps.pps->ctb_count; | |
762 | 246 | ft->ctu_size = fc->ps.sps->ctb_size_y; | |
763 | |||
764 | 246 | ft->rows = av_calloc(ft->ctu_height, sizeof(*ft->rows)); | |
765 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
|
246 | if (!ft->rows) |
766 | ✗ | goto fail; | |
767 | |||
768 | 246 | ft->tasks = av_malloc(ft->ctu_count * sizeof(*ft->tasks)); | |
769 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 246 times.
|
246 | if (!ft->tasks) |
770 | ✗ | goto fail; | |
771 | |||
772 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 246 times.
|
246 | if ((ret = ff_cond_init(&ft->cond, NULL))) |
773 | ✗ | goto fail; | |
774 | |||
775 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 246 times.
|
246 | if ((ret = ff_mutex_init(&ft->lock, NULL))) { |
776 | ✗ | ff_cond_destroy(&ft->cond); | |
777 | ✗ | goto fail; | |
778 | } | ||
779 | } | ||
780 | 936 | fc->ft = ft; | |
781 | 936 | ft->ret = 0; | |
782 |
2/2✓ Branch 0 taken 4113 times.
✓ Branch 1 taken 936 times.
|
5049 | for (int y = 0; y < ft->ctu_height; y++) { |
783 | 4113 | VVCRowThread *row = ft->rows + y; | |
784 | 4113 | memset(row->col_progress, 0, sizeof(row->col_progress)); | |
785 | } | ||
786 | |||
787 |
2/2✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 936 times.
|
46728 | for (int rs = 0; rs < ft->ctu_count; rs++) { |
788 | 45792 | VVCTask *t = ft->tasks + rs; | |
789 | 45792 | task_init(t, rs ? VVC_TASK_STAGE_PARSE : VVC_TASK_STAGE_INIT, fc, rs % ft->ctu_width, rs / ft->ctu_width); | |
790 | } | ||
791 | |||
792 | 936 | memset(&ft->row_progress[0], 0, sizeof(ft->row_progress)); | |
793 | |||
794 | 936 | frame_thread_init_score(fc); | |
795 | |||
796 | 936 | return 0; | |
797 | |||
798 | ✗ | fail: | |
799 | ✗ | if (ft) { | |
800 | ✗ | av_freep(&ft->rows); | |
801 | ✗ | av_freep(&ft->tasks); | |
802 | ✗ | av_freep(&ft); | |
803 | } | ||
804 | |||
805 | ✗ | return AVERROR(ENOMEM); | |
806 | } | ||
807 | |||
808 | 936 | int ff_vvc_frame_submit(VVCContext *s, VVCFrameContext *fc) | |
809 | { | ||
810 | 936 | VVCFrameThread *ft = fc->ft; | |
811 | |||
812 |
2/2✓ Branch 0 taken 1659 times.
✓ Branch 1 taken 936 times.
|
2595 | for (int i = 0; i < fc->nb_slices; i++) { |
813 | 1659 | SliceContext *sc = fc->slices[i]; | |
814 |
2/2✓ Branch 0 taken 2026 times.
✓ Branch 1 taken 1659 times.
|
3685 | for (int j = 0; j < sc->nb_eps; j++) { |
815 | 2026 | EntryPoint *ep = sc->eps + j; | |
816 |
2/2✓ Branch 0 taken 45792 times.
✓ Branch 1 taken 2026 times.
|
47818 | for (int k = ep->ctu_start; k < ep->ctu_end; k++) { |
817 | 45792 | const int rs = sc->sh.ctb_addr_in_curr_slice[k]; | |
818 | 45792 | VVCTask *t = ft->tasks + rs; | |
819 | 45792 | const int ret = task_init_parse(t, sc, ep, k); | |
820 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 45792 times.
|
45792 | if (ret < 0) |
821 | ✗ | return ret; | |
822 | } | ||
823 | } | ||
824 | } | ||
825 | 936 | frame_thread_add_score(s, ft, 0, 0, VVC_TASK_STAGE_INIT); | |
826 | |||
827 | 936 | return 0; | |
828 | } | ||
829 | |||
830 | 936 | int ff_vvc_frame_wait(VVCContext *s, VVCFrameContext *fc) | |
831 | { | ||
832 | 936 | VVCFrameThread *ft = fc->ft; | |
833 | |||
834 | 936 | ff_mutex_lock(&ft->lock); | |
835 | |||
836 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 936 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 936 times.
|
936 | while (atomic_load(&ft->nb_scheduled_tasks) || atomic_load(&ft->nb_scheduled_listeners)) |
837 | ✗ | ff_cond_wait(&ft->cond, &ft->lock); | |
838 | |||
839 | 936 | ff_mutex_unlock(&ft->lock); | |
840 | 936 | ff_vvc_report_frame_finished(fc->ref); | |
841 | |||
842 | #ifdef VVC_THREAD_DEBUG | ||
843 | 936 | av_log(s->avctx, AV_LOG_DEBUG, "frame %5d done\r\n", (int)fc->decode_order); | |
844 | #endif | ||
845 | 936 | return ft->ret; | |
846 | } | ||
847 |