GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/proresenc_kostya.c Lines: 473 665 71.1 %
Date: 2021-01-20 23:14:43 Branches: 181 304 59.5 %

Line Branch Exec Source
1
/*
2
 * Apple ProRes encoder
3
 *
4
 * Copyright (c) 2012 Konstantin Shishkov
5
 *
6
 * This encoder appears to be based on Anatoliy Wassermans considering
7
 * similarities in the bugs.
8
 *
9
 * This file is part of FFmpeg.
10
 *
11
 * FFmpeg is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2.1 of the License, or (at your option) any later version.
15
 *
16
 * FFmpeg is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with FFmpeg; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
 */
25
26
#include "libavutil/mem_internal.h"
27
#include "libavutil/opt.h"
28
#include "libavutil/pixdesc.h"
29
#include "avcodec.h"
30
#include "fdctdsp.h"
31
#include "put_bits.h"
32
#include "profiles.h"
33
#include "bytestream.h"
34
#include "internal.h"
35
#include "proresdata.h"
36
37
/* Values of ProresContext.chroma_factor for 4:2:2 and 4:4:4 content. */
#define CFACTOR_Y422 2
#define CFACTOR_Y444 3

/* Upper bound on macroblocks per slice; sizes the coefficient buffers. */
#define MAX_MBS_PER_SLICE 8

/* Upper bound on the number of planes handled per slice. */
#define MAX_PLANES 4
43
44
/*
 * Encoder profiles. Non-negative values are used as indices into
 * prores_profile_info[]; PRORES_PROFILE_AUTO (-1) is not a table index.
 */
enum {
    PRORES_PROFILE_AUTO  = -1,
    PRORES_PROFILE_PROXY = 0,
    PRORES_PROFILE_LT,
    PRORES_PROFILE_STANDARD,
    PRORES_PROFILE_HQ,
    PRORES_PROFILE_4444,
    PRORES_PROFILE_4444XQ,
};
53
54
/* Row indices into prores_quant_matrices[]; order must match that table. */
enum {
    QUANT_MAT_PROXY = 0,
    QUANT_MAT_PROXY_CHROMA,
    QUANT_MAT_LT,
    QUANT_MAT_STANDARD,
    QUANT_MAT_HQ,
    QUANT_MAT_XQ_LUMA,
    QUANT_MAT_DEFAULT,
};
63
64
/*
 * Base 8x8 quantisation matrices, one row of 64 entries per QUANT_MAT_*
 * index (same order as that enum).
 */
static const uint8_t prores_quant_matrices[][64] = {
    { // proxy
         4,  7,  9, 11, 13, 14, 15, 63,
         7,  7, 11, 12, 14, 15, 63, 63,
         9, 11, 13, 14, 15, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // proxy chroma
         4,  7,  9, 11, 13, 14, 63, 63,
         7,  7, 11, 12, 14, 63, 63, 63,
         9, 11, 13, 14, 63, 63, 63, 63,
        11, 11, 13, 14, 63, 63, 63, 63,
        11, 13, 14, 63, 63, 63, 63, 63,
        13, 14, 63, 63, 63, 63, 63, 63,
        13, 63, 63, 63, 63, 63, 63, 63,
        63, 63, 63, 63, 63, 63, 63, 63,
    },
    { // LT
         4,  5,  6,  7,  9, 11, 13, 15,
         5,  5,  7,  8, 11, 13, 15, 17,
         6,  7,  9, 11, 13, 15, 15, 17,
         7,  7,  9, 11, 13, 15, 17, 19,
         7,  9, 11, 13, 14, 16, 19, 23,
         9, 11, 13, 14, 16, 19, 23, 29,
         9, 11, 13, 15, 17, 21, 28, 35,
        11, 13, 16, 17, 21, 28, 35, 41,
    },
    { // standard
         4,  4,  5,  5,  6,  7,  7,  9,
         4,  4,  5,  6,  7,  7,  9,  9,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  5,  6,  7,  7,  9,  9, 10,
         5,  6,  7,  7,  8,  9, 10, 12,
         6,  7,  7,  8,  9, 10, 12, 15,
         6,  7,  7,  9, 10, 11, 14, 17,
         7,  7,  9, 10, 11, 14, 17, 21,
    },
    { // high quality
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  5,
         4,  4,  4,  4,  4,  4,  5,  5,
         4,  4,  4,  4,  4,  5,  5,  6,
         4,  4,  4,  4,  5,  5,  6,  7,
         4,  4,  4,  4,  5,  6,  7,  7,
    },
    { // XQ luma
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  3,
         2,  2,  2,  2,  2,  2,  3,  3,
         2,  2,  2,  2,  2,  3,  3,  3,
         2,  2,  2,  2,  3,  3,  3,  4,
         2,  2,  2,  2,  3,  3,  4,  4,
    },
    { // codec default
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,
    },
};
136
137
/* Number of frame-size classes; also the length of prores_profile.br_tab. */
#define NUM_MB_LIMITS 4

/* Macroblock-count thresholds separating the frame-size classes. */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    1620, // up to 720x576
    2700, // up to 960x720
    6075, // up to 1440x1080
    9216, // up to 2048x1152
};
144
145
static const struct prores_profile {
146
    const char *full_name;
147
    uint32_t    tag;
148
    int         min_quant;
149
    int         max_quant;
150
    int         br_tab[NUM_MB_LIMITS];
151
    int         quant;
152
    int         quant_chroma;
153
} prores_profile_info[6] = {
154
    {
155
        .full_name = "proxy",
156
        .tag       = MKTAG('a', 'p', 'c', 'o'),
157
        .min_quant = 4,
158
        .max_quant = 8,
159
        .br_tab    = { 300, 242, 220, 194 },
160
        .quant     = QUANT_MAT_PROXY,
161
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
162
    },
163
    {
164
        .full_name = "LT",
165
        .tag       = MKTAG('a', 'p', 'c', 's'),
166
        .min_quant = 1,
167
        .max_quant = 9,
168
        .br_tab    = { 720, 560, 490, 440 },
169
        .quant     = QUANT_MAT_LT,
170
        .quant_chroma = QUANT_MAT_LT,
171
    },
172
    {
173
        .full_name = "standard",
174
        .tag       = MKTAG('a', 'p', 'c', 'n'),
175
        .min_quant = 1,
176
        .max_quant = 6,
177
        .br_tab    = { 1050, 808, 710, 632 },
178
        .quant     = QUANT_MAT_STANDARD,
179
        .quant_chroma = QUANT_MAT_STANDARD,
180
    },
181
    {
182
        .full_name = "high quality",
183
        .tag       = MKTAG('a', 'p', 'c', 'h'),
184
        .min_quant = 1,
185
        .max_quant = 6,
186
        .br_tab    = { 1566, 1216, 1070, 950 },
187
        .quant     = QUANT_MAT_HQ,
188
        .quant_chroma = QUANT_MAT_HQ,
189
    },
190
    {
191
        .full_name = "4444",
192
        .tag       = MKTAG('a', 'p', '4', 'h'),
193
        .min_quant = 1,
194
        .max_quant = 6,
195
        .br_tab    = { 2350, 1828, 1600, 1425 },
196
        .quant     = QUANT_MAT_HQ,
197
        .quant_chroma = QUANT_MAT_HQ,
198
    },
199
    {
200
        .full_name = "4444XQ",
201
        .tag       = MKTAG('a', 'p', '4', 'x'),
202
        .min_quant = 1,
203
        .max_quant = 6,
204
        .br_tab    = { 3525, 2742, 2400, 2137 },
205
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
206
        .quant_chroma = QUANT_MAT_HQ,
207
    }
208
};
209
210
/* Length of the per-slice slice_bits[]/slice_score[] arrays in the quantiser search. */
#define TRELLIS_WIDTH 16
/*
 * Score ceiling. The replacement list is parenthesised so the macro stays
 * a single operand inside larger expressions (e.g. 2 * SCORE_LIMIT).
 */
#define SCORE_LIMIT   (INT_MAX / 2)
212
213
/* One node of the quantiser-selection trellis. */
struct TrellisNode {
    int prev_node; /* index of the predecessor node */
    int quant;     /* quantiser associated with this node */
    int bits;      /* bit count associated with this node */
    int score;     /* score associated with this node */
};
219
220
#define MAX_STORED_Q 16
221
222
typedef struct ProresThreadData {
223
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
224
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
225
    int16_t custom_q[64];
226
    int16_t custom_chroma_q[64];
227
    struct TrellisNode *nodes;
228
} ProresThreadData;
229
230
typedef struct ProresContext {
231
    AVClass *class;
232
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
233
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
234
    int16_t quants[MAX_STORED_Q][64];
235
    int16_t quants_chroma[MAX_STORED_Q][64];
236
    int16_t custom_q[64];
237
    int16_t custom_chroma_q[64];
238
    const uint8_t *quant_mat;
239
    const uint8_t *quant_chroma_mat;
240
    const uint8_t *scantable;
241
242
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
243
                 ptrdiff_t linesize, int16_t *block);
244
    FDCTDSPContext fdsp;
245
246
    const AVFrame *pic;
247
    int mb_width, mb_height;
248
    int mbs_per_slice;
249
    int num_chroma_blocks, chroma_factor;
250
    int slices_width;
251
    int slices_per_picture;
252
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
253
    int cur_picture_idx;
254
    int num_planes;
255
    int bits_per_mb;
256
    int force_quant;
257
    int alpha_bits;
258
    int warn;
259
260
    char *vendor;
261
    int quant_sel;
262
263
    int frame_size_upper_bound;
264
265
    int profile;
266
    const struct prores_profile *profile_info;
267
268
    int *slice_q;
269
270
    ProresThreadData *tdata;
271
} ProresContext;
272
273
66600
/**
 * Read the samples of one slice from a plane and forward-transform them.
 *
 * @param ctx           encoder context (provides fdct/fdsp)
 * @param src           first sample of the slice in the plane
 * @param linesize      distance between rows of src, in bytes
 * @param x, y          position of the slice in the plane, in samples
 * @param w, h          plane dimensions, in samples
 * @param blocks        output, 64 coefficients per transformed block
 * @param emu_buf       scratch buffer with 16 samples per row, used for
 *                      macroblocks that cross the right/bottom plane edge
 * @param mbs_per_slice number of macroblocks to process
 * @param blocks_per_mb blocks per macroblock in this plane (2 or 4)
 * @param is_chroma     selects the block order inside a macroblock
 *
 * Macroblocks lying completely to the right of the plane get all-zero
 * coefficients; partially covered ones are padded by replicating the last
 * valid column and row.
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const uint16_t *esrc;
    const int mb_width = 4 * blocks_per_mb;
    ptrdiff_t elinesize;
    int i, j, k;

    for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
        if (x >= w) {
            /* nothing left in the plane: zero all remaining blocks */
            memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }
        if (x + mb_width <= w && y + 16 <= h) {
            esrc      = src;
            elinesize = linesize;
        } else {
            int bw, bh, pix;

            esrc      = emu_buf;
            elinesize = 16 * sizeof(*emu_buf);

            bw = FFMIN(w - x, mb_width);
            bh = FFMIN(h - y, 16);

            for (j = 0; j < bh; j++) {
                memcpy(emu_buf + j * 16,
                       (const uint8_t*)src + j * linesize,
                       bw * sizeof(*src));
                /* replicate the last valid sample to the right */
                pix = emu_buf[j * 16 + bw - 1];
                for (k = bw; k < mb_width; k++)
                    emu_buf[j * 16 + k] = pix;
            }
            /* replicate the last valid row downwards */
            for (; j < 16; j++)
                memcpy(emu_buf + j * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));
        }
        /*
         * elinesize is in bytes while esrc points to 16-bit samples, so
         * "esrc + elinesize * 4" advances by 8 rows.
         */
        if (!is_chroma) {
            /* order: top-left, top-right, bottom-left, bottom-right */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
            }
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                blocks += 64;
            }
        } else {
            /* order: top-left, bottom-left, then the right column if any */
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
            blocks += 64;
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
            blocks += 64;
            if (blocks_per_mb > 2) {
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
                blocks += 64;
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
                blocks += 64;
            }
        }

        x += mb_width;
    }
}
343
344
/**
 * Copy the alpha samples of one slice into a 16-row buffer, rescaling them
 * and padding beyond the plane edges.
 *
 * @param ctx           unused
 * @param src           first alpha sample of the slice
 * @param linesize      distance between rows of src, in bytes
 * @param x, y          position of the slice in the plane, in samples
 * @param w, h          plane dimensions, in samples
 * @param blocks        output, 16 rows of 16 * mbs_per_slice samples
 * @param mbs_per_slice slice width in macroblocks
 * @param abits         8 shifts samples right by 2; any other value expands
 *                      them with (v << 6) | (v >> 4)
 *                      (presumably 10-bit input to 8/16 bits - confirm)
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    int i, j, copy_w, copy_h;

    copy_w = FFMIN(w - x, slice_width);
    copy_h = FFMIN(h - y, 16);
    for (i = 0; i < copy_h; i++) {
        memcpy(blocks, src, copy_w * sizeof(*src));
        if (abits == 8)
            for (j = 0; j < copy_w; j++)
                blocks[j] >>= 2;
        else
            for (j = 0; j < copy_w; j++)
                blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
        /* pad to the right with the last valid sample */
        for (j = copy_w; j < slice_width; j++)
            blocks[j] = blocks[copy_w - 1];
        blocks += slice_width;
        src    += linesize >> 1; /* linesize is in bytes, src in samples */
    }
    /* pad downwards by repeating the previous row */
    for (; i < 16; i++) {
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
        blocks += slice_width;
    }
}
371
372
/**
373
 * Write an unsigned rice/exp golomb codeword.
374
 */
375
26885174
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
376
{
377
    unsigned int rice_order, exp_order, switch_bits, switch_val;
378
    int exponent;
379
380
    /* number of prefix bits to switch between Rice and expGolomb */
381
26885174
    switch_bits = (codebook & 3) + 1;
382
26885174
    rice_order  =  codebook >> 5;       /* rice code order */
383
26885174
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
384
385
26885174
    switch_val  = switch_bits << rice_order;
386
387
26885174
    if (val >= switch_val) {
388
8074309
        val -= switch_val - (1 << exp_order);
389
8074309
        exponent = av_log2(val);
390
391
8074309
        put_bits(pb, exponent - exp_order + switch_bits, 0);
392
8074309
        put_bits(pb, exponent + 1, val);
393
    } else {
394
18810865
        exponent = val >> rice_order;
395
396
18810865
        if (exponent)
397
4838901
            put_bits(pb, exponent, 0);
398
18810865
        put_bits(pb, 1, 1);
399
18810865
        if (rice_order)
400
1904515
            put_sbits(pb, rice_order, val);
401
    }
402
26885174
}
403
404
/*
 * GET_SIGN yields 0 for non-negative and -1 for negative values; it relies
 * on a 32-bit int and an arithmetic right shift of negative values.
 * MAKE_CODE maps signed to unsigned: 0, -1, 1, -2, 2 ... -> 0, 1, 2, 3, 4 ...
 */
#define GET_SIGN(x)  ((x) >> 31)
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
406
407
33300
/**
 * Write the DC coefficients of all blocks in a slice plane.
 *
 * Each DC has the 0x4000 bias removed and is divided by scale. The first
 * one is coded with FIRST_DC_CB; every following one is coded as a
 * difference to its predecessor, with the difference's sign flipped
 * according to the sign of the previous difference and the codebook chosen
 * from the previous code.
 *
 * @param pb               bit writer
 * @param blocks           coefficients, 64 per block, DC at offset 0
 * @param blocks_per_slice number of blocks
 * @param scale            DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int i;
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;

    prev_dc = (blocks[0] - 0x4000) / scale;
    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
    sign     = 0;
    blocks  += 64;

    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
        dc       = (blocks[0] - 0x4000) / scale;
        delta    = dc - prev_dc;
        new_sign = GET_SIGN(delta);
        delta    = (delta ^ sign) - sign; /* negate if previous delta was negative */
        code     = MAKE_CODE(delta);
        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
        codebook = (code + (code & 1)) >> 1;
        codebook = FFMIN(codebook, 3);
        sign     = new_sign;
        prev_dc  = dc;
    }
}
432
433
33300
/**
 * Write the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited scan position by scan position (1..63) and,
 * within each position, block after block. Each is divided by the matching
 * qmat entry; zero results extend the current run, non-zero ones emit the
 * run, abs(level) - 1 and a sign bit. Zeros after the last non-zero level
 * are not written.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks
 * @param plane_size_factor unused
 * @param scan              scan table giving the coefficient index per position
 * @param qmat              quantisation matrix indexed by coefficient index
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    int idx, i;
    int run, level, run_cb, lev_cb;
    int max_coeffs, abs_level;

    max_coeffs = blocks_per_slice << 6;
    run_cb     = ff_prores_run_to_cb_index[4];
    lev_cb     = ff_prores_lev_to_cb_index[2];
    run        = 0;

    for (i = 1; i < 64; i++) {
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
            level = blocks[idx] / qmat[scan[i]];
            if (level) {
                abs_level = FFABS(level);
                encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
                encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                    abs_level - 1);
                put_sbits(pb, 1, GET_SIGN(level));

                /* next codebooks depend on the run/level just written */
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
                run    = 0;
            } else {
                run++;
            }
        }
    }
}
466
467
33300
/**
 * Write the already transformed coefficients of one slice plane.
 *
 * Emits DCs, then ACs, then flushes the bit writer.
 *
 * @param ctx               encoder context (provides the scan table)
 * @param pb                bit writer
 * @param src, linesize     unused
 * @param mbs_per_slice     macroblocks in the slice
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_mb     blocks per macroblock in this plane
 * @param plane_size_factor forwarded to encode_acs()
 * @param qmat              quantisation matrix; qmat[0] scales the DCs
 * @return difference of the bit position after and before writing,
 *         shifted right by 3 (i.e. in bytes)
 */
static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    int blocks_per_slice, saved_pos;

    saved_pos = put_bits_count(pb);
    blocks_per_slice = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
               ctx->scantable, qmat);
    flush_put_bits(pb);

    return (put_bits_count(pb) - saved_pos) >> 3;
}
485
486
/**
 * Write the difference between two alpha samples.
 *
 * The difference is taken modulo 2^abits and mapped to a signed value when
 * it lies within dsize of the wrap-around point. Non-zero differences with
 * magnitude up to dsize use the short form (flag 0, dbits - 1 bits of
 * |diff| - 1, sign bit); everything else is written as flag 1 followed by
 * abits bits.
 *
 * @param pb    bit writer
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha bit depth (8 selects 4-bit short differences,
 *              anything else 7-bit)
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = cur - prev;

    diff = av_mod_uintp2(diff, abits);
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;
    if (diff < -dsize || diff > dsize || !diff) {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    } else {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    }
}
504
505
/**
 * Write an alpha run length.
 *
 * A zero run is a single 1 bit. A non-zero run is a 0 bit followed by the
 * run in 4 bits when it is below 0x10, or in 15 bits otherwise.
 *
 * @param pb  bit writer
 * @param run number of repeated samples
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (run) {
        put_bits(pb, 1, 0);
        if (run < 0x10)
            put_bits(pb, 4, run);
        else
            put_bits(pb, 15, run);
    } else {
        put_bits(pb, 1, 1);
    }
}
517
518
// todo alpha quantisation for high quants
519
static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
520
                              int mbs_per_slice, uint16_t *blocks,
521
                              int quant)
522
{
523
    const int abits = ctx->alpha_bits;
524
    const int mask  = (1 << abits) - 1;
525
    const int num_coeffs = mbs_per_slice * 256;
526
    int saved_pos = put_bits_count(pb);
527
    int prev = mask, cur;
528
    int idx = 0;
529
    int run = 0;
530
531
    cur = blocks[idx++];
532
    put_alpha_diff(pb, cur, prev, abits);
533
    prev = cur;
534
    do {
535
        cur = blocks[idx++];
536
        if (cur != prev) {
537
            put_alpha_run (pb, run);
538
            put_alpha_diff(pb, cur, prev, abits);
539
            prev = cur;
540
            run  = 0;
541
        } else {
542
            run++;
543
        }
544
    } while (idx < num_coeffs);
545
    if (run)
546
        put_alpha_run(pb, run);
547
    flush_put_bits(pb);
548
    return (put_bits_count(pb) - saved_pos) >> 3;
549
}
550
551
11100
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
552
                        PutBitContext *pb,
553
                        int sizes[4], int x, int y, int quant,
554
                        int mbs_per_slice)
555
{
556
11100
    ProresContext *ctx = avctx->priv_data;
557
    int i, xp, yp;
558
11100
    int total_size = 0;
559
    const uint16_t *src;
560
11100
    int slice_width_factor = av_log2(mbs_per_slice);
561
    int num_cblocks, pwidth, line_add;
562
    ptrdiff_t linesize;
563
    int plane_factor, is_chroma;
564
    uint16_t *qmat;
565
    uint16_t *qmat_chroma;
566
567
11100
    if (ctx->pictures_per_frame == 1)
568
11100
        line_add = 0;
569
    else
570
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
571
572
11100
    if (ctx->force_quant) {
573
        qmat = ctx->quants[0];
574
        qmat_chroma = ctx->quants_chroma[0];
575
11100
    } else if (quant < MAX_STORED_Q) {
576
8353
        qmat = ctx->quants[quant];
577
8353
        qmat_chroma = ctx->quants_chroma[quant];
578
    } else {
579
2747
        qmat = ctx->custom_q;
580
2747
        qmat_chroma = ctx->custom_chroma_q;
581
178555
        for (i = 0; i < 64; i++) {
582
175808
            qmat[i] = ctx->quant_mat[i] * quant;
583
175808
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
584
        }
585
    }
586
587
44400
    for (i = 0; i < ctx->num_planes; i++) {
588

33300
        is_chroma    = (i == 1 || i == 2);
589
33300
        plane_factor = slice_width_factor + 2;
590
33300
        if (is_chroma)
591
22200
            plane_factor += ctx->chroma_factor - 3;
592

33300
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
593
11100
            xp          = x << 4;
594
11100
            yp          = y << 4;
595
11100
            num_cblocks = 4;
596
11100
            pwidth      = avctx->width;
597
        } else {
598
22200
            xp          = x << 3;
599
22200
            yp          = y << 4;
600
22200
            num_cblocks = 2;
601
22200
            pwidth      = avctx->width >> 1;
602
        }
603
604
33300
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
605
33300
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
606
33300
                                line_add * pic->linesize[i]) + xp;
607
608
33300
        if (i < 3) {
609
33300
            get_slice_data(ctx, src, linesize, xp, yp,
610
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
611
33300
                           ctx->blocks[0], ctx->emu_buf,
612
                           mbs_per_slice, num_cblocks, is_chroma);
613
33300
            if (!is_chroma) {/* luma quant */
614
11100
                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
615
11100
                                              mbs_per_slice, ctx->blocks[0],
616
                                              num_cblocks, plane_factor,
617
                                              qmat);
618
            } else { /* chroma plane */
619
22200
                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
620
22200
                                              mbs_per_slice, ctx->blocks[0],
621
                                              num_cblocks, plane_factor,
622
                                              qmat_chroma);
623
            }
624
        } else {
625
            get_alpha_data(ctx, src, linesize, xp, yp,
626
                           pwidth, avctx->height / ctx->pictures_per_frame,
627
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
628
            sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
629
                                          ctx->blocks[0], quant);
630
        }
631
33300
        total_size += sizes[i];
632
33300
        if (put_bits_left(pb) < 0) {
633
            av_log(avctx, AV_LOG_ERROR,
634
                   "Underestimated required buffer size.\n");
635
            return AVERROR_BUG;
636
        }
637
    }
638
11100
    return total_size;
639
}
640
641
390320122
/**
 * Compute the length in bits of the codeword encode_vlc_codeword() writes.
 *
 * @param codebook packed descriptor: bits 0-1 hold switch_bits - 1,
 *                 bits 2-4 the exp-Golomb order, bits 5 and up the Rice order
 * @param val      value to code; compared as unsigned against the switch
 *                 threshold, so callers pass non-negative values
 * @return codeword length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    unsigned int rice_order, exp_order, switch_bits, switch_val;
    int exponent;

    /* number of prefix bits to switch between Rice and expGolomb */
    switch_bits = (codebook & 3) + 1;
    rice_order  =  codebook >> 5;       /* rice code order */
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */

    switch_val  = switch_bits << rice_order;

    if (val >= switch_val) {
        val -= switch_val - (1 << exp_order);
        exponent = av_log2(val);

        /* zero prefix (exponent - exp_order + switch_bits) + value (exponent + 1) */
        return exponent * 2 - exp_order + switch_bits + 1;
    } else {
        /* unary quotient + stop bit + remainder bits */
        return (val >> rice_order) + rice_order + 1;
    }
}
662
663
379686
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
664
                        int scale)
665
{
666
    int i;
667
379686
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
668
    int bits;
669
670
379686
    prev_dc  = (blocks[0] - 0x4000) / scale;
671
379686
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
672
379686
    sign     = 0;
673
379686
    codebook = 3;
674
379686
    blocks  += 64;
675
379686
    *error  += FFABS(blocks[0] - 0x4000) % scale;
676
677
5449672
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
678
5069986
        dc       = (blocks[0] - 0x4000) / scale;
679
5069986
        *error  += FFABS(blocks[0] - 0x4000) % scale;
680
5069986
        delta    = dc - prev_dc;
681
5069986
        new_sign = GET_SIGN(delta);
682
5069986
        delta    = (delta ^ sign) - sign;
683
5069986
        code     = MAKE_CODE(delta);
684
5069986
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
685
5069986
        codebook = (code + (code & 1)) >> 1;
686
5069986
        codebook = FFMIN(codebook, 3);
687
5069986
        sign     = new_sign;
688
5069986
        prev_dc  = dc;
689
    }
690
691
379686
    return bits;
692
}
693
694
379686
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
695
                        int plane_size_factor,
696
                        const uint8_t *scan, const int16_t *qmat)
697
{
698
    int idx, i;
699
    int run, level, run_cb, lev_cb;
700
    int max_coeffs, abs_level;
701
379686
    int bits = 0;
702
703
379686
    max_coeffs = blocks_per_slice << 6;
704
379686
    run_cb     = ff_prores_run_to_cb_index[4];
705
379686
    lev_cb     = ff_prores_lev_to_cb_index[2];
706
379686
    run        = 0;
707
708
24299904
    for (i = 1; i < 64; i++) {
709
367249554
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
710
343329336
            level   = blocks[idx] / qmat[scan[i]];
711
343329336
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
712
343329336
            if (level) {
713
192435225
                abs_level = FFABS(level);
714
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
715
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
716
192435225
                                     abs_level - 1) + 1;
717
718
192435225
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
719
192435225
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
720
192435225
                run    = 0;
721
            } else {
722
150894111
                run++;
723
            }
724
        }
725
    }
726
727
379686
    return bits;
728
}
729
730
379686
/**
 * Estimate the coded size of one slice plane from the coefficients stored
 * in the thread data.
 *
 * @param ctx               encoder context (provides the scan table)
 * @param error             incremented by the quantisation remainders
 * @param plane             index into td->blocks
 * @param src, linesize     unused
 * @param mbs_per_slice     macroblocks in the slice
 * @param blocks_per_mb     blocks per macroblock in this plane
 * @param plane_size_factor forwarded to estimate_acs()
 * @param qmat              quantisation matrix; qmat[0] scales the DCs
 * @param td                thread data holding the coefficients
 * @return estimated size in bits, rounded up to a multiple of 8
 */
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice,
                                int blocks_per_mb, int plane_size_factor,
                                const int16_t *qmat, ProresThreadData *td)
{
    int blocks_per_slice;
    int bits;

    blocks_per_slice = mbs_per_slice * blocks_per_mb;

    bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
    bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
                         plane_size_factor, ctx->scantable, qmat);

    return FFALIGN(bits, 8);
}
747
748
/**
 * Compute the number of bits put_alpha_diff() writes for a sample pair.
 *
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha bit depth
 * @return abits + 1 for the long form, dbits + 1 for the short form
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = cur - prev;

    diff = av_mod_uintp2(diff, abits);
    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;
    if (diff < -dsize || diff > dsize || !diff)
        return abits + 1;
    else
        return dbits + 1;
}
762
763
/**
 * Estimate the bits needed to code the alpha samples of one slice.
 *
 * Walks the samples the same way encode_alpha_plane() does.
 *
 * @param ctx           encoder context (provides alpha_bits)
 * @param src, linesize unused
 * @param mbs_per_slice macroblocks in the slice (256 samples each)
 * @param blocks        alpha samples
 * @return estimated size in bits (not rounded to bytes)
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            /*
             * NOTE(review): put_alpha_run() writes a 1-bit flag before the
             * 4- or 15-bit run, so non-zero runs cost one bit more than
             * counted here. Left as is because it feeds the quantiser
             * choice; confirm whether this is intended.
             */
            if (!run)
                bits++;
            else if (run < 0x10)
                bits += 4;
            else
                bits += 15;
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    if (run) {
        if (run < 0x10)
            bits += 4;
        else
            bits += 15;
    }

    return bits;
}
804
805
11100
static int find_slice_quant(AVCodecContext *avctx,
806
                            int trellis_node, int x, int y, int mbs_per_slice,
807
                            ProresThreadData *td)
808
{
809
11100
    ProresContext *ctx = avctx->priv_data;
810
    int i, q, pq, xp, yp;
811
    const uint16_t *src;
812
11100
    int slice_width_factor = av_log2(mbs_per_slice);
813
    int num_cblocks[MAX_PLANES], pwidth;
814
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
815
11100
    const int min_quant = ctx->profile_info->min_quant;
816
11100
    const int max_quant = ctx->profile_info->max_quant;
817
    int error, bits, bits_limit;
818
    int mbs, prev, cur, new_score;
819
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
820
    int overquant;
821
    uint16_t *qmat;
822
    uint16_t *qmat_chroma;
823
    int linesize[4], line_add;
824
11100
    int alpha_bits = 0;
825
826
11100
    if (ctx->pictures_per_frame == 1)
827
11100
        line_add = 0;
828
    else
829
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
830
11100
    mbs = x + mbs_per_slice;
831
832
44400
    for (i = 0; i < ctx->num_planes; i++) {
833

33300
        is_chroma[i]    = (i == 1 || i == 2);
834
33300
        plane_factor[i] = slice_width_factor + 2;
835
33300
        if (is_chroma[i])
836
22200
            plane_factor[i] += ctx->chroma_factor - 3;
837

33300
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
838
11100
            xp             = x << 4;
839
11100
            yp             = y << 4;
840
11100
            num_cblocks[i] = 4;
841
11100
            pwidth         = avctx->width;
842
        } else {
843
22200
            xp             = x << 3;
844
22200
            yp             = y << 4;
845
22200
            num_cblocks[i] = 2;
846
22200
            pwidth         = avctx->width >> 1;
847
        }
848
849
33300
        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
850
33300
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
851
33300
                                 line_add * ctx->pic->linesize[i]) + xp;
852
853
33300
        if (i < 3) {
854
33300
            get_slice_data(ctx, src, linesize[i], xp, yp,
855
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
856
33300
                           td->blocks[i], td->emu_buf,
857
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
858
        } else {
859
            get_alpha_data(ctx, src, linesize[i], xp, yp,
860
                           pwidth, avctx->height / ctx->pictures_per_frame,
861
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
862
        }
863
    }
864
865
88800
    for (q = min_quant; q < max_quant + 2; q++) {
866
77700
        td->nodes[trellis_node + q].prev_node = -1;
867
77700
        td->nodes[trellis_node + q].quant     = q;
868
    }
869
870
11100
    if (ctx->alpha_bits)
871
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
872
                                          mbs_per_slice, td->blocks[3]);
873
    // todo: maybe perform coarser quantising to fit into frame size when needed
874
77700
    for (q = min_quant; q <= max_quant; q++) {
875
66600
        bits  = alpha_bits;
876
66600
        error = 0;
877
133200
        bits += estimate_slice_plane(ctx, &error, 0,
878
66600
                                     src, linesize[0],
879
                                     mbs_per_slice,
880
                                     num_cblocks[0], plane_factor[0],
881
66600
                                     ctx->quants[q], td); /* estimate luma plane */
882
199800
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
883
133200
            bits += estimate_slice_plane(ctx, &error, i,
884
133200
                                         src, linesize[i],
885
                                         mbs_per_slice,
886
                                         num_cblocks[i], plane_factor[i],
887
133200
                                         ctx->quants_chroma[q], td);
888
        }
889
66600
        if (bits > 65000 * 8)
890
            error = SCORE_LIMIT;
891
892
66600
        slice_bits[q]  = bits;
893
66600
        slice_score[q] = error;
894
    }
895
11100
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
896
3643
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
897
3643
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
898
3643
        overquant = max_quant;
899
    } else {
900
59962
        for (q = max_quant + 1; q < 128; q++) {
901
59962
            bits  = alpha_bits;
902
59962
            error = 0;
903
59962
            if (q < MAX_STORED_Q) {
904
44939
                qmat = ctx->quants[q];
905
44939
                qmat_chroma = ctx->quants_chroma[q];
906
            } else {
907
15023
                qmat = td->custom_q;
908
15023
                qmat_chroma = td->custom_chroma_q;
909
976495
                for (i = 0; i < 64; i++) {
910
961472
                    qmat[i] = ctx->quant_mat[i] * q;
911
961472
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
912
                }
913
            }
914
119924
            bits += estimate_slice_plane(ctx, &error, 0,
915
59962
                                         src, linesize[0],
916
                                         mbs_per_slice,
917
                                         num_cblocks[0], plane_factor[0],
918
                                         qmat, td);/* estimate luma plane */
919
179886
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
920
119924
                bits += estimate_slice_plane(ctx, &error, i,
921
119924
                                             src, linesize[i],
922
                                             mbs_per_slice,
923
                                             num_cblocks[i], plane_factor[i],
924
                                             qmat_chroma, td);
925
            }
926
59962
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
927
7457
                break;
928
        }
929
930
7457
        slice_bits[max_quant + 1]  = bits;
931
7457
        slice_score[max_quant + 1] = error;
932
7457
        overquant = q;
933
    }
934
11100
    td->nodes[trellis_node + max_quant + 1].quant = overquant;
935
936
11100
    bits_limit = mbs * ctx->bits_per_mb;
937
88800
    for (pq = min_quant; pq < max_quant + 2; pq++) {
938
77700
        prev = trellis_node - TRELLIS_WIDTH + pq;
939
940
621600
        for (q = min_quant; q < max_quant + 2; q++) {
941
543900
            cur = trellis_node + q;
942
943
543900
            bits  = td->nodes[prev].bits + slice_bits[q];
944
543900
            error = slice_score[q];
945
543900
            if (bits > bits_limit)
946
438632
                error = SCORE_LIMIT;
947
948

543900
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
949
98480
                new_score = td->nodes[prev].score + error;
950
            else
951
445420
                new_score = SCORE_LIMIT;
952
543900
            if (td->nodes[cur].prev_node == -1 ||
953
466200
                td->nodes[cur].score >= new_score) {
954
955
505564
                td->nodes[cur].bits      = bits;
956
505564
                td->nodes[cur].score     = new_score;
957
505564
                td->nodes[cur].prev_node = prev;
958
            }
959
        }
960
    }
961
962
11100
    error = td->nodes[trellis_node + min_quant].score;
963
11100
    pq    = trellis_node + min_quant;
964
77700
    for (q = min_quant + 1; q < max_quant + 2; q++) {
965
66600
        if (td->nodes[trellis_node + q].score <= error) {
966
52033
            error = td->nodes[trellis_node + q].score;
967
52033
            pq    = trellis_node + q;
968
        }
969
    }
970
971
11100
    return pq;
972
}
973
974
2850
/**
 * Slice-thread job: choose a quantiser for every slice of one macroblock row.
 *
 * Runs find_slice_quant() on each slice of row jobnr from left to right,
 * then walks the prev_node links backwards from the node returned for the
 * last slice and stores the quantiser of each visited node in ctx->slice_q.
 *
 * @param avctx    codec context
 * @param arg      not used by this function
 * @param jobnr    macroblock row to process
 * @param threadnr index of the per-thread data in ctx->tdata
 * @return 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int row_base   = jobnr * ctx->slices_width;
    int slice_mbs        = ctx->mbs_per_slice;
    int node  = 0;
    int slice = 0;
    int mb_x  = 0;
    int col;

    while (mb_x < ctx->mb_width) {
        /* halve the slice width until it fits the remaining macroblocks */
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;

        node = find_slice_quant(avctx,
                                (slice + 1) * TRELLIS_WIDTH, mb_x, jobnr,
                                slice_mbs, td);
        mb_x += slice_mbs;
        slice++;
    }

    /* backtrack through the trellis, last slice first */
    for (col = ctx->slices_width - 1; col >= 0; col--) {
        ctx->slice_q[row_base + col] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
997
998
200
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
999
                        const AVFrame *pic, int *got_packet)
1000
{
1001
200
    ProresContext *ctx = avctx->priv_data;
1002
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
1003
    uint8_t *picture_size_pos;
1004
    PutBitContext pb;
1005
200
    int x, y, i, mb, q = 0;
1006
200
    int sizes[4] = { 0 };
1007
200
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
1008
    int frame_size, picture_size, slice_size;
1009
    int pkt_size, ret;
1010
200
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1011
    uint8_t frame_flags;
1012
1013
200
    ctx->pic = pic;
1014
200
    pkt_size = ctx->frame_size_upper_bound;
1015
1016
200
    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1017
        return ret;
1018
1019
200
    orig_buf = pkt->data;
1020
1021
    // frame atom
1022
200
    orig_buf += 4;                              // frame size
1023
200
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1024
200
    buf = orig_buf;
1025
1026
    // frame header
1027
200
    tmp = buf;
1028
200
    buf += 2;                                   // frame header size will be stored here
1029
200
    bytestream_put_be16  (&buf, 0);             // version 1
1030
200
    bytestream_put_buffer(&buf, ctx->vendor, 4);
1031
200
    bytestream_put_be16  (&buf, avctx->width);
1032
200
    bytestream_put_be16  (&buf, avctx->height);
1033
1034
200
    frame_flags = ctx->chroma_factor << 6;
1035
200
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1036
        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1037
200
    bytestream_put_byte  (&buf, frame_flags);
1038
1039
200
    bytestream_put_byte  (&buf, 0);             // reserved
1040
200
    bytestream_put_byte  (&buf, pic->color_primaries);
1041
200
    bytestream_put_byte  (&buf, pic->color_trc);
1042
200
    bytestream_put_byte  (&buf, pic->colorspace);
1043
200
    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1044
200
    bytestream_put_byte  (&buf, 0);             // reserved
1045
200
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1046
200
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1047
        // luma quantisation matrix
1048
13000
        for (i = 0; i < 64; i++)
1049
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1050
        // chroma quantisation matrix
1051
13000
        for (i = 0; i < 64; i++)
1052
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1053
    } else {
1054
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1055
    }
1056
200
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1057
1058
200
    for (ctx->cur_picture_idx = 0;
1059
400
         ctx->cur_picture_idx < ctx->pictures_per_frame;
1060
200
         ctx->cur_picture_idx++) {
1061
        // picture header
1062
200
        picture_size_pos = buf + 1;
1063
200
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1064
200
        buf += 4;                                   // picture data size will be stored here
1065
200
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
1066
200
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1067
1068
        // seek table - will be filled during slice encoding
1069
200
        slice_sizes = buf;
1070
200
        buf += ctx->slices_per_picture * 2;
1071
1072
        // slices
1073
200
        if (!ctx->force_quant) {
1074
200
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1075
                                  ctx->mb_height);
1076
200
            if (ret)
1077
                return ret;
1078
        }
1079
1080
3050
        for (y = 0; y < ctx->mb_height; y++) {
1081
2850
            int mbs_per_slice = ctx->mbs_per_slice;
1082
13950
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1083
22200
                q = ctx->force_quant ? ctx->force_quant
1084
11100
                                     : ctx->slice_q[mb + y * ctx->slices_width];
1085
1086
16950
                while (ctx->mb_width - x < mbs_per_slice)
1087
5850
                    mbs_per_slice >>= 1;
1088
1089
11100
                bytestream_put_byte(&buf, slice_hdr_size << 3);
1090
11100
                slice_hdr = buf;
1091
11100
                buf += slice_hdr_size - 1;
1092
11100
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1093
                    uint8_t *start = pkt->data;
1094
                    // Recompute new size according to max_slice_size
1095
                    // and deduce delta
1096
                    int delta = 200 + (ctx->pictures_per_frame *
1097
                                ctx->slices_per_picture + 1) *
1098
                                max_slice_size - pkt_size;
1099
1100
                    delta = FFMAX(delta, 2 * max_slice_size);
1101
                    ctx->frame_size_upper_bound += delta;
1102
1103
                    if (!ctx->warn) {
1104
                        avpriv_request_sample(avctx,
1105
                                              "Packet too small: is %i,"
1106
                                              " needs %i (slice: %i). "
1107
                                              "Correct allocation",
1108
                                              pkt_size, delta, max_slice_size);
1109
                        ctx->warn = 1;
1110
                    }
1111
1112
                    ret = av_grow_packet(pkt, delta);
1113
                    if (ret < 0)
1114
                        return ret;
1115
1116
                    pkt_size += delta;
1117
                    // restore pointers
1118
                    orig_buf         = pkt->data + (orig_buf         - start);
1119
                    buf              = pkt->data + (buf              - start);
1120
                    picture_size_pos = pkt->data + (picture_size_pos - start);
1121
                    slice_sizes      = pkt->data + (slice_sizes      - start);
1122
                    slice_hdr        = pkt->data + (slice_hdr        - start);
1123
                    tmp              = pkt->data + (tmp              - start);
1124
                }
1125
11100
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1126
11100
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1127
                                   mbs_per_slice);
1128
11100
                if (ret < 0)
1129
                    return ret;
1130
1131
11100
                bytestream_put_byte(&slice_hdr, q);
1132
11100
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1133
33300
                for (i = 0; i < ctx->num_planes - 1; i++) {
1134
22200
                    bytestream_put_be16(&slice_hdr, sizes[i]);
1135
22200
                    slice_size += sizes[i];
1136
                }
1137
11100
                bytestream_put_be16(&slice_sizes, slice_size);
1138
11100
                buf += slice_size - slice_hdr_size;
1139
11100
                if (max_slice_size < slice_size)
1140
216
                    max_slice_size = slice_size;
1141
            }
1142
        }
1143
1144
200
        picture_size = buf - (picture_size_pos - 1);
1145
200
        bytestream_put_be32(&picture_size_pos, picture_size);
1146
    }
1147
1148
200
    orig_buf -= 8;
1149
200
    frame_size = buf - orig_buf;
1150
200
    bytestream_put_be32(&orig_buf, frame_size);
1151
1152
200
    pkt->size   = frame_size;
1153
200
    pkt->flags |= AV_PKT_FLAG_KEY;
1154
200
    *got_packet = 1;
1155
1156
200
    return 0;
1157
}
1158
1159
4
/**
 * Release everything allocated by encode_init(): the trellis nodes of
 * each thread, the per-thread data array and the slice quantiser table.
 *
 * @param avctx codec context
 * @return 0
 */
static av_cold int encode_close(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    int t = 0;

    if (ctx->tdata) {
        while (t < avctx->thread_count) {
            av_freep(&ctx->tdata[t].nodes);
            t++;
        }
    }

    av_freep(&ctx->tdata);
    av_freep(&ctx->slice_q);

    return 0;
}
1173
1174
957600
/**
 * Copy an 8x8 block of 16-bit samples into block[] in row-major order
 * and run the forward DCT from fdsp on it in place.
 *
 * @param fdsp     DSP context providing the fdct function
 * @param src      top-left sample of the block
 * @param linesize distance between rows; halved to step in 16-bit samples
 * @param block    destination for the 64 coefficients
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const uint16_t *row = src;
    int16_t *dst        = block;
    int r, c;

    for (r = 0; r < 8; r++, dst += 8) {
        for (c = 0; c < 8; c++)
            dst[c] = row[c];
        row += linesize >> 1;
    }

    fdsp->fdct(block);
}
1187
1188
4
/**
 * Initialise the encoder.
 *
 * Validates the user options, selects the profile (automatically when
 * requested), derives the picture geometry in macroblocks and slices,
 * picks the quantisation matrices, and either prepares rate control
 * (precomputed quantiser matrices, slice quantiser table, per-thread
 * trellis nodes) or, when a fixed quantiser is forced through
 * global_quality, derives bits_per_mb from that quantiser. Finally the
 * upper bound of the frame size is computed.
 *
 * Array allocations use the overflow-checked av_malloc_array() and
 * av_calloc() instead of multiplying the sizes by hand.
 *
 * @param avctx codec context
 * @return 0 on success, AVERROR(EINVAL) or AVERROR_INVALIDDATA for bad
 *         options, AVERROR(ENOMEM) on allocation failure
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    int mps;
    int i, j;
    int min_quant, max_quant;
    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);

    avctx->bits_per_raw_sample = 10;
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    avctx->coded_frame->key_frame = 1;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ctx->fdct      = prores_fdct;
    ctx->scantable = interlaced ? ff_prores_interlaced_scan
                                : ff_prores_progressive_scan;
    ff_fdctdsp_init(&ctx->fdsp, avctx);

    mps = ctx->mbs_per_slice;
    if (mps & (mps - 1)) {
        av_log(avctx, AV_LOG_ERROR,
               "there should be an integer power of two MBs per slice\n");
        return AVERROR(EINVAL);
    }
    if (ctx->profile == PRORES_PROFILE_AUTO) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
        /* alpha or non-subsampled chroma selects 4444, everything else HQ */
        ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
                        !(desc->log2_chroma_w + desc->log2_chroma_h))
                     ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
               "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
               ? "4:4:4:4 profile because of the used input colorspace"
               : "HQ profile to keep best quality");
    }
    if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
        if (ctx->profile != PRORES_PROFILE_4444 &&
            ctx->profile != PRORES_PROFILE_4444XQ) {
            // force alpha and warn
            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
                   "encode alpha. Override with -profile if needed.\n");
            ctx->alpha_bits = 0;
        }
        if (ctx->alpha_bits & 7) {
            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
            return AVERROR(EINVAL);
        }
        avctx->bits_per_coded_sample = 32;
    } else {
        ctx->alpha_bits = 0;
    }

    ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
                         ? CFACTOR_Y422
                         : CFACTOR_Y444;
    ctx->profile_info  = prores_profile_info + ctx->profile;
    ctx->num_planes    = 3 + !!ctx->alpha_bits;

    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;

    if (interlaced)
        ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
    else
        ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;

    /* full-width slices plus one slice per set bit of the remainder */
    ctx->slices_width  = ctx->mb_width / mps;
    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
    ctx->pictures_per_frame = 1 + interlaced;

    if (ctx->quant_sel == -1) {
        ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
    } else {
        ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
    }

    if (strlen(ctx->vendor) != 4) {
        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
        return AVERROR_INVALIDDATA;
    }

    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
    if (!ctx->force_quant) {
        if (!ctx->bits_per_mb) {
            /* pick the bitrate table entry matching the frame size in MBs */
            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
                                           ctx->pictures_per_frame)
                    break;
            ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
            if (ctx->alpha_bits)
                ctx->bits_per_mb *= 20;
        } else if (ctx->bits_per_mb < 128) {
            av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
            return AVERROR_INVALIDDATA;
        }

        min_quant = ctx->profile_info->min_quant;
        max_quant = ctx->profile_info->max_quant;
        /* precompute the scaled matrices for every stored quantiser */
        for (i = min_quant; i < MAX_STORED_Q; i++) {
            for (j = 0; j < 64; j++) {
                ctx->quants[i][j] = ctx->quant_mat[j] * i;
                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
            }
        }

        ctx->slice_q = av_malloc_array(ctx->slices_per_picture,
                                       sizeof(*ctx->slice_q));
        if (!ctx->slice_q) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        /* zero-initialised so encode_close() can free partially set up data */
        ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
        if (!ctx->tdata) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        for (j = 0; j < avctx->thread_count; j++) {
            /* one trellis column per slice plus one initial column */
            ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
                                                  TRELLIS_WIDTH *
                                                  sizeof(*ctx->tdata->nodes));
            if (!ctx->tdata[j].nodes) {
                encode_close(avctx);
                return AVERROR(ENOMEM);
            }
            for (i = min_quant; i < max_quant + 2; i++) {
                ctx->tdata[j].nodes[i].prev_node = -1;
                ctx->tdata[j].nodes[i].bits      = 0;
                ctx->tdata[j].nodes[i].score     = 0;
            }
        }
    } else {
        int ls = 0;
        int ls_chroma = 0;

        if (ctx->force_quant > 64) {
            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
            return AVERROR_INVALIDDATA;
        }

        for (j = 0; j < 64; j++) {
            ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
            ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
        }

        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
        if (ctx->chroma_factor == CFACTOR_Y444)
            ctx->bits_per_mb += ls_chroma * 4;
    }

    ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
                                   ctx->slices_per_picture + 1) *
                                  (2 + 2 * ctx->num_planes +
                                   (mps * ctx->bits_per_mb) / 8)
                                  + 200;

    if (ctx->alpha_bits) {
         // The alpha plane is run-coded and might exceed the bit budget.
         ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
                                         ctx->slices_per_picture + 1) *
         /* num pixels per slice */     ((ctx->mbs_per_slice * 256 *
         /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7) >> 3);
    }

    avctx->codec_tag   = ctx->profile_info->tag;

    av_log(avctx, AV_LOG_DEBUG,
           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
           interlaced ? "yes" : "no", ctx->bits_per_mb);
    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
           ctx->frame_size_upper_bound);

    return 0;
}
1369
1370
/* Byte offset of an option's backing field inside ProresContext. */
#define OFFSET(x) offsetof(ProresContext, x)
/* Flags shared by every option: video, encoding. Parenthesised so the
 * expansion stays a single operand wherever it is used. */
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1372
1373
static const AVOption options[] = {
1374
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1375
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1376
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1377
        { .i64 = PRORES_PROFILE_AUTO },
1378
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1379
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1380
        0, 0, VE, "profile" },
1381
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1382
        0, 0, VE, "profile" },
1383
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1384
        0, 0, VE, "profile" },
1385
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1386
        0, 0, VE, "profile" },
1387
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1388
        0, 0, VE, "profile" },
1389
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1390
        0, 0, VE, "profile" },
1391
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1392
        0, 0, VE, "profile" },
1393
    { "vendor", "vendor ID", OFFSET(vendor),
1394
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1395
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1396
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1397
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1398
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1399
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1400
        0, 0, VE, "quant_mat" },
1401
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1402
        0, 0, VE, "quant_mat" },
1403
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1404
        0, 0, VE, "quant_mat" },
1405
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1406
        0, 0, VE, "quant_mat" },
1407
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1408
        0, 0, VE, "quant_mat" },
1409
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1410
        0, 0, VE, "quant_mat" },
1411
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1412
        { .i64 = 16 }, 0, 16, VE },
1413
    { NULL }
1414
};
1415
1416
static const AVClass proresenc_class = {
1417
    .class_name = "ProRes encoder",
1418
    .item_name  = av_default_item_name,
1419
    .option     = options,
1420
    .version    = LIBAVUTIL_VERSION_INT,
1421
};
1422
1423
AVCodec ff_prores_ks_encoder = {
1424
    .name           = "prores_ks",
1425
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1426
    .type           = AVMEDIA_TYPE_VIDEO,
1427
    .id             = AV_CODEC_ID_PRORES,
1428
    .priv_data_size = sizeof(ProresContext),
1429
    .init           = encode_init,
1430
    .close          = encode_close,
1431
    .encode2        = encode_frame,
1432
    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1433
    .pix_fmts       = (const enum AVPixelFormat[]) {
1434
                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1435
                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1436
                      },
1437
    .priv_class     = &proresenc_class,
1438
    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1439
};