GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/proresenc_kostya.c Lines: 472 658 71.7 %
Date: 2021-04-18 21:26:34 Branches: 180 302 59.6 %

Line Branch Exec Source
1
/*
2
 * Apple ProRes encoder
3
 *
4
 * Copyright (c) 2012 Konstantin Shishkov
5
 *
6
 * This encoder appears to be based on Anatoliy Wassermans considering
7
 * similarities in the bugs.
8
 *
9
 * This file is part of FFmpeg.
10
 *
11
 * FFmpeg is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2.1 of the License, or (at your option) any later version.
15
 *
16
 * FFmpeg is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with FFmpeg; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
 */
25
26
#include "libavutil/mem_internal.h"
27
#include "libavutil/opt.h"
28
#include "libavutil/pixdesc.h"
29
#include "avcodec.h"
30
#include "fdctdsp.h"
31
#include "put_bits.h"
32
#include "profiles.h"
33
#include "bytestream.h"
34
#include "internal.h"
35
#include "proresdata.h"
36
37
#define CFACTOR_Y422 2
38
#define CFACTOR_Y444 3
39
40
#define MAX_MBS_PER_SLICE 8
41
42
#define MAX_PLANES 4
43
44
enum {
45
    PRORES_PROFILE_AUTO  = -1,
46
    PRORES_PROFILE_PROXY = 0,
47
    PRORES_PROFILE_LT,
48
    PRORES_PROFILE_STANDARD,
49
    PRORES_PROFILE_HQ,
50
    PRORES_PROFILE_4444,
51
    PRORES_PROFILE_4444XQ,
52
};
53
54
enum {
55
    QUANT_MAT_PROXY = 0,
56
    QUANT_MAT_PROXY_CHROMA,
57
    QUANT_MAT_LT,
58
    QUANT_MAT_STANDARD,
59
    QUANT_MAT_HQ,
60
    QUANT_MAT_XQ_LUMA,
61
    QUANT_MAT_DEFAULT,
62
};
63
64
static const uint8_t prores_quant_matrices[][64] = {
65
    { // proxy
66
         4,  7,  9, 11, 13, 14, 15, 63,
67
         7,  7, 11, 12, 14, 15, 63, 63,
68
         9, 11, 13, 14, 15, 63, 63, 63,
69
        11, 11, 13, 14, 63, 63, 63, 63,
70
        11, 13, 14, 63, 63, 63, 63, 63,
71
        13, 14, 63, 63, 63, 63, 63, 63,
72
        13, 63, 63, 63, 63, 63, 63, 63,
73
        63, 63, 63, 63, 63, 63, 63, 63,
74
    },
75
    { // proxy chromas
76
        4,  7,  9, 11, 13, 14, 63, 63,
77
        7,  7, 11, 12, 14, 63, 63, 63,
78
        9, 11, 13, 14, 63, 63, 63, 63,
79
        11, 11, 13, 14, 63, 63, 63, 63,
80
        11, 13, 14, 63, 63, 63, 63, 63,
81
        13, 14, 63, 63, 63, 63, 63, 63,
82
        13, 63, 63, 63, 63, 63, 63, 63,
83
        63, 63, 63, 63, 63, 63, 63, 63
84
    },
85
    { // LT
86
         4,  5,  6,  7,  9, 11, 13, 15,
87
         5,  5,  7,  8, 11, 13, 15, 17,
88
         6,  7,  9, 11, 13, 15, 15, 17,
89
         7,  7,  9, 11, 13, 15, 17, 19,
90
         7,  9, 11, 13, 14, 16, 19, 23,
91
         9, 11, 13, 14, 16, 19, 23, 29,
92
         9, 11, 13, 15, 17, 21, 28, 35,
93
        11, 13, 16, 17, 21, 28, 35, 41,
94
    },
95
    { // standard
96
         4,  4,  5,  5,  6,  7,  7,  9,
97
         4,  4,  5,  6,  7,  7,  9,  9,
98
         5,  5,  6,  7,  7,  9,  9, 10,
99
         5,  5,  6,  7,  7,  9,  9, 10,
100
         5,  6,  7,  7,  8,  9, 10, 12,
101
         6,  7,  7,  8,  9, 10, 12, 15,
102
         6,  7,  7,  9, 10, 11, 14, 17,
103
         7,  7,  9, 10, 11, 14, 17, 21,
104
    },
105
    { // high quality
106
         4,  4,  4,  4,  4,  4,  4,  4,
107
         4,  4,  4,  4,  4,  4,  4,  4,
108
         4,  4,  4,  4,  4,  4,  4,  4,
109
         4,  4,  4,  4,  4,  4,  4,  5,
110
         4,  4,  4,  4,  4,  4,  5,  5,
111
         4,  4,  4,  4,  4,  5,  5,  6,
112
         4,  4,  4,  4,  5,  5,  6,  7,
113
         4,  4,  4,  4,  5,  6,  7,  7,
114
    },
115
    { // XQ luma
116
        2,  2,  2,  2,  2,  2,  2,  2,
117
        2,  2,  2,  2,  2,  2,  2,  2,
118
        2,  2,  2,  2,  2,  2,  2,  2,
119
        2,  2,  2,  2,  2,  2,  2,  3,
120
        2,  2,  2,  2,  2,  2,  3,  3,
121
        2,  2,  2,  2,  2,  3,  3,  3,
122
        2,  2,  2,  2,  3,  3,  3,  4,
123
        2,  2,  2,  2,  3,  3,  4,  4,
124
    },
125
    { // codec default
126
         4,  4,  4,  4,  4,  4,  4,  4,
127
         4,  4,  4,  4,  4,  4,  4,  4,
128
         4,  4,  4,  4,  4,  4,  4,  4,
129
         4,  4,  4,  4,  4,  4,  4,  4,
130
         4,  4,  4,  4,  4,  4,  4,  4,
131
         4,  4,  4,  4,  4,  4,  4,  4,
132
         4,  4,  4,  4,  4,  4,  4,  4,
133
         4,  4,  4,  4,  4,  4,  4,  4,
134
    },
135
};
136
137
#define NUM_MB_LIMITS 4
138
static const int prores_mb_limits[NUM_MB_LIMITS] = {
139
    1620, // up to 720x576
140
    2700, // up to 960x720
141
    6075, // up to 1440x1080
142
    9216, // up to 2048x1152
143
};
144
145
static const struct prores_profile {
146
    const char *full_name;
147
    uint32_t    tag;
148
    int         min_quant;
149
    int         max_quant;
150
    int         br_tab[NUM_MB_LIMITS];
151
    int         quant;
152
    int         quant_chroma;
153
} prores_profile_info[6] = {
154
    {
155
        .full_name = "proxy",
156
        .tag       = MKTAG('a', 'p', 'c', 'o'),
157
        .min_quant = 4,
158
        .max_quant = 8,
159
        .br_tab    = { 300, 242, 220, 194 },
160
        .quant     = QUANT_MAT_PROXY,
161
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
162
    },
163
    {
164
        .full_name = "LT",
165
        .tag       = MKTAG('a', 'p', 'c', 's'),
166
        .min_quant = 1,
167
        .max_quant = 9,
168
        .br_tab    = { 720, 560, 490, 440 },
169
        .quant     = QUANT_MAT_LT,
170
        .quant_chroma = QUANT_MAT_LT,
171
    },
172
    {
173
        .full_name = "standard",
174
        .tag       = MKTAG('a', 'p', 'c', 'n'),
175
        .min_quant = 1,
176
        .max_quant = 6,
177
        .br_tab    = { 1050, 808, 710, 632 },
178
        .quant     = QUANT_MAT_STANDARD,
179
        .quant_chroma = QUANT_MAT_STANDARD,
180
    },
181
    {
182
        .full_name = "high quality",
183
        .tag       = MKTAG('a', 'p', 'c', 'h'),
184
        .min_quant = 1,
185
        .max_quant = 6,
186
        .br_tab    = { 1566, 1216, 1070, 950 },
187
        .quant     = QUANT_MAT_HQ,
188
        .quant_chroma = QUANT_MAT_HQ,
189
    },
190
    {
191
        .full_name = "4444",
192
        .tag       = MKTAG('a', 'p', '4', 'h'),
193
        .min_quant = 1,
194
        .max_quant = 6,
195
        .br_tab    = { 2350, 1828, 1600, 1425 },
196
        .quant     = QUANT_MAT_HQ,
197
        .quant_chroma = QUANT_MAT_HQ,
198
    },
199
    {
200
        .full_name = "4444XQ",
201
        .tag       = MKTAG('a', 'p', '4', 'x'),
202
        .min_quant = 1,
203
        .max_quant = 6,
204
        .br_tab    = { 3525, 2742, 2400, 2137 },
205
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
206
        .quant_chroma = QUANT_MAT_HQ,
207
    }
208
};
209
210
#define TRELLIS_WIDTH 16
211
#define SCORE_LIMIT   INT_MAX / 2
212
213
struct TrellisNode {
214
    int prev_node;
215
    int quant;
216
    int bits;
217
    int score;
218
};
219
220
#define MAX_STORED_Q 16
221
222
typedef struct ProresThreadData {
223
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
224
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
225
    int16_t custom_q[64];
226
    int16_t custom_chroma_q[64];
227
    struct TrellisNode *nodes;
228
} ProresThreadData;
229
230
typedef struct ProresContext {
231
    AVClass *class;
232
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
233
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
234
    int16_t quants[MAX_STORED_Q][64];
235
    int16_t quants_chroma[MAX_STORED_Q][64];
236
    int16_t custom_q[64];
237
    int16_t custom_chroma_q[64];
238
    const uint8_t *quant_mat;
239
    const uint8_t *quant_chroma_mat;
240
    const uint8_t *scantable;
241
242
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
243
                 ptrdiff_t linesize, int16_t *block);
244
    FDCTDSPContext fdsp;
245
246
    const AVFrame *pic;
247
    int mb_width, mb_height;
248
    int mbs_per_slice;
249
    int num_chroma_blocks, chroma_factor;
250
    int slices_width;
251
    int slices_per_picture;
252
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
253
    int cur_picture_idx;
254
    int num_planes;
255
    int bits_per_mb;
256
    int force_quant;
257
    int alpha_bits;
258
    int warn;
259
260
    char *vendor;
261
    int quant_sel;
262
263
    int frame_size_upper_bound;
264
265
    int profile;
266
    const struct prores_profile *profile_info;
267
268
    int *slice_q;
269
270
    ProresThreadData *tdata;
271
} ProresContext;
272
273
66600
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
274
                           ptrdiff_t linesize, int x, int y, int w, int h,
275
                           int16_t *blocks, uint16_t *emu_buf,
276
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
277
{
278
    const uint16_t *esrc;
279
66600
    const int mb_width = 4 * blocks_per_mb;
280
    ptrdiff_t elinesize;
281
    int i, j, k;
282
283
425700
    for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
284
359100
        if (x >= w) {
285
            memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
286
                              * sizeof(*blocks));
287
            return;
288
        }
289

359100
        if (x + mb_width <= w && y + 16 <= h) {
290
357600
            esrc      = src;
291
357600
            elinesize = linesize;
292
        } else {
293
            int bw, bh, pix;
294
295
1500
            esrc      = emu_buf;
296
1500
            elinesize = 16 * sizeof(*emu_buf);
297
298
1500
            bw = FFMIN(w - x, mb_width);
299
1500
            bh = FFMIN(h - y, 16);
300
301
12900
            for (j = 0; j < bh; j++) {
302
11400
                memcpy(emu_buf + j * 16,
303
11400
                       (const uint8_t*)src + j * linesize,
304
                       bw * sizeof(*src));
305
11400
                pix = emu_buf[j * 16 + bw - 1];
306
106600
                for (k = bw; k < mb_width; k++)
307
95200
                    emu_buf[j * 16 + k] = pix;
308
            }
309
14100
            for (; j < 16; j++)
310
12600
                memcpy(emu_buf + j * 16,
311
12600
                       emu_buf + (bh - 1) * 16,
312
                       mb_width * sizeof(*emu_buf));
313
        }
314
359100
        if (!is_chroma) {
315
119700
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
316
119700
            blocks += 64;
317
119700
            if (blocks_per_mb > 2) {
318
119700
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
319
119700
                blocks += 64;
320
            }
321
119700
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
322
119700
            blocks += 64;
323
119700
            if (blocks_per_mb > 2) {
324
119700
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
325
119700
                blocks += 64;
326
            }
327
        } else {
328
239400
            ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
329
239400
            blocks += 64;
330
239400
            ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
331
239400
            blocks += 64;
332
239400
            if (blocks_per_mb > 2) {
333
                ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
334
                blocks += 64;
335
                ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
336
                blocks += 64;
337
            }
338
        }
339
340
359100
        x += mb_width;
341
    }
342
}
343
344
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
345
                           ptrdiff_t linesize, int x, int y, int w, int h,
346
                           int16_t *blocks, int mbs_per_slice, int abits)
347
{
348
    const int slice_width = 16 * mbs_per_slice;
349
    int i, j, copy_w, copy_h;
350
351
    copy_w = FFMIN(w - x, slice_width);
352
    copy_h = FFMIN(h - y, 16);
353
    for (i = 0; i < copy_h; i++) {
354
        memcpy(blocks, src, copy_w * sizeof(*src));
355
        if (abits == 8)
356
            for (j = 0; j < copy_w; j++)
357
                blocks[j] >>= 2;
358
        else
359
            for (j = 0; j < copy_w; j++)
360
                blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
361
        for (j = copy_w; j < slice_width; j++)
362
            blocks[j] = blocks[copy_w - 1];
363
        blocks += slice_width;
364
        src    += linesize >> 1;
365
    }
366
    for (; i < 16; i++) {
367
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
368
        blocks += slice_width;
369
    }
370
}
371
372
/**
373
 * Write an unsigned rice/exp golomb codeword.
374
 */
375
26885174
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
376
{
377
    unsigned int rice_order, exp_order, switch_bits, switch_val;
378
    int exponent;
379
380
    /* number of prefix bits to switch between Rice and expGolomb */
381
26885174
    switch_bits = (codebook & 3) + 1;
382
26885174
    rice_order  =  codebook >> 5;       /* rice code order */
383
26885174
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
384
385
26885174
    switch_val  = switch_bits << rice_order;
386
387
26885174
    if (val >= switch_val) {
388
8074309
        val -= switch_val - (1 << exp_order);
389
8074309
        exponent = av_log2(val);
390
391
8074309
        put_bits(pb, exponent - exp_order + switch_bits, 0);
392
8074309
        put_bits(pb, exponent + 1, val);
393
    } else {
394
18810865
        exponent = val >> rice_order;
395
396
18810865
        if (exponent)
397
4838901
            put_bits(pb, exponent, 0);
398
18810865
        put_bits(pb, 1, 1);
399
18810865
        if (rice_order)
400
1904515
            put_sbits(pb, rice_order, val);
401
    }
402
26885174
}
403
404
#define GET_SIGN(x)  ((x) >> 31)
405
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
406
407
33300
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
408
                       int blocks_per_slice, int scale)
409
{
410
    int i;
411
33300
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
412
413
33300
    prev_dc = (blocks[0] - 0x4000) / scale;
414
33300
    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
415
33300
    sign     = 0;
416
33300
    codebook = 3;
417
33300
    blocks  += 64;
418
419
478800
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
420
445500
        dc       = (blocks[0] - 0x4000) / scale;
421
445500
        delta    = dc - prev_dc;
422
445500
        new_sign = GET_SIGN(delta);
423
445500
        delta    = (delta ^ sign) - sign;
424
445500
        code     = MAKE_CODE(delta);
425
445500
        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
426
445500
        codebook = (code + (code & 1)) >> 1;
427
445500
        codebook = FFMIN(codebook, 3);
428
445500
        sign     = new_sign;
429
445500
        prev_dc  = dc;
430
    }
431
33300
}
432
433
33300
static void encode_acs(PutBitContext *pb, int16_t *blocks,
434
                       int blocks_per_slice,
435
                       int plane_size_factor,
436
                       const uint8_t *scan, const int16_t *qmat)
437
{
438
    int idx, i;
439
    int run, level, run_cb, lev_cb;
440
    int max_coeffs, abs_level;
441
442
33300
    max_coeffs = blocks_per_slice << 6;
443
33300
    run_cb     = ff_prores_run_to_cb_index[4];
444
33300
    lev_cb     = ff_prores_lev_to_cb_index[2];
445
33300
    run        = 0;
446
447
2131200
    for (i = 1; i < 64; i++) {
448
32262300
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
449
30164400
            level = blocks[idx] / qmat[scan[i]];
450
30164400
            if (level) {
451
13203187
                abs_level = FFABS(level);
452
13203187
                encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
453
13203187
                encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
454
                                    abs_level - 1);
455
13203187
                put_sbits(pb, 1, GET_SIGN(level));
456
457
13203187
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
458
13203187
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
459
13203187
                run    = 0;
460
            } else {
461
16961213
                run++;
462
            }
463
        }
464
    }
465
33300
}
466
467
33300
static void encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
468
                              const uint16_t *src, ptrdiff_t linesize,
469
                              int mbs_per_slice, int16_t *blocks,
470
                              int blocks_per_mb, int plane_size_factor,
471
                              const int16_t *qmat)
472
{
473
33300
    int blocks_per_slice = mbs_per_slice * blocks_per_mb;
474
475
33300
    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
476
33300
    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
477
               ctx->scantable, qmat);
478
33300
}
479
480
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
481
{
482
    const int dbits = (abits == 8) ? 4 : 7;
483
    const int dsize = 1 << dbits - 1;
484
    int diff = cur - prev;
485
486
    diff = av_mod_uintp2(diff, abits);
487
    if (diff >= (1 << abits) - dsize)
488
        diff -= 1 << abits;
489
    if (diff < -dsize || diff > dsize || !diff) {
490
        put_bits(pb, 1, 1);
491
        put_bits(pb, abits, diff);
492
    } else {
493
        put_bits(pb, 1, 0);
494
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
495
        put_bits(pb, 1, diff < 0);
496
    }
497
}
498
499
static void put_alpha_run(PutBitContext *pb, int run)
500
{
501
    if (run) {
502
        put_bits(pb, 1, 0);
503
        if (run < 0x10)
504
            put_bits(pb, 4, run);
505
        else
506
            put_bits(pb, 15, run);
507
    } else {
508
        put_bits(pb, 1, 1);
509
    }
510
}
511
512
// todo alpha quantisation for high quants
513
static void encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
514
                              int mbs_per_slice, uint16_t *blocks,
515
                              int quant)
516
{
517
    const int abits = ctx->alpha_bits;
518
    const int mask  = (1 << abits) - 1;
519
    const int num_coeffs = mbs_per_slice * 256;
520
    int prev = mask, cur;
521
    int idx = 0;
522
    int run = 0;
523
524
    cur = blocks[idx++];
525
    put_alpha_diff(pb, cur, prev, abits);
526
    prev = cur;
527
    do {
528
        cur = blocks[idx++];
529
        if (cur != prev) {
530
            put_alpha_run (pb, run);
531
            put_alpha_diff(pb, cur, prev, abits);
532
            prev = cur;
533
            run  = 0;
534
        } else {
535
            run++;
536
        }
537
    } while (idx < num_coeffs);
538
    if (run)
539
        put_alpha_run(pb, run);
540
}
541
542
11100
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
543
                        PutBitContext *pb,
544
                        int sizes[4], int x, int y, int quant,
545
                        int mbs_per_slice)
546
{
547
11100
    ProresContext *ctx = avctx->priv_data;
548
    int i, xp, yp;
549
11100
    int total_size = 0;
550
    const uint16_t *src;
551
11100
    int slice_width_factor = av_log2(mbs_per_slice);
552
    int num_cblocks, pwidth, line_add;
553
    ptrdiff_t linesize;
554
    int plane_factor, is_chroma;
555
    uint16_t *qmat;
556
    uint16_t *qmat_chroma;
557
558
11100
    if (ctx->pictures_per_frame == 1)
559
11100
        line_add = 0;
560
    else
561
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
562
563
11100
    if (ctx->force_quant) {
564
        qmat = ctx->quants[0];
565
        qmat_chroma = ctx->quants_chroma[0];
566
11100
    } else if (quant < MAX_STORED_Q) {
567
8353
        qmat = ctx->quants[quant];
568
8353
        qmat_chroma = ctx->quants_chroma[quant];
569
    } else {
570
2747
        qmat = ctx->custom_q;
571
2747
        qmat_chroma = ctx->custom_chroma_q;
572
178555
        for (i = 0; i < 64; i++) {
573
175808
            qmat[i] = ctx->quant_mat[i] * quant;
574
175808
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
575
        }
576
    }
577
578
44400
    for (i = 0; i < ctx->num_planes; i++) {
579

33300
        is_chroma    = (i == 1 || i == 2);
580
33300
        plane_factor = slice_width_factor + 2;
581
33300
        if (is_chroma)
582
22200
            plane_factor += ctx->chroma_factor - 3;
583

33300
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
584
11100
            xp          = x << 4;
585
11100
            yp          = y << 4;
586
11100
            num_cblocks = 4;
587
11100
            pwidth      = avctx->width;
588
        } else {
589
22200
            xp          = x << 3;
590
22200
            yp          = y << 4;
591
22200
            num_cblocks = 2;
592
22200
            pwidth      = avctx->width >> 1;
593
        }
594
595
33300
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
596
33300
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
597
33300
                                line_add * pic->linesize[i]) + xp;
598
599
33300
        if (i < 3) {
600
33300
            get_slice_data(ctx, src, linesize, xp, yp,
601
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
602
33300
                           ctx->blocks[0], ctx->emu_buf,
603
                           mbs_per_slice, num_cblocks, is_chroma);
604
33300
            if (!is_chroma) {/* luma quant */
605
11100
                encode_slice_plane(ctx, pb, src, linesize,
606
11100
                                   mbs_per_slice, ctx->blocks[0],
607
                                   num_cblocks, plane_factor, qmat);
608
            } else { /* chroma plane */
609
22200
                encode_slice_plane(ctx, pb, src, linesize,
610
22200
                                   mbs_per_slice, ctx->blocks[0],
611
                                   num_cblocks, plane_factor, qmat_chroma);
612
            }
613
        } else {
614
            get_alpha_data(ctx, src, linesize, xp, yp,
615
                           pwidth, avctx->height / ctx->pictures_per_frame,
616
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
617
            encode_alpha_plane(ctx, pb, mbs_per_slice, ctx->blocks[0], quant);
618
        }
619
33300
        flush_put_bits(pb);
620
33300
        sizes[i]   = put_bytes_output(pb) - total_size;
621
33300
        total_size = put_bytes_output(pb);
622
    }
623
11100
    return total_size;
624
}
625
626
390320122
static inline int estimate_vlc(unsigned codebook, int val)
627
{
628
    unsigned int rice_order, exp_order, switch_bits, switch_val;
629
    int exponent;
630
631
    /* number of prefix bits to switch between Rice and expGolomb */
632
390320122
    switch_bits = (codebook & 3) + 1;
633
390320122
    rice_order  =  codebook >> 5;       /* rice code order */
634
390320122
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
635
636
390320122
    switch_val  = switch_bits << rice_order;
637
638
390320122
    if (val >= switch_val) {
639
135366530
        val -= switch_val - (1 << exp_order);
640
135366530
        exponent = av_log2(val);
641
642
135366530
        return exponent * 2 - exp_order + switch_bits + 1;
643
    } else {
644
254953592
        return (val >> rice_order) + rice_order + 1;
645
    }
646
}
647
648
379686
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
649
                        int scale)
650
{
651
    int i;
652
379686
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
653
    int bits;
654
655
379686
    prev_dc  = (blocks[0] - 0x4000) / scale;
656
379686
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
657
379686
    sign     = 0;
658
379686
    codebook = 3;
659
379686
    blocks  += 64;
660
379686
    *error  += FFABS(blocks[0] - 0x4000) % scale;
661
662
5449672
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
663
5069986
        dc       = (blocks[0] - 0x4000) / scale;
664
5069986
        *error  += FFABS(blocks[0] - 0x4000) % scale;
665
5069986
        delta    = dc - prev_dc;
666
5069986
        new_sign = GET_SIGN(delta);
667
5069986
        delta    = (delta ^ sign) - sign;
668
5069986
        code     = MAKE_CODE(delta);
669
5069986
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
670
5069986
        codebook = (code + (code & 1)) >> 1;
671
5069986
        codebook = FFMIN(codebook, 3);
672
5069986
        sign     = new_sign;
673
5069986
        prev_dc  = dc;
674
    }
675
676
379686
    return bits;
677
}
678
679
379686
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
680
                        int plane_size_factor,
681
                        const uint8_t *scan, const int16_t *qmat)
682
{
683
    int idx, i;
684
    int run, level, run_cb, lev_cb;
685
    int max_coeffs, abs_level;
686
379686
    int bits = 0;
687
688
379686
    max_coeffs = blocks_per_slice << 6;
689
379686
    run_cb     = ff_prores_run_to_cb_index[4];
690
379686
    lev_cb     = ff_prores_lev_to_cb_index[2];
691
379686
    run        = 0;
692
693
24299904
    for (i = 1; i < 64; i++) {
694
367249554
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
695
343329336
            level   = blocks[idx] / qmat[scan[i]];
696
343329336
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
697
343329336
            if (level) {
698
192435225
                abs_level = FFABS(level);
699
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
700
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
701
192435225
                                     abs_level - 1) + 1;
702
703
192435225
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
704
192435225
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
705
192435225
                run    = 0;
706
            } else {
707
150894111
                run++;
708
            }
709
        }
710
    }
711
712
379686
    return bits;
713
}
714
715
379686
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
716
                                const uint16_t *src, ptrdiff_t linesize,
717
                                int mbs_per_slice,
718
                                int blocks_per_mb, int plane_size_factor,
719
                                const int16_t *qmat, ProresThreadData *td)
720
{
721
    int blocks_per_slice;
722
    int bits;
723
724
379686
    blocks_per_slice = mbs_per_slice * blocks_per_mb;
725
726
379686
    bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
727
379686
    bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
728
                         plane_size_factor, ctx->scantable, qmat);
729
730
379686
    return FFALIGN(bits, 8);
731
}
732
733
static int est_alpha_diff(int cur, int prev, int abits)
734
{
735
    const int dbits = (abits == 8) ? 4 : 7;
736
    const int dsize = 1 << dbits - 1;
737
    int diff = cur - prev;
738
739
    diff = av_mod_uintp2(diff, abits);
740
    if (diff >= (1 << abits) - dsize)
741
        diff -= 1 << abits;
742
    if (diff < -dsize || diff > dsize || !diff)
743
        return abits + 1;
744
    else
745
        return dbits + 1;
746
}
747
748
static int estimate_alpha_plane(ProresContext *ctx,
749
                                const uint16_t *src, ptrdiff_t linesize,
750
                                int mbs_per_slice, int16_t *blocks)
751
{
752
    const int abits = ctx->alpha_bits;
753
    const int mask  = (1 << abits) - 1;
754
    const int num_coeffs = mbs_per_slice * 256;
755
    int prev = mask, cur;
756
    int idx = 0;
757
    int run = 0;
758
    int bits;
759
760
    cur = blocks[idx++];
761
    bits = est_alpha_diff(cur, prev, abits);
762
    prev = cur;
763
    do {
764
        cur = blocks[idx++];
765
        if (cur != prev) {
766
            if (!run)
767
                bits++;
768
            else if (run < 0x10)
769
                bits += 4;
770
            else
771
                bits += 15;
772
            bits += est_alpha_diff(cur, prev, abits);
773
            prev = cur;
774
            run  = 0;
775
        } else {
776
            run++;
777
        }
778
    } while (idx < num_coeffs);
779
780
    if (run) {
781
        if (run < 0x10)
782
            bits += 4;
783
        else
784
            bits += 15;
785
    }
786
787
    return bits;
788
}
789
790
11100
static int find_slice_quant(AVCodecContext *avctx,
791
                            int trellis_node, int x, int y, int mbs_per_slice,
792
                            ProresThreadData *td)
793
{
794
11100
    ProresContext *ctx = avctx->priv_data;
795
    int i, q, pq, xp, yp;
796
    const uint16_t *src;
797
11100
    int slice_width_factor = av_log2(mbs_per_slice);
798
    int num_cblocks[MAX_PLANES], pwidth;
799
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
800
11100
    const int min_quant = ctx->profile_info->min_quant;
801
11100
    const int max_quant = ctx->profile_info->max_quant;
802
    int error, bits, bits_limit;
803
    int mbs, prev, cur, new_score;
804
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
805
    int overquant;
806
    uint16_t *qmat;
807
    uint16_t *qmat_chroma;
808
    int linesize[4], line_add;
809
11100
    int alpha_bits = 0;
810
811
11100
    if (ctx->pictures_per_frame == 1)
812
11100
        line_add = 0;
813
    else
814
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
815
11100
    mbs = x + mbs_per_slice;
816
817
44400
    for (i = 0; i < ctx->num_planes; i++) {
818

33300
        is_chroma[i]    = (i == 1 || i == 2);
819
33300
        plane_factor[i] = slice_width_factor + 2;
820
33300
        if (is_chroma[i])
821
22200
            plane_factor[i] += ctx->chroma_factor - 3;
822

33300
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
823
11100
            xp             = x << 4;
824
11100
            yp             = y << 4;
825
11100
            num_cblocks[i] = 4;
826
11100
            pwidth         = avctx->width;
827
        } else {
828
22200
            xp             = x << 3;
829
22200
            yp             = y << 4;
830
22200
            num_cblocks[i] = 2;
831
22200
            pwidth         = avctx->width >> 1;
832
        }
833
834
33300
        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
835
33300
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
836
33300
                                 line_add * ctx->pic->linesize[i]) + xp;
837
838
33300
        if (i < 3) {
839
33300
            get_slice_data(ctx, src, linesize[i], xp, yp,
840
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
841
33300
                           td->blocks[i], td->emu_buf,
842
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
843
        } else {
844
            get_alpha_data(ctx, src, linesize[i], xp, yp,
845
                           pwidth, avctx->height / ctx->pictures_per_frame,
846
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
847
        }
848
    }
849
850
88800
    for (q = min_quant; q < max_quant + 2; q++) {
851
77700
        td->nodes[trellis_node + q].prev_node = -1;
852
77700
        td->nodes[trellis_node + q].quant     = q;
853
    }
854
855
11100
    if (ctx->alpha_bits)
856
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
857
                                          mbs_per_slice, td->blocks[3]);
858
    // todo: maybe perform coarser quantising to fit into frame size when needed
859
77700
    for (q = min_quant; q <= max_quant; q++) {
860
66600
        bits  = alpha_bits;
861
66600
        error = 0;
862
133200
        bits += estimate_slice_plane(ctx, &error, 0,
863
66600
                                     src, linesize[0],
864
                                     mbs_per_slice,
865
                                     num_cblocks[0], plane_factor[0],
866
66600
                                     ctx->quants[q], td); /* estimate luma plane */
867
199800
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
868
133200
            bits += estimate_slice_plane(ctx, &error, i,
869
133200
                                         src, linesize[i],
870
                                         mbs_per_slice,
871
                                         num_cblocks[i], plane_factor[i],
872
133200
                                         ctx->quants_chroma[q], td);
873
        }
874
66600
        if (bits > 65000 * 8)
875
            error = SCORE_LIMIT;
876
877
66600
        slice_bits[q]  = bits;
878
66600
        slice_score[q] = error;
879
    }
880
11100
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
881
3643
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
882
3643
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
883
3643
        overquant = max_quant;
884
    } else {
885
59962
        for (q = max_quant + 1; q < 128; q++) {
886
59962
            bits  = alpha_bits;
887
59962
            error = 0;
888
59962
            if (q < MAX_STORED_Q) {
889
44939
                qmat = ctx->quants[q];
890
44939
                qmat_chroma = ctx->quants_chroma[q];
891
            } else {
892
15023
                qmat = td->custom_q;
893
15023
                qmat_chroma = td->custom_chroma_q;
894
976495
                for (i = 0; i < 64; i++) {
895
961472
                    qmat[i] = ctx->quant_mat[i] * q;
896
961472
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
897
                }
898
            }
899
119924
            bits += estimate_slice_plane(ctx, &error, 0,
900
59962
                                         src, linesize[0],
901
                                         mbs_per_slice,
902
                                         num_cblocks[0], plane_factor[0],
903
                                         qmat, td);/* estimate luma plane */
904
179886
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
905
119924
                bits += estimate_slice_plane(ctx, &error, i,
906
119924
                                             src, linesize[i],
907
                                             mbs_per_slice,
908
                                             num_cblocks[i], plane_factor[i],
909
                                             qmat_chroma, td);
910
            }
911
59962
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
912
7457
                break;
913
        }
914
915
7457
        slice_bits[max_quant + 1]  = bits;
916
7457
        slice_score[max_quant + 1] = error;
917
7457
        overquant = q;
918
    }
919
11100
    td->nodes[trellis_node + max_quant + 1].quant = overquant;
920
921
11100
    bits_limit = mbs * ctx->bits_per_mb;
922
88800
    for (pq = min_quant; pq < max_quant + 2; pq++) {
923
77700
        prev = trellis_node - TRELLIS_WIDTH + pq;
924
925
621600
        for (q = min_quant; q < max_quant + 2; q++) {
926
543900
            cur = trellis_node + q;
927
928
543900
            bits  = td->nodes[prev].bits + slice_bits[q];
929
543900
            error = slice_score[q];
930
543900
            if (bits > bits_limit)
931
438632
                error = SCORE_LIMIT;
932
933

543900
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
934
98480
                new_score = td->nodes[prev].score + error;
935
            else
936
445420
                new_score = SCORE_LIMIT;
937
543900
            if (td->nodes[cur].prev_node == -1 ||
938
466200
                td->nodes[cur].score >= new_score) {
939
940
505564
                td->nodes[cur].bits      = bits;
941
505564
                td->nodes[cur].score     = new_score;
942
505564
                td->nodes[cur].prev_node = prev;
943
            }
944
        }
945
    }
946
947
11100
    error = td->nodes[trellis_node + min_quant].score;
948
11100
    pq    = trellis_node + min_quant;
949
77700
    for (q = min_quant + 1; q < max_quant + 2; q++) {
950
66600
        if (td->nodes[trellis_node + q].score <= error) {
951
52033
            error = td->nodes[trellis_node + q].score;
952
52033
            pq    = trellis_node + q;
953
        }
954
    }
955
956
11100
    return pq;
957
}
958
959
2850
static int find_quant_thread(AVCodecContext *avctx, void *arg,
960
                             int jobnr, int threadnr)
961
{
962
2850
    ProresContext *ctx = avctx->priv_data;
963
2850
    ProresThreadData *td = ctx->tdata + threadnr;
964
2850
    int mbs_per_slice = ctx->mbs_per_slice;
965
2850
    int x, y = jobnr, mb, q = 0;
966
967
13950
    for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
968
16950
        while (ctx->mb_width - x < mbs_per_slice)
969
5850
            mbs_per_slice >>= 1;
970
11100
        q = find_slice_quant(avctx,
971
11100
                             (mb + 1) * TRELLIS_WIDTH, x, y,
972
                             mbs_per_slice, td);
973
    }
974
975
13950
    for (x = ctx->slices_width - 1; x >= 0; x--) {
976
11100
        ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
977
11100
        q = td->nodes[q].prev_node;
978
    }
979
980
2850
    return 0;
981
}
982
983
200
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
984
                        const AVFrame *pic, int *got_packet)
985
{
986
200
    ProresContext *ctx = avctx->priv_data;
987
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
988
    uint8_t *picture_size_pos;
989
    PutBitContext pb;
990
200
    int x, y, i, mb, q = 0;
991
200
    int sizes[4] = { 0 };
992
200
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
993
    int frame_size, picture_size, slice_size;
994
    int pkt_size, ret;
995
200
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
996
    uint8_t frame_flags;
997
998
200
    ctx->pic = pic;
999
200
    pkt_size = ctx->frame_size_upper_bound;
1000
1001
200
    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1002
        return ret;
1003
1004
200
    orig_buf = pkt->data;
1005
1006
    // frame atom
1007
200
    orig_buf += 4;                              // frame size
1008
200
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1009
200
    buf = orig_buf;
1010
1011
    // frame header
1012
200
    tmp = buf;
1013
200
    buf += 2;                                   // frame header size will be stored here
1014
200
    bytestream_put_be16  (&buf, 0);             // version 1
1015
200
    bytestream_put_buffer(&buf, ctx->vendor, 4);
1016
200
    bytestream_put_be16  (&buf, avctx->width);
1017
200
    bytestream_put_be16  (&buf, avctx->height);
1018
1019
200
    frame_flags = ctx->chroma_factor << 6;
1020
200
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1021
        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1022
200
    bytestream_put_byte  (&buf, frame_flags);
1023
1024
200
    bytestream_put_byte  (&buf, 0);             // reserved
1025
200
    bytestream_put_byte  (&buf, pic->color_primaries);
1026
200
    bytestream_put_byte  (&buf, pic->color_trc);
1027
200
    bytestream_put_byte  (&buf, pic->colorspace);
1028
200
    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1029
200
    bytestream_put_byte  (&buf, 0);             // reserved
1030
200
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1031
200
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1032
        // luma quantisation matrix
1033
13000
        for (i = 0; i < 64; i++)
1034
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1035
        // chroma quantisation matrix
1036
13000
        for (i = 0; i < 64; i++)
1037
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1038
    } else {
1039
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1040
    }
1041
200
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1042
1043
200
    for (ctx->cur_picture_idx = 0;
1044
400
         ctx->cur_picture_idx < ctx->pictures_per_frame;
1045
200
         ctx->cur_picture_idx++) {
1046
        // picture header
1047
200
        picture_size_pos = buf + 1;
1048
200
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1049
200
        buf += 4;                                   // picture data size will be stored here
1050
200
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
1051
200
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1052
1053
        // seek table - will be filled during slice encoding
1054
200
        slice_sizes = buf;
1055
200
        buf += ctx->slices_per_picture * 2;
1056
1057
        // slices
1058
200
        if (!ctx->force_quant) {
1059
200
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1060
                                  ctx->mb_height);
1061
200
            if (ret)
1062
                return ret;
1063
        }
1064
1065
3050
        for (y = 0; y < ctx->mb_height; y++) {
1066
2850
            int mbs_per_slice = ctx->mbs_per_slice;
1067
13950
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1068
22200
                q = ctx->force_quant ? ctx->force_quant
1069
11100
                                     : ctx->slice_q[mb + y * ctx->slices_width];
1070
1071
16950
                while (ctx->mb_width - x < mbs_per_slice)
1072
5850
                    mbs_per_slice >>= 1;
1073
1074
11100
                bytestream_put_byte(&buf, slice_hdr_size << 3);
1075
11100
                slice_hdr = buf;
1076
11100
                buf += slice_hdr_size - 1;
1077
11100
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1078
                    uint8_t *start = pkt->data;
1079
                    // Recompute new size according to max_slice_size
1080
                    // and deduce delta
1081
                    int delta = 200 + (ctx->pictures_per_frame *
1082
                                ctx->slices_per_picture + 1) *
1083
                                max_slice_size - pkt_size;
1084
1085
                    delta = FFMAX(delta, 2 * max_slice_size);
1086
                    ctx->frame_size_upper_bound += delta;
1087
1088
                    if (!ctx->warn) {
1089
                        avpriv_request_sample(avctx,
1090
                                              "Packet too small: is %i,"
1091
                                              " needs %i (slice: %i). "
1092
                                              "Correct allocation",
1093
                                              pkt_size, delta, max_slice_size);
1094
                        ctx->warn = 1;
1095
                    }
1096
1097
                    ret = av_grow_packet(pkt, delta);
1098
                    if (ret < 0)
1099
                        return ret;
1100
1101
                    pkt_size += delta;
1102
                    // restore pointers
1103
                    orig_buf         = pkt->data + (orig_buf         - start);
1104
                    buf              = pkt->data + (buf              - start);
1105
                    picture_size_pos = pkt->data + (picture_size_pos - start);
1106
                    slice_sizes      = pkt->data + (slice_sizes      - start);
1107
                    slice_hdr        = pkt->data + (slice_hdr        - start);
1108
                    tmp              = pkt->data + (tmp              - start);
1109
                }
1110
11100
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1111
11100
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1112
                                   mbs_per_slice);
1113
11100
                if (ret < 0)
1114
                    return ret;
1115
1116
11100
                bytestream_put_byte(&slice_hdr, q);
1117
11100
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1118
33300
                for (i = 0; i < ctx->num_planes - 1; i++) {
1119
22200
                    bytestream_put_be16(&slice_hdr, sizes[i]);
1120
22200
                    slice_size += sizes[i];
1121
                }
1122
11100
                bytestream_put_be16(&slice_sizes, slice_size);
1123
11100
                buf += slice_size - slice_hdr_size;
1124
11100
                if (max_slice_size < slice_size)
1125
216
                    max_slice_size = slice_size;
1126
            }
1127
        }
1128
1129
200
        picture_size = buf - (picture_size_pos - 1);
1130
200
        bytestream_put_be32(&picture_size_pos, picture_size);
1131
    }
1132
1133
200
    orig_buf -= 8;
1134
200
    frame_size = buf - orig_buf;
1135
200
    bytestream_put_be32(&orig_buf, frame_size);
1136
1137
200
    pkt->size   = frame_size;
1138
200
    pkt->flags |= AV_PKT_FLAG_KEY;
1139
200
    *got_packet = 1;
1140
1141
200
    return 0;
1142
}
1143
1144
4
static av_cold int encode_close(AVCodecContext *avctx)
1145
{
1146
4
    ProresContext *ctx = avctx->priv_data;
1147
    int i;
1148
1149
4
    if (ctx->tdata) {
1150
8
        for (i = 0; i < avctx->thread_count; i++)
1151
4
            av_freep(&ctx->tdata[i].nodes);
1152
    }
1153
4
    av_freep(&ctx->tdata);
1154
4
    av_freep(&ctx->slice_q);
1155
1156
4
    return 0;
1157
}
1158
1159
957600
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
1160
                        ptrdiff_t linesize, int16_t *block)
1161
{
1162
    int x, y;
1163
957600
    const uint16_t *tsrc = src;
1164
1165
8618400
    for (y = 0; y < 8; y++) {
1166
68947200
        for (x = 0; x < 8; x++)
1167
61286400
            block[y * 8 + x] = tsrc[x];
1168
7660800
        tsrc += linesize >> 1;
1169
    }
1170
957600
    fdsp->fdct(block);
1171
957600
}
1172
1173
4
static av_cold int encode_init(AVCodecContext *avctx)
1174
{
1175
4
    ProresContext *ctx = avctx->priv_data;
1176
    int mps;
1177
    int i, j;
1178
    int min_quant, max_quant;
1179
4
    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
1180
1181
4
    avctx->bits_per_raw_sample = 10;
1182
#if FF_API_CODED_FRAME
1183
FF_DISABLE_DEPRECATION_WARNINGS
1184
4
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
1185
4
    avctx->coded_frame->key_frame = 1;
1186
FF_ENABLE_DEPRECATION_WARNINGS
1187
#endif
1188
1189
4
    ctx->fdct      = prores_fdct;
1190
4
    ctx->scantable = interlaced ? ff_prores_interlaced_scan
1191
4
                                : ff_prores_progressive_scan;
1192
4
    ff_fdctdsp_init(&ctx->fdsp, avctx);
1193
1194
4
    mps = ctx->mbs_per_slice;
1195
4
    if (mps & (mps - 1)) {
1196
        av_log(avctx, AV_LOG_ERROR,
1197
               "there should be an integer power of two MBs per slice\n");
1198
        return AVERROR(EINVAL);
1199
    }
1200
4
    if (ctx->profile == PRORES_PROFILE_AUTO) {
1201
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
1202
        ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
1203
                        !(desc->log2_chroma_w + desc->log2_chroma_h))
1204
                     ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
1205
        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
1206
               "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
1207
               ? "4:4:4:4 profile because of the used input colorspace"
1208
               : "HQ profile to keep best quality");
1209
    }
1210
4
    if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
1211
        if (ctx->profile != PRORES_PROFILE_4444 &&
1212
            ctx->profile != PRORES_PROFILE_4444XQ) {
1213
            // force alpha and warn
1214
            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
1215
                   "encode alpha. Override with -profile if needed.\n");
1216
            ctx->alpha_bits = 0;
1217
        }
1218
        if (ctx->alpha_bits & 7) {
1219
            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
1220
            return AVERROR(EINVAL);
1221
        }
1222
        avctx->bits_per_coded_sample = 32;
1223
    } else {
1224
4
        ctx->alpha_bits = 0;
1225
    }
1226
1227
8
    ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
1228
                         ? CFACTOR_Y422
1229
4
                         : CFACTOR_Y444;
1230
4
    ctx->profile_info  = prores_profile_info + ctx->profile;
1231
4
    ctx->num_planes    = 3 + !!ctx->alpha_bits;
1232
1233
4
    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
1234
1235
4
    if (interlaced)
1236
        ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
1237
    else
1238
4
        ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
1239
1240
4
    ctx->slices_width  = ctx->mb_width / mps;
1241
4
    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
1242
4
    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
1243
4
    ctx->pictures_per_frame = 1 + interlaced;
1244
1245
4
    if (ctx->quant_sel == -1) {
1246
4
        ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
1247
4
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
1248
    } else {
1249
        ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
1250
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
1251
    }
1252
1253
4
    if (strlen(ctx->vendor) != 4) {
1254
        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
1255
        return AVERROR_INVALIDDATA;
1256
    }
1257
1258
4
    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
1259
4
    if (!ctx->force_quant) {
1260
4
        if (!ctx->bits_per_mb) {
1261
4
            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
1262
4
                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
1263
4
                                           ctx->pictures_per_frame)
1264
4
                    break;
1265
4
            ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
1266
4
            if (ctx->alpha_bits)
1267
                ctx->bits_per_mb *= 20;
1268
        } else if (ctx->bits_per_mb < 128) {
1269
            av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
1270
            return AVERROR_INVALIDDATA;
1271
        }
1272
1273
4
        min_quant = ctx->profile_info->min_quant;
1274
4
        max_quant = ctx->profile_info->max_quant;
1275
64
        for (i = min_quant; i < MAX_STORED_Q; i++) {
1276
3900
            for (j = 0; j < 64; j++) {
1277
3840
                ctx->quants[i][j] = ctx->quant_mat[j] * i;
1278
3840
                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
1279
            }
1280
        }
1281
1282
4
        ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
1283
4
        if (!ctx->slice_q) {
1284
            encode_close(avctx);
1285
            return AVERROR(ENOMEM);
1286
        }
1287
1288
4
        ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
1289
4
        if (!ctx->tdata) {
1290
            encode_close(avctx);
1291
            return AVERROR(ENOMEM);
1292
        }
1293
1294
8
        for (j = 0; j < avctx->thread_count; j++) {
1295
8
            ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
1296
4
                                            * TRELLIS_WIDTH
1297
                                            * sizeof(*ctx->tdata->nodes));
1298
4
            if (!ctx->tdata[j].nodes) {
1299
                encode_close(avctx);
1300
                return AVERROR(ENOMEM);
1301
            }
1302
32
            for (i = min_quant; i < max_quant + 2; i++) {
1303
28
                ctx->tdata[j].nodes[i].prev_node = -1;
1304
28
                ctx->tdata[j].nodes[i].bits      = 0;
1305
28
                ctx->tdata[j].nodes[i].score     = 0;
1306
            }
1307
        }
1308
    } else {
1309
        int ls = 0;
1310
        int ls_chroma = 0;
1311
1312
        if (ctx->force_quant > 64) {
1313
            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
1314
            return AVERROR_INVALIDDATA;
1315
        }
1316
1317
        for (j = 0; j < 64; j++) {
1318
            ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
1319
            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
1320
            ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
1321
            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
1322
        }
1323
1324
        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
1325
        if (ctx->chroma_factor == CFACTOR_Y444)
1326
            ctx->bits_per_mb += ls_chroma * 4;
1327
    }
1328
1329
4
    ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
1330
4
                                   ctx->slices_per_picture + 1) *
1331
4
                                  (2 + 2 * ctx->num_planes +
1332
4
                                   (mps * ctx->bits_per_mb) / 8)
1333
4
                                  + 200;
1334
1335
4
    if (ctx->alpha_bits) {
1336
         // The alpha plane is run-coded and might exceed the bit budget.
1337
         ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
1338
                                         ctx->slices_per_picture + 1) *
1339
         /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
1340
         /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
1341
    }
1342
1343
4
    avctx->codec_tag   = ctx->profile_info->tag;
1344
1345
4
    av_log(avctx, AV_LOG_DEBUG,
1346
           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
1347
4
           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
1348
           interlaced ? "yes" : "no", ctx->bits_per_mb);
1349
4
    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
1350
           ctx->frame_size_upper_bound);
1351
1352
4
    return 0;
1353
}
1354
1355
#define OFFSET(x) offsetof(ProresContext, x)
1356
#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1357
1358
static const AVOption options[] = {
1359
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1360
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1361
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1362
        { .i64 = PRORES_PROFILE_AUTO },
1363
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1364
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1365
        0, 0, VE, "profile" },
1366
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1367
        0, 0, VE, "profile" },
1368
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1369
        0, 0, VE, "profile" },
1370
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1371
        0, 0, VE, "profile" },
1372
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1373
        0, 0, VE, "profile" },
1374
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1375
        0, 0, VE, "profile" },
1376
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1377
        0, 0, VE, "profile" },
1378
    { "vendor", "vendor ID", OFFSET(vendor),
1379
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, 0, 0, VE },
1380
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1381
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1382
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1383
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1384
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1385
        0, 0, VE, "quant_mat" },
1386
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1387
        0, 0, VE, "quant_mat" },
1388
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1389
        0, 0, VE, "quant_mat" },
1390
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1391
        0, 0, VE, "quant_mat" },
1392
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1393
        0, 0, VE, "quant_mat" },
1394
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1395
        0, 0, VE, "quant_mat" },
1396
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1397
        { .i64 = 16 }, 0, 16, VE },
1398
    { NULL }
1399
};
1400
1401
static const AVClass proresenc_class = {
1402
    .class_name = "ProRes encoder",
1403
    .item_name  = av_default_item_name,
1404
    .option     = options,
1405
    .version    = LIBAVUTIL_VERSION_INT,
1406
};
1407
1408
AVCodec ff_prores_ks_encoder = {
1409
    .name           = "prores_ks",
1410
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1411
    .type           = AVMEDIA_TYPE_VIDEO,
1412
    .id             = AV_CODEC_ID_PRORES,
1413
    .priv_data_size = sizeof(ProresContext),
1414
    .init           = encode_init,
1415
    .close          = encode_close,
1416
    .encode2        = encode_frame,
1417
    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
1418
    .pix_fmts       = (const enum AVPixelFormat[]) {
1419
                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1420
                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1421
                      },
1422
    .priv_class     = &proresenc_class,
1423
    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1424
};