GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/proresenc_kostya.c Lines: 473 665 71.1 %
Date: 2019-11-22 03:34:36 Branches: 181 304 59.5 %

Line Branch Exec Source
1
/*
2
 * Apple ProRes encoder
3
 *
4
 * Copyright (c) 2012 Konstantin Shishkov
5
 *
6
 * This encoder appears to be based on Anatoliy Wasserman's encoder, considering
7
 * similarities in the bugs.
8
 *
9
 * This file is part of FFmpeg.
10
 *
11
 * FFmpeg is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2.1 of the License, or (at your option) any later version.
15
 *
16
 * FFmpeg is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with FFmpeg; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
 */
25
26
#include "libavutil/opt.h"
27
#include "libavutil/pixdesc.h"
28
#include "avcodec.h"
29
#include "fdctdsp.h"
30
#include "put_bits.h"
31
#include "profiles.h"
32
#include "bytestream.h"
33
#include "internal.h"
34
#include "proresdata.h"
35
36
/* Chroma-factor constants compared against ProresContext.chroma_factor;
 * CFACTOR_Y444 selects the full-width chroma path in encode_slice() and
 * find_slice_quant(). */
#define CFACTOR_Y422 2
37
#define CFACTOR_Y444 3
38
39
/* Sizes the per-plane coefficient buffers (64 * 4 * MAX_MBS_PER_SLICE). */
#define MAX_MBS_PER_SLICE 8
40
41
/* First dimension of the blocks[] buffers and of the per-plane arrays in
 * find_slice_quant(). */
#define MAX_PLANES 4
42
43
/* Profile identifiers. The non-negative values follow the same order as the
 * entries of prores_profile_info[]. PRORES_PROFILE_AUTO is the only negative
 * value; NOTE(review): presumably it requests automatic profile selection —
 * the selection code is not visible here. */
enum {
44
    PRORES_PROFILE_AUTO  = -1,
45
    PRORES_PROFILE_PROXY = 0,
46
    PRORES_PROFILE_LT,
47
    PRORES_PROFILE_STANDARD,
48
    PRORES_PROFILE_HQ,
49
    PRORES_PROFILE_4444,
50
    PRORES_PROFILE_4444XQ,
51
};
52
53
/* Row indices into prores_quant_matrices[]; the enumerator order matches the
 * order in which the matrices are listed in that table. */
enum {
54
    QUANT_MAT_PROXY = 0,
55
    QUANT_MAT_PROXY_CHROMA,
56
    QUANT_MAT_LT,
57
    QUANT_MAT_STANDARD,
58
    QUANT_MAT_HQ,
59
    QUANT_MAT_XQ_LUMA,
60
    QUANT_MAT_DEFAULT,
61
};
62
63
/* Base quantisation matrices, 64 entries each, one row per QUANT_MAT_* value.
 * The profile table refers to them through its .quant / .quant_chroma fields;
 * encode_slice() multiplies the selected matrix by the slice quantiser when it
 * has to build a custom matrix. */
static const uint8_t prores_quant_matrices[][64] = {
64
    { // proxy
65
         4,  7,  9, 11, 13, 14, 15, 63,
66
         7,  7, 11, 12, 14, 15, 63, 63,
67
         9, 11, 13, 14, 15, 63, 63, 63,
68
        11, 11, 13, 14, 63, 63, 63, 63,
69
        11, 13, 14, 63, 63, 63, 63, 63,
70
        13, 14, 63, 63, 63, 63, 63, 63,
71
        13, 63, 63, 63, 63, 63, 63, 63,
72
        63, 63, 63, 63, 63, 63, 63, 63,
73
    },
74
    { // proxy chromas
75
        4,  7,  9, 11, 13, 14, 63, 63,
76
        7,  7, 11, 12, 14, 63, 63, 63,
77
        9, 11, 13, 14, 63, 63, 63, 63,
78
        11, 11, 13, 14, 63, 63, 63, 63,
79
        11, 13, 14, 63, 63, 63, 63, 63,
80
        13, 14, 63, 63, 63, 63, 63, 63,
81
        13, 63, 63, 63, 63, 63, 63, 63,
82
        63, 63, 63, 63, 63, 63, 63, 63
83
    },
84
    { // LT
85
         4,  5,  6,  7,  9, 11, 13, 15,
86
         5,  5,  7,  8, 11, 13, 15, 17,
87
         6,  7,  9, 11, 13, 15, 15, 17,
88
         7,  7,  9, 11, 13, 15, 17, 19,
89
         7,  9, 11, 13, 14, 16, 19, 23,
90
         9, 11, 13, 14, 16, 19, 23, 29,
91
         9, 11, 13, 15, 17, 21, 28, 35,
92
        11, 13, 16, 17, 21, 28, 35, 41,
93
    },
94
    { // standard
95
         4,  4,  5,  5,  6,  7,  7,  9,
96
         4,  4,  5,  6,  7,  7,  9,  9,
97
         5,  5,  6,  7,  7,  9,  9, 10,
98
         5,  5,  6,  7,  7,  9,  9, 10,
99
         5,  6,  7,  7,  8,  9, 10, 12,
100
         6,  7,  7,  8,  9, 10, 12, 15,
101
         6,  7,  7,  9, 10, 11, 14, 17,
102
         7,  7,  9, 10, 11, 14, 17, 21,
103
    },
104
    { // high quality
105
         4,  4,  4,  4,  4,  4,  4,  4,
106
         4,  4,  4,  4,  4,  4,  4,  4,
107
         4,  4,  4,  4,  4,  4,  4,  4,
108
         4,  4,  4,  4,  4,  4,  4,  5,
109
         4,  4,  4,  4,  4,  4,  5,  5,
110
         4,  4,  4,  4,  4,  5,  5,  6,
111
         4,  4,  4,  4,  5,  5,  6,  7,
112
         4,  4,  4,  4,  5,  6,  7,  7,
113
    },
114
    { // XQ luma
115
        2,  2,  2,  2,  2,  2,  2,  2,
116
        2,  2,  2,  2,  2,  2,  2,  2,
117
        2,  2,  2,  2,  2,  2,  2,  2,
118
        2,  2,  2,  2,  2,  2,  2,  3,
119
        2,  2,  2,  2,  2,  2,  3,  3,
120
        2,  2,  2,  2,  2,  3,  3,  3,
121
        2,  2,  2,  2,  3,  3,  3,  4,
122
        2,  2,  2,  2,  3,  3,  4,  4,
123
    },
124
    { // codec default
125
         4,  4,  4,  4,  4,  4,  4,  4,
126
         4,  4,  4,  4,  4,  4,  4,  4,
127
         4,  4,  4,  4,  4,  4,  4,  4,
128
         4,  4,  4,  4,  4,  4,  4,  4,
129
         4,  4,  4,  4,  4,  4,  4,  4,
130
         4,  4,  4,  4,  4,  4,  4,  4,
131
         4,  4,  4,  4,  4,  4,  4,  4,
132
         4,  4,  4,  4,  4,  4,  4,  4,
133
    },
134
};
135
136
/* Number of size classes; also the length of prores_profile.br_tab[]. */
#define NUM_MB_LIMITS 4
137
/* Thresholds, one per size class, in ascending order. The per-entry comments
 * give the matching frame dimensions.
 * NOTE(review): presumably these are macroblock counts per frame that select
 * a br_tab[] entry — the lookup code is not visible here. */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
138
    1620, // up to 720x576
139
    2700, // up to 960x720
140
    6075, // up to 1440x1080
141
    9216, // up to 2048x1152
142
};
143
144
/* Per-profile parameters, listed in the same order as the non-negative
 * PRORES_PROFILE_* values.
 *
 * full_name    - printable profile name
 * tag          - four-character code built with MKTAG()
 * min_quant,
 * max_quant    - quantiser range read by find_slice_quant()
 * br_tab       - one value per prores_mb_limits[] entry
 *                (NOTE(review): presumably a bit budget per macroblock —
 *                confirm against the initialisation code)
 * quant,
 * quant_chroma - QUANT_MAT_* rows of prores_quant_matrices[] */
static const struct prores_profile {
145
    const char *full_name;
146
    uint32_t    tag;
147
    int         min_quant;
148
    int         max_quant;
149
    int         br_tab[NUM_MB_LIMITS];
150
    int         quant;
151
    int         quant_chroma;
152
} prores_profile_info[6] = {
153
    {
154
        .full_name = "proxy",
155
        .tag       = MKTAG('a', 'p', 'c', 'o'),
156
        .min_quant = 4,
157
        .max_quant = 8,
158
        .br_tab    = { 300, 242, 220, 194 },
159
        .quant     = QUANT_MAT_PROXY,
160
        .quant_chroma = QUANT_MAT_PROXY_CHROMA,
161
    },
162
    {
163
        .full_name = "LT",
164
        .tag       = MKTAG('a', 'p', 'c', 's'),
165
        .min_quant = 1,
166
        .max_quant = 9,
167
        .br_tab    = { 720, 560, 490, 440 },
168
        .quant     = QUANT_MAT_LT,
169
        .quant_chroma = QUANT_MAT_LT,
170
    },
171
    {
172
        .full_name = "standard",
173
        .tag       = MKTAG('a', 'p', 'c', 'n'),
174
        .min_quant = 1,
175
        .max_quant = 6,
176
        .br_tab    = { 1050, 808, 710, 632 },
177
        .quant     = QUANT_MAT_STANDARD,
178
        .quant_chroma = QUANT_MAT_STANDARD,
179
    },
180
    {
181
        .full_name = "high quality",
182
        .tag       = MKTAG('a', 'p', 'c', 'h'),
183
        .min_quant = 1,
184
        .max_quant = 6,
185
        .br_tab    = { 1566, 1216, 1070, 950 },
186
        .quant     = QUANT_MAT_HQ,
187
        .quant_chroma = QUANT_MAT_HQ,
188
    },
189
    {
190
        .full_name = "4444",
191
        .tag       = MKTAG('a', 'p', '4', 'h'),
192
        .min_quant = 1,
193
        .max_quant = 6,
194
        .br_tab    = { 2350, 1828, 1600, 1425 },
195
        .quant     = QUANT_MAT_HQ,
196
        .quant_chroma = QUANT_MAT_HQ,
197
    },
198
    {
199
        .full_name = "4444XQ",
200
        .tag       = MKTAG('a', 'p', '4', 'x'),
201
        .min_quant = 1,
202
        .max_quant = 6,
203
        .br_tab    = { 3525, 2742, 2400, 2137 },
204
        .quant     = QUANT_MAT_HQ, /* Fix me : use QUANT_MAT_XQ_LUMA */
205
        .quant_chroma = QUANT_MAT_HQ,
206
    }
207
};
208
209
/* Length of the slice_bits[] / slice_score[] arrays in find_slice_quant(). */
#define TRELLIS_WIDTH 16
210
/* Half of INT_MAX. Parenthesised so the macro always expands as a single
 * operand, whatever operators surround it at the point of use. */
#define SCORE_LIMIT   (INT_MAX / 2)
211
212
/* One node of the quantiser-search trellis (ProresThreadData.nodes points to
 * an array of these).
 * NOTE(review): field meanings below are inferred from the names; the code
 * that fills them is not visible in this part of the file — confirm.
 *   prev_node - presumably the index of the predecessor node
 *   quant     - presumably the quantiser this node stands for
 *   bits      - presumably the accumulated bit cost
 *   score     - presumably the accumulated error score */
struct TrellisNode {
213
    int prev_node;
214
    int quant;
215
    int bits;
216
    int score;
217
};
218
219
/* Number of precomputed matrices in ProresContext.quants[] and
 * quants_chroma[]; encode_slice() builds a custom matrix for quantisers at or
 * above this value. */
#define MAX_STORED_Q 16
220
221
/* Scratch state handed to find_slice_quant() / estimate_slice_plane().
 * NOTE(review): the name suggests one instance per worker thread — the
 * allocation code is not visible here. */
typedef struct ProresThreadData {
222
    /* transformed coefficients, one buffer per plane */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
223
    /* 16x16 sample scratch area, same shape as ProresContext.emu_buf */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
224
    int16_t custom_q[64];
225
    int16_t custom_chroma_q[64];
226
    struct TrellisNode *nodes;
227
} ProresThreadData;
228
229
/* Encoder private context, reached through AVCodecContext.priv_data. */
typedef struct ProresContext {
230
    AVClass *class;
231
    /* coefficient scratch buffers; encode_slice() uses blocks[0] for every plane */
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
232
    /* 16x16 sample buffer used by get_slice_data() for edge padding */
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
233
    /* matrices indexed by quantiser; used for quantisers below MAX_STORED_Q */
    int16_t quants[MAX_STORED_Q][64];
234
    int16_t quants_chroma[MAX_STORED_Q][64];
235
    /* filled by encode_slice() for quantisers >= MAX_STORED_Q */
    int16_t custom_q[64];
236
    int16_t custom_chroma_q[64];
237
    /* base matrices that encode_slice() scales by the quantiser */
    const uint8_t *quant_mat;
238
    const uint8_t *quant_chroma_mat;
239
    /* coefficient scan order handed to encode_acs() / estimate_acs() */
    const uint8_t *scantable;
240
241
    /* forward DCT of one 8x8 block; linesize is passed in bytes */
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
242
                 ptrdiff_t linesize, int16_t *block);
243
    FDCTDSPContext fdsp;
244
245
    const AVFrame *pic;
246
    int mb_width, mb_height;
247
    int mbs_per_slice;
248
    /* chroma_factor is compared against CFACTOR_Y444 to pick the chroma layout */
    int num_chroma_blocks, chroma_factor;
249
    int slices_width;
250
    int slices_per_picture;
251
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
252
    /* combined with top_field_first to choose the first source line of a field */
    int cur_picture_idx;
253
    /* number of planes coded per slice; plane index 3 is handled as alpha */
    int num_planes;
254
    int bits_per_mb;
255
    /* when non-zero, encode_slice() always uses quants[0] / quants_chroma[0] */
    int force_quant;
256
    /* alpha sample depth; 8 selects the short difference code, any other
     * value the long one (see put_alpha_diff()) */
    int alpha_bits;
257
    int warn;
258
259
    char *vendor;
260
    int quant_sel;
261
262
    int frame_size_upper_bound;
263
264
    int profile;
265
    const struct prores_profile *profile_info;
266
267
    int *slice_q;
268
269
    ProresThreadData *tdata;
270
} ProresContext;
271
272
66600
/**
 * Forward-transform the macroblocks of one slice of a single plane.
 *
 * Each macroblock is split into 8x8 blocks, every block is passed to
 * ctx->fdct and its 64 coefficients are appended to blocks. A macroblock that
 * crosses the right or bottom edge of the plane is first copied into emu_buf,
 * replicating the last valid column to the right and the last valid row
 * downwards. As soon as x reaches w, the rest of the output is zeroed and the
 * function returns.
 *
 * @param ctx           encoder context providing fdct and fdsp
 * @param src           first sample of the slice in this plane
 * @param linesize      source stride in bytes
 * @param x             horizontal position of the slice in the plane, in samples
 * @param y             vertical position of the slice in the plane, in samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output coefficient buffer
 * @param emu_buf       16x16 scratch buffer for edge padding
 * @param mbs_per_slice number of macroblocks to process
 * @param blocks_per_mb blocks per macroblock; a macroblock is
 *                      4 * blocks_per_mb samples wide
 * @param is_chroma     non-zero: blocks are emitted column by column;
 *                      zero: row by row
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    const int wide_mb  = blocks_per_mb > 2;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++) {
        const uint16_t *esrc = src;
        ptrdiff_t elinesize  = linesize;
        ptrdiff_t offsets[4];
        ptrdiff_t lower;
        int nb_blocks = 0, b;

        if (x >= w) {
            /* nothing left inside the plane: clear the remaining blocks */
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            /* partial macroblock: build a padded copy in emu_buf */
            const int bw = FFMIN(w - x, mb_width);
            const int bh = FFMIN(h - y, 16);
            int row, col;

            for (row = 0; row < bh; row++) {
                uint16_t *dst = emu_buf + row * 16;
                int pix;

                memcpy(dst, (const uint8_t*)src + row * linesize,
                       bw * sizeof(*src));
                pix = dst[bw - 1];
                for (col = bw; col < mb_width; col++)
                    dst[col] = pix;
            }
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16, emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));

            esrc      = emu_buf;
            elinesize = 16 * sizeof(*emu_buf);
        }

        /* elinesize counts bytes while esrc points to 16-bit samples, so
         * elinesize * 4 samples is eight rows further down */
        lower = elinesize * 4;
        if (is_chroma) {
            offsets[nb_blocks++] = 0;
            offsets[nb_blocks++] = lower;
            if (wide_mb) {
                offsets[nb_blocks++] = 8;
                offsets[nb_blocks++] = lower + 8;
            }
        } else {
            offsets[nb_blocks++] = 0;
            if (wide_mb)
                offsets[nb_blocks++] = 8;
            offsets[nb_blocks++] = lower;
            if (wide_mb)
                offsets[nb_blocks++] = lower + 8;
        }

        for (b = 0; b < nb_blocks; b++, blocks += 64)
            ctx->fdct(&ctx->fdsp, esrc + offsets[b], elinesize, blocks);

        x   += mb_width;
        src += mb_width;
    }
}
342
343
/**
 * Copy the alpha samples of one slice into blocks as 16 rows of
 * 16 * mbs_per_slice values, converting their depth on the way.
 *
 * With abits == 8 every sample is shifted right by two bits; otherwise it is
 * replaced by (v << 6) | (v >> 4). Rows shorter than the slice are padded
 * with their last sample, missing rows repeat the row above.
 * NOTE(review): the shifts look like a 10-bit source reduced to 8 bits or
 * expanded to 16 bits — confirm against the supported pixel formats.
 *
 * @param ctx           unused
 * @param src           first alpha sample of the slice
 * @param linesize      source stride in bytes
 * @param x             horizontal position of the slice, in samples
 * @param y             vertical position of the slice, in samples
 * @param w             plane width in samples
 * @param h             plane height in samples
 * @param blocks        output buffer
 * @param mbs_per_slice number of macroblocks in the slice
 * @param abits         target alpha depth
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           ptrdiff_t linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row = 0, col;

    while (row < copy_h) {
        memcpy(blocks, src, copy_w * sizeof(*src));
        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }
        /* pad to the right with the last converted sample of the row */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];
        blocks += slice_width;
        src    += linesize >> 1;
        row++;
    }
    /* pad downwards by repeating the previous row */
    while (row < 16) {
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
        blocks += slice_width;
        row++;
    }
}
370
371
/**
372
 * Write an unsigned rice/exp golomb codeword.
373
 */
374
26885174
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
375
{
376
    unsigned int rice_order, exp_order, switch_bits, switch_val;
377
    int exponent;
378
379
    /* number of prefix bits to switch between Rice and expGolomb */
380
26885174
    switch_bits = (codebook & 3) + 1;
381
26885174
    rice_order  =  codebook >> 5;       /* rice code order */
382
26885174
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
383
384
26885174
    switch_val  = switch_bits << rice_order;
385
386
26885174
    if (val >= switch_val) {
387
8074309
        val -= switch_val - (1 << exp_order);
388
8074309
        exponent = av_log2(val);
389
390
8074309
        put_bits(pb, exponent - exp_order + switch_bits, 0);
391
8074309
        put_bits(pb, exponent + 1, val);
392
    } else {
393
18810865
        exponent = val >> rice_order;
394
395
18810865
        if (exponent)
396
4838901
            put_bits(pb, exponent, 0);
397
18810865
        put_bits(pb, 1, 1);
398
18810865
        if (rice_order)
399
1904515
            put_sbits(pb, rice_order, val);
400
    }
401
26885174
}
402
403
/* Sign mask of x: all ones for a negative value, zero otherwise.
 * Assumes x is a 32-bit int and that >> on a negative value is an arithmetic
 * shift. */
#define GET_SIGN(x)  ((x) >> 31)
404
/* Fold a signed value into a non-negative code: x >= 0 gives 2x,
 * x < 0 gives -2x - 1. Evaluates x twice. */
#define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x))
405
406
33300
/**
 * Write the DC coefficients of every block of a slice plane.
 *
 * The first DC (coefficient 0 minus 0x4000, divided by scale) is written with
 * the FIRST_DC_CB codebook. Each following DC is written as the difference
 * from the previous one, negated when the previous difference was negative,
 * using a codebook picked from the previous code (index capped at 3).
 *
 * @param pb               bit writer
 * @param blocks           coefficients, 64 per block
 * @param blocks_per_slice number of blocks
 * @param scale            DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int16_t *blk  = blocks + 64;
    int last_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int cb        = 3;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(last_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc    = (blk[0] - 0x4000) / scale;
        const int diff  = dc - last_dc;
        /* flip the difference when the previous one was negative */
        const int delta = (diff ^ prev_sign) - prev_sign;
        const int code  = MAKE_CODE(delta);

        encode_vlc_codeword(pb, ff_prores_dc_codebook[cb], code);

        cb        = (code + (code & 1)) >> 1;
        cb        = FFMIN(cb, 3);
        prev_sign = GET_SIGN(diff);
        last_dc   = dc;
        blk      += 64;
    }
}
431
432
33300
/**
 * Write the AC coefficients of a slice plane as run/level pairs.
 *
 * Coefficients are visited scan position by scan position (1..63) and, for
 * each position, across all blocks of the slice. Every non-zero quantised
 * level is written as the preceding zero run, the magnitude minus one and a
 * sign bit; the codebooks for the next pair depend on the run and magnitude
 * just written.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block
 * @param blocks_per_slice  number of blocks
 * @param plane_size_factor unused
 * @param scan              scan order table
 * @param qmat              quantisation matrix, indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int i, idx;

    for (i = 1; i < 64; i++) {
        const int pos = scan[i];

        for (idx = pos; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / qmat[pos];
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
465
466
33300
/**
 * Write the DC and AC coefficients of one plane of a slice and flush the bit
 * writer.
 *
 * @param ctx               encoder context (supplies the scan table)
 * @param pb                bit writer
 * @param src               unused
 * @param linesize          unused
 * @param mbs_per_slice     number of macroblocks in the slice
 * @param blocks            transformed coefficients, 64 per block
 * @param blocks_per_mb     blocks per macroblock for this plane
 * @param plane_size_factor forwarded to encode_acs()
 * @param qmat              quantisation matrix; qmat[0] is the DC quantiser
 * @return size in bytes of the data written for the plane
 */
static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, ptrdiff_t linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    const int start_bits = put_bits_count(pb);
    const int nb_blocks  = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, nb_blocks, qmat[0]);
    encode_acs(pb, blocks, nb_blocks, plane_size_factor,
               ctx->scantable, qmat);
    flush_put_bits(pb);

    return (put_bits_count(pb) - start_bits) >> 3;
}
484
485
/**
 * Write the difference between two consecutive alpha samples.
 *
 * The difference is passed through av_mod_uintp2() with abits and values in
 * the top dsize of that range are mapped to negatives. A non-zero difference
 * of magnitude at most dsize uses the short form: a 0 flag, the magnitude
 * minus one in dbits - 1 bits and a sign bit. Everything else is written as a
 * 1 flag followed by abits bits.
 *
 * @param pb    bit writer
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha depth; 8 selects a 4-bit short form, anything else 7 bits
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize) {
        put_bits(pb, 1, 0);
        put_bits(pb, dbits - 1, FFABS(diff) - 1);
        put_bits(pb, 1, diff < 0);
    } else {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
    }
}
503
504
/**
 * Write the length of a run of repeated alpha samples.
 *
 * A zero run is a single 1 bit. Any other run is a 0 bit followed by the run
 * length, in 4 bits when it is below 0x10 and in 15 bits otherwise.
 *
 * @param pb  bit writer
 * @param run run length
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
516
517
// todo alpha quantisation for high quants
518
/**
 * Write the alpha samples of one slice as differences and runs, then flush
 * the bit writer.
 *
 * The first sample is coded as a difference from the all-ones value of
 * ctx->alpha_bits bits. After that, each change of value is written as the
 * length of the preceding run of equal samples followed by the difference; a
 * run still open at the end is written last.
 *
 * @param ctx           encoder context (supplies alpha_bits)
 * @param pb            bit writer
 * @param mbs_per_slice number of macroblocks; 256 samples are coded for each
 * @param blocks        alpha samples prepared by get_alpha_data()
 * @param quant         unused
 * @return size in bytes of the data written
 */
static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
                              int mbs_per_slice, uint16_t *blocks,
                              int quant)
{
    const int abits      = ctx->alpha_bits;
    const int num_coeffs = mbs_per_slice * 256;
    const int start_bits = put_bits_count(pb);
    int last = (1 << abits) - 1;
    int pos  = 0;
    int run  = 0;
    int sample;

    sample = blocks[pos++];
    put_alpha_diff(pb, sample, last, abits);
    last = sample;

    do {
        sample = blocks[pos++];
        if (sample == last) {
            run++;
            continue;
        }
        put_alpha_run (pb, run);
        put_alpha_diff(pb, sample, last, abits);
        last = sample;
        run  = 0;
    } while (pos < num_coeffs);

    if (run)
        put_alpha_run(pb, run);
    flush_put_bits(pb);

    return (put_bits_count(pb) - start_bits) >> 3;
}
549
550
11100
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
551
                        PutBitContext *pb,
552
                        int sizes[4], int x, int y, int quant,
553
                        int mbs_per_slice)
554
{
555
11100
    ProresContext *ctx = avctx->priv_data;
556
    int i, xp, yp;
557
11100
    int total_size = 0;
558
    const uint16_t *src;
559
11100
    int slice_width_factor = av_log2(mbs_per_slice);
560
    int num_cblocks, pwidth, line_add;
561
    ptrdiff_t linesize;
562
    int plane_factor, is_chroma;
563
    uint16_t *qmat;
564
    uint16_t *qmat_chroma;
565
566
11100
    if (ctx->pictures_per_frame == 1)
567
11100
        line_add = 0;
568
    else
569
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
570
571
11100
    if (ctx->force_quant) {
572
        qmat = ctx->quants[0];
573
        qmat_chroma = ctx->quants_chroma[0];
574
11100
    } else if (quant < MAX_STORED_Q) {
575
8353
        qmat = ctx->quants[quant];
576
8353
        qmat_chroma = ctx->quants_chroma[quant];
577
    } else {
578
2747
        qmat = ctx->custom_q;
579
2747
        qmat_chroma = ctx->custom_chroma_q;
580
178555
        for (i = 0; i < 64; i++) {
581
175808
            qmat[i] = ctx->quant_mat[i] * quant;
582
175808
            qmat_chroma[i] = ctx->quant_chroma_mat[i] * quant;
583
        }
584
    }
585
586
44400
    for (i = 0; i < ctx->num_planes; i++) {
587

33300
        is_chroma    = (i == 1 || i == 2);
588
33300
        plane_factor = slice_width_factor + 2;
589
33300
        if (is_chroma)
590
22200
            plane_factor += ctx->chroma_factor - 3;
591

33300
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
592
11100
            xp          = x << 4;
593
11100
            yp          = y << 4;
594
11100
            num_cblocks = 4;
595
11100
            pwidth      = avctx->width;
596
        } else {
597
22200
            xp          = x << 3;
598
22200
            yp          = y << 4;
599
22200
            num_cblocks = 2;
600
22200
            pwidth      = avctx->width >> 1;
601
        }
602
603
33300
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
604
33300
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
605
33300
                                line_add * pic->linesize[i]) + xp;
606
607
33300
        if (i < 3) {
608
33300
            get_slice_data(ctx, src, linesize, xp, yp,
609
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
610
33300
                           ctx->blocks[0], ctx->emu_buf,
611
                           mbs_per_slice, num_cblocks, is_chroma);
612
33300
            if (!is_chroma) {/* luma quant */
613
11100
                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
614
11100
                                              mbs_per_slice, ctx->blocks[0],
615
                                              num_cblocks, plane_factor,
616
                                              qmat);
617
            } else { /* chroma plane */
618
22200
                sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
619
22200
                                              mbs_per_slice, ctx->blocks[0],
620
                                              num_cblocks, plane_factor,
621
                                              qmat_chroma);
622
            }
623
        } else {
624
            get_alpha_data(ctx, src, linesize, xp, yp,
625
                           pwidth, avctx->height / ctx->pictures_per_frame,
626
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
627
            sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
628
                                          ctx->blocks[0], quant);
629
        }
630
33300
        total_size += sizes[i];
631
33300
        if (put_bits_left(pb) < 0) {
632
            av_log(avctx, AV_LOG_ERROR,
633
                   "Underestimated required buffer size.\n");
634
            return AVERROR_BUG;
635
        }
636
    }
637
11100
    return total_size;
638
}
639
640
390320122
/**
 * Return the number of bits encode_vlc_codeword() writes for val with the
 * given codebook.
 *
 * @param codebook packed codebook descriptor (bits 0-1: switch bits minus
 *                 one, bits 2-4: exp-Golomb order, bits 5+: Rice order)
 * @param val      value to be coded
 * @return code length in bits
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit and rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
661
662
379686
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
663
                        int scale)
664
{
665
    int i;
666
379686
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
667
    int bits;
668
669
379686
    prev_dc  = (blocks[0] - 0x4000) / scale;
670
379686
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
671
379686
    sign     = 0;
672
379686
    codebook = 3;
673
379686
    blocks  += 64;
674
379686
    *error  += FFABS(blocks[0] - 0x4000) % scale;
675
676
5449672
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
677
5069986
        dc       = (blocks[0] - 0x4000) / scale;
678
5069986
        *error  += FFABS(blocks[0] - 0x4000) % scale;
679
5069986
        delta    = dc - prev_dc;
680
5069986
        new_sign = GET_SIGN(delta);
681
5069986
        delta    = (delta ^ sign) - sign;
682
5069986
        code     = MAKE_CODE(delta);
683
5069986
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
684
5069986
        codebook = (code + (code & 1)) >> 1;
685
5069986
        codebook = FFMIN(codebook, 3);
686
5069986
        sign     = new_sign;
687
5069986
        prev_dc  = dc;
688
    }
689
690
379686
    return bits;
691
}
692
693
379686
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
694
                        int plane_size_factor,
695
                        const uint8_t *scan, const int16_t *qmat)
696
{
697
    int idx, i;
698
    int run, level, run_cb, lev_cb;
699
    int max_coeffs, abs_level;
700
379686
    int bits = 0;
701
702
379686
    max_coeffs = blocks_per_slice << 6;
703
379686
    run_cb     = ff_prores_run_to_cb_index[4];
704
379686
    lev_cb     = ff_prores_lev_to_cb_index[2];
705
379686
    run        = 0;
706
707
24299904
    for (i = 1; i < 64; i++) {
708
367249554
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
709
343329336
            level   = blocks[idx] / qmat[scan[i]];
710
343329336
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
711
343329336
            if (level) {
712
192435225
                abs_level = FFABS(level);
713
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
714
192435225
                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
715
192435225
                                     abs_level - 1) + 1;
716
717
192435225
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
718
192435225
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
719
192435225
                run    = 0;
720
            } else {
721
150894111
                run++;
722
            }
723
        }
724
    }
725
726
379686
    return bits;
727
}
728
729
379686
/**
 * Estimate the coded size of one plane of a slice and accumulate its
 * quantisation error.
 *
 * @param ctx               encoder context (supplies the scan table)
 * @param error             accumulator for the quantisation error
 * @param plane             index of the coefficient buffer in td->blocks
 * @param src               unused
 * @param linesize          unused
 * @param mbs_per_slice     number of macroblocks in the slice
 * @param blocks_per_mb     blocks per macroblock for this plane
 * @param plane_size_factor forwarded to estimate_acs()
 * @param qmat              quantisation matrix; qmat[0] is the DC quantiser
 * @param td                scratch data holding the transformed coefficients
 * @return estimated size in bits, rounded up to a multiple of 8
 */
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice,
                                int blocks_per_mb, int plane_size_factor,
                                const int16_t *qmat, ProresThreadData *td)
{
    const int nb_blocks = mbs_per_slice * blocks_per_mb;
    int16_t *coeffs     = td->blocks[plane];
    int bits;

    bits  = estimate_dcs(error, coeffs, nb_blocks, qmat[0]);
    bits += estimate_acs(error, coeffs, nb_blocks,
                         plane_size_factor, ctx->scantable, qmat);

    return FFALIGN(bits, 8);
}
746
747
/**
 * Return the number of bits put_alpha_diff() writes for a sample pair.
 *
 * @param cur   current sample
 * @param prev  previous sample
 * @param abits alpha depth; 8 selects a 4-bit short form, anything else 7 bits
 * @return dbits + 1 for the short form, abits + 1 otherwise
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff && diff >= -dsize && diff <= dsize)
        return dbits + 1;
    return abits + 1;
}
761
762
/**
 * Return the number of bits put_alpha_run() writes for a run length:
 * one flag bit alone for a zero run, otherwise the flag bit plus 4 bits
 * (run below 0x10) or 15 bits.
 */
static int est_alpha_run(int run)
{
    if (!run)
        return 1;
    return 1 + (run < 0x10 ? 4 : 15);
}

/**
 * Estimate the number of bits encode_alpha_plane() would write for a slice.
 *
 * The samples are walked exactly as in encode_alpha_plane(); run lengths are
 * costed with est_alpha_run() so that the flag bit written by put_alpha_run()
 * is included. The result is not rounded up to a byte boundary.
 *
 * @param ctx           encoder context (supplies alpha_bits)
 * @param src           unused
 * @param linesize      unused
 * @param mbs_per_slice number of macroblocks; 256 samples are costed for each
 * @param blocks        alpha samples prepared by get_alpha_data()
 * @return estimated size in bits
 */
static int estimate_alpha_plane(ProresContext *ctx,
                                const uint16_t *src, ptrdiff_t linesize,
                                int mbs_per_slice, int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            bits += est_alpha_run(run);
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* a run still open at the end is written as well */
    if (run)
        bits += est_alpha_run(run);

    return bits;
}
803
804
11100
static int find_slice_quant(AVCodecContext *avctx,
805
                            int trellis_node, int x, int y, int mbs_per_slice,
806
                            ProresThreadData *td)
807
{
808
11100
    ProresContext *ctx = avctx->priv_data;
809
    int i, q, pq, xp, yp;
810
    const uint16_t *src;
811
11100
    int slice_width_factor = av_log2(mbs_per_slice);
812
    int num_cblocks[MAX_PLANES], pwidth;
813
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
814
11100
    const int min_quant = ctx->profile_info->min_quant;
815
11100
    const int max_quant = ctx->profile_info->max_quant;
816
    int error, bits, bits_limit;
817
    int mbs, prev, cur, new_score;
818
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
819
    int overquant;
820
    uint16_t *qmat;
821
    uint16_t *qmat_chroma;
822
    int linesize[4], line_add;
823
11100
    int alpha_bits = 0;
824
825
11100
    if (ctx->pictures_per_frame == 1)
826
11100
        line_add = 0;
827
    else
828
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
829
11100
    mbs = x + mbs_per_slice;
830
831
44400
    for (i = 0; i < ctx->num_planes; i++) {
832

33300
        is_chroma[i]    = (i == 1 || i == 2);
833
33300
        plane_factor[i] = slice_width_factor + 2;
834
33300
        if (is_chroma[i])
835
22200
            plane_factor[i] += ctx->chroma_factor - 3;
836

33300
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
837
11100
            xp             = x << 4;
838
11100
            yp             = y << 4;
839
11100
            num_cblocks[i] = 4;
840
11100
            pwidth         = avctx->width;
841
        } else {
842
22200
            xp             = x << 3;
843
22200
            yp             = y << 4;
844
22200
            num_cblocks[i] = 2;
845
22200
            pwidth         = avctx->width >> 1;
846
        }
847
848
33300
        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
849
33300
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
850
33300
                                 line_add * ctx->pic->linesize[i]) + xp;
851
852
33300
        if (i < 3) {
853
33300
            get_slice_data(ctx, src, linesize[i], xp, yp,
854
33300
                           pwidth, avctx->height / ctx->pictures_per_frame,
855
33300
                           td->blocks[i], td->emu_buf,
856
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
857
        } else {
858
            get_alpha_data(ctx, src, linesize[i], xp, yp,
859
                           pwidth, avctx->height / ctx->pictures_per_frame,
860
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
861
        }
862
    }
863
864
88800
    for (q = min_quant; q < max_quant + 2; q++) {
865
77700
        td->nodes[trellis_node + q].prev_node = -1;
866
77700
        td->nodes[trellis_node + q].quant     = q;
867
    }
868
869
11100
    if (ctx->alpha_bits)
870
        alpha_bits = estimate_alpha_plane(ctx, src, linesize[3],
871
                                          mbs_per_slice, td->blocks[3]);
872
    // todo: maybe perform coarser quantising to fit into frame size when needed
873
77700
    for (q = min_quant; q <= max_quant; q++) {
874
66600
        bits  = alpha_bits;
875
66600
        error = 0;
876
133200
        bits += estimate_slice_plane(ctx, &error, 0,
877
66600
                                     src, linesize[0],
878
                                     mbs_per_slice,
879
                                     num_cblocks[0], plane_factor[0],
880
66600
                                     ctx->quants[q], td); /* estimate luma plane */
881
199800
        for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
882
133200
            bits += estimate_slice_plane(ctx, &error, i,
883
133200
                                         src, linesize[i],
884
                                         mbs_per_slice,
885
                                         num_cblocks[i], plane_factor[i],
886
133200
                                         ctx->quants_chroma[q], td);
887
        }
888
66600
        if (bits > 65000 * 8)
889
            error = SCORE_LIMIT;
890
891
66600
        slice_bits[q]  = bits;
892
66600
        slice_score[q] = error;
893
    }
894
11100
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
895
3643
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
896
3643
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
897
3643
        overquant = max_quant;
898
    } else {
899
59962
        for (q = max_quant + 1; q < 128; q++) {
900
59962
            bits  = alpha_bits;
901
59962
            error = 0;
902
59962
            if (q < MAX_STORED_Q) {
903
44939
                qmat = ctx->quants[q];
904
44939
                qmat_chroma = ctx->quants_chroma[q];
905
            } else {
906
15023
                qmat = td->custom_q;
907
15023
                qmat_chroma = td->custom_chroma_q;
908
976495
                for (i = 0; i < 64; i++) {
909
961472
                    qmat[i] = ctx->quant_mat[i] * q;
910
961472
                    qmat_chroma[i] = ctx->quant_chroma_mat[i] * q;
911
                }
912
            }
913
119924
            bits += estimate_slice_plane(ctx, &error, 0,
914
59962
                                         src, linesize[0],
915
                                         mbs_per_slice,
916
                                         num_cblocks[0], plane_factor[0],
917
                                         qmat, td);/* estimate luma plane */
918
179886
            for (i = 1; i < ctx->num_planes - !!ctx->alpha_bits; i++) { /* estimate chroma plane */
919
119924
                bits += estimate_slice_plane(ctx, &error, i,
920
119924
                                             src, linesize[i],
921
                                             mbs_per_slice,
922
                                             num_cblocks[i], plane_factor[i],
923
                                             qmat_chroma, td);
924
            }
925
59962
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
926
7457
                break;
927
        }
928
929
7457
        slice_bits[max_quant + 1]  = bits;
930
7457
        slice_score[max_quant + 1] = error;
931
7457
        overquant = q;
932
    }
933
11100
    td->nodes[trellis_node + max_quant + 1].quant = overquant;
934
935
11100
    bits_limit = mbs * ctx->bits_per_mb;
936
88800
    for (pq = min_quant; pq < max_quant + 2; pq++) {
937
77700
        prev = trellis_node - TRELLIS_WIDTH + pq;
938
939
621600
        for (q = min_quant; q < max_quant + 2; q++) {
940
543900
            cur = trellis_node + q;
941
942
543900
            bits  = td->nodes[prev].bits + slice_bits[q];
943
543900
            error = slice_score[q];
944
543900
            if (bits > bits_limit)
945
438632
                error = SCORE_LIMIT;
946
947

543900
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
948
98480
                new_score = td->nodes[prev].score + error;
949
            else
950
445420
                new_score = SCORE_LIMIT;
951
543900
            if (td->nodes[cur].prev_node == -1 ||
952
466200
                td->nodes[cur].score >= new_score) {
953
954
505564
                td->nodes[cur].bits      = bits;
955
505564
                td->nodes[cur].score     = new_score;
956
505564
                td->nodes[cur].prev_node = prev;
957
            }
958
        }
959
    }
960
961
11100
    error = td->nodes[trellis_node + min_quant].score;
962
11100
    pq    = trellis_node + min_quant;
963
77700
    for (q = min_quant + 1; q < max_quant + 2; q++) {
964
66600
        if (td->nodes[trellis_node + q].score <= error) {
965
52033
            error = td->nodes[trellis_node + q].score;
966
52033
            pq    = trellis_node + q;
967
        }
968
    }
969
970
11100
    return pq;
971
}
972
973
2850
/**
 * Worker choosing the quantisers of one row of slices.
 *
 * The row is walked slice by slice to build the trellis, then the trellis
 * is followed backwards through prev_node to fill ctx->slice_q for the row.
 *
 * @param avctx    codec context
 * @param arg      unused
 * @param jobnr    job number, used as the macroblock row
 * @param threadnr index of the per-thread data in ctx->tdata
 * @return always 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int row        = jobnr;
    int slice_mbs        = ctx->mbs_per_slice;
    int mb_x             = 0;
    int slice_idx        = 0;
    int node             = 0;

    while (mb_x < ctx->mb_width) {
        /* halve the slice width until it fits the remaining macroblocks */
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;

        node = find_slice_quant(avctx,
                                (slice_idx + 1) * TRELLIS_WIDTH, mb_x, row,
                                slice_mbs, td);
        mb_x += slice_mbs;
        slice_idx++;
    }

    /* backtrack from the best final node to the first slice of the row */
    for (slice_idx = ctx->slices_width - 1; slice_idx >= 0; slice_idx--) {
        ctx->slice_q[slice_idx + row * ctx->slices_width] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
996
997
200
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
998
                        const AVFrame *pic, int *got_packet)
999
{
1000
200
    ProresContext *ctx = avctx->priv_data;
1001
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
1002
    uint8_t *picture_size_pos;
1003
    PutBitContext pb;
1004
200
    int x, y, i, mb, q = 0;
1005
200
    int sizes[4] = { 0 };
1006
200
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
1007
    int frame_size, picture_size, slice_size;
1008
    int pkt_size, ret;
1009
200
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
1010
    uint8_t frame_flags;
1011
1012
200
    ctx->pic = pic;
1013
200
    pkt_size = ctx->frame_size_upper_bound;
1014
1015
200
    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
1016
        return ret;
1017
1018
200
    orig_buf = pkt->data;
1019
1020
    // frame atom
1021
200
    orig_buf += 4;                              // frame size
1022
200
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
1023
200
    buf = orig_buf;
1024
1025
    // frame header
1026
200
    tmp = buf;
1027
200
    buf += 2;                                   // frame header size will be stored here
1028
200
    bytestream_put_be16  (&buf, 0);             // version 1
1029
200
    bytestream_put_buffer(&buf, ctx->vendor, 4);
1030
200
    bytestream_put_be16  (&buf, avctx->width);
1031
200
    bytestream_put_be16  (&buf, avctx->height);
1032
1033
200
    frame_flags = ctx->chroma_factor << 6;
1034
200
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
1035
        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
1036
200
    bytestream_put_byte  (&buf, frame_flags);
1037
1038
200
    bytestream_put_byte  (&buf, 0);             // reserved
1039
200
    bytestream_put_byte  (&buf, pic->color_primaries);
1040
200
    bytestream_put_byte  (&buf, pic->color_trc);
1041
200
    bytestream_put_byte  (&buf, pic->colorspace);
1042
200
    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
1043
200
    bytestream_put_byte  (&buf, 0);             // reserved
1044
200
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
1045
200
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
1046
        // luma quantisation matrix
1047
13000
        for (i = 0; i < 64; i++)
1048
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1049
        // chroma quantisation matrix
1050
13000
        for (i = 0; i < 64; i++)
1051
12800
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
1052
    } else {
1053
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
1054
    }
1055
200
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
1056
1057
200
    for (ctx->cur_picture_idx = 0;
1058
400
         ctx->cur_picture_idx < ctx->pictures_per_frame;
1059
200
         ctx->cur_picture_idx++) {
1060
        // picture header
1061
200
        picture_size_pos = buf + 1;
1062
200
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
1063
200
        buf += 4;                                   // picture data size will be stored here
1064
200
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
1065
200
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
1066
1067
        // seek table - will be filled during slice encoding
1068
200
        slice_sizes = buf;
1069
200
        buf += ctx->slices_per_picture * 2;
1070
1071
        // slices
1072
200
        if (!ctx->force_quant) {
1073
200
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1074
                                  ctx->mb_height);
1075
200
            if (ret)
1076
                return ret;
1077
        }
1078
1079
3050
        for (y = 0; y < ctx->mb_height; y++) {
1080
2850
            int mbs_per_slice = ctx->mbs_per_slice;
1081
13950
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1082
22200
                q = ctx->force_quant ? ctx->force_quant
1083
11100
                                     : ctx->slice_q[mb + y * ctx->slices_width];
1084
1085
16950
                while (ctx->mb_width - x < mbs_per_slice)
1086
5850
                    mbs_per_slice >>= 1;
1087
1088
11100
                bytestream_put_byte(&buf, slice_hdr_size << 3);
1089
11100
                slice_hdr = buf;
1090
11100
                buf += slice_hdr_size - 1;
1091
11100
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1092
                    uint8_t *start = pkt->data;
1093
                    // Recompute new size according to max_slice_size
1094
                    // and deduce delta
1095
                    int delta = 200 + (ctx->pictures_per_frame *
1096
                                ctx->slices_per_picture + 1) *
1097
                                max_slice_size - pkt_size;
1098
1099
                    delta = FFMAX(delta, 2 * max_slice_size);
1100
                    ctx->frame_size_upper_bound += delta;
1101
1102
                    if (!ctx->warn) {
1103
                        avpriv_request_sample(avctx,
1104
                                              "Packet too small: is %i,"
1105
                                              " needs %i (slice: %i). "
1106
                                              "Correct allocation",
1107
                                              pkt_size, delta, max_slice_size);
1108
                        ctx->warn = 1;
1109
                    }
1110
1111
                    ret = av_grow_packet(pkt, delta);
1112
                    if (ret < 0)
1113
                        return ret;
1114
1115
                    pkt_size += delta;
1116
                    // restore pointers
1117
                    orig_buf         = pkt->data + (orig_buf         - start);
1118
                    buf              = pkt->data + (buf              - start);
1119
                    picture_size_pos = pkt->data + (picture_size_pos - start);
1120
                    slice_sizes      = pkt->data + (slice_sizes      - start);
1121
                    slice_hdr        = pkt->data + (slice_hdr        - start);
1122
                    tmp              = pkt->data + (tmp              - start);
1123
                }
1124
11100
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1125
11100
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1126
                                   mbs_per_slice);
1127
11100
                if (ret < 0)
1128
                    return ret;
1129
1130
11100
                bytestream_put_byte(&slice_hdr, q);
1131
11100
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1132
33300
                for (i = 0; i < ctx->num_planes - 1; i++) {
1133
22200
                    bytestream_put_be16(&slice_hdr, sizes[i]);
1134
22200
                    slice_size += sizes[i];
1135
                }
1136
11100
                bytestream_put_be16(&slice_sizes, slice_size);
1137
11100
                buf += slice_size - slice_hdr_size;
1138
11100
                if (max_slice_size < slice_size)
1139
216
                    max_slice_size = slice_size;
1140
            }
1141
        }
1142
1143
200
        picture_size = buf - (picture_size_pos - 1);
1144
200
        bytestream_put_be32(&picture_size_pos, picture_size);
1145
    }
1146
1147
200
    orig_buf -= 8;
1148
200
    frame_size = buf - orig_buf;
1149
200
    bytestream_put_be32(&orig_buf, frame_size);
1150
1151
200
    pkt->size   = frame_size;
1152
200
    pkt->flags |= AV_PKT_FLAG_KEY;
1153
200
    *got_packet = 1;
1154
1155
200
    return 0;
1156
}
1157
1158
4
/**
 * Release the buffers allocated by encode_init().
 *
 * Frees the trellis nodes of every thread, the per-thread data array and
 * the per-slice quantiser array.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int encode_close(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    int t;

    /* tdata is NULL when it was never allocated; skip the loop then */
    for (t = 0; ctx->tdata && t < avctx->thread_count; t++)
        av_freep(&ctx->tdata[t].nodes);

    av_freep(&ctx->tdata);
    av_freep(&ctx->slice_q);

    return 0;
}
1172
1173
957600
/**
 * Copy an 8x8 block of 16-bit samples into a coefficient block and run the
 * forward DCT on it in place.
 *
 * @param fdsp     DSP context providing the fdct function
 * @param src      top-left sample of the block
 * @param linesize distance between rows; halved to step the 16-bit sample
 *                 pointer (presumably given in bytes)
 * @param block    64-element output block
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        ptrdiff_t linesize, int16_t *block)
{
    const ptrdiff_t stride = linesize >> 1;
    const uint16_t *row    = src;
    int16_t *dst           = block;
    int r, c;

    for (r = 0; r < 8; r++, row += stride)
        for (c = 0; c < 8; c++)
            *dst++ = row[c];

    fdsp->fdct(block);
}
1186
1187
4
/**
 * Initialise the encoder.
 *
 * Validates the options, selects the profile and quantisation matrices,
 * derives the picture and slice geometry, prepares either the fixed
 * quantiser tables or the rate control state (per-quantiser tables,
 * slice quantiser array, per-thread trellis nodes) and computes the upper
 * bound of the coded frame size.
 *
 * @param avctx codec context
 * @return 0 on success, AVERROR(EINVAL) or AVERROR_INVALIDDATA on invalid
 *         options, AVERROR(ENOMEM) on allocation failure
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    ProresContext *ctx   = avctx->priv_data;
    const int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
    int mps, full_slices, slice_count;
    int min_quant, max_quant;
    int i, j;

    avctx->bits_per_raw_sample = 10;
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    avctx->coded_frame->key_frame = 1;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ctx->fdct = prores_fdct;
    if (interlaced)
        ctx->scantable = ff_prores_interlaced_scan;
    else
        ctx->scantable = ff_prores_progressive_scan;
    ff_fdctdsp_init(&ctx->fdsp, avctx);

    /* the slice width must be a power of two */
    mps = ctx->mbs_per_slice;
    if (mps & (mps - 1)) {
        av_log(avctx, AV_LOG_ERROR,
               "there should be an integer power of two MBs per slice\n");
        return AVERROR(EINVAL);
    }

    if (ctx->profile == PRORES_PROFILE_AUTO) {
        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
        const int has_alpha   = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
        const int full_chroma = !(desc->log2_chroma_w + desc->log2_chroma_h);
        const char *choice;

        /* alpha or non-subsampled chroma selects 4444, anything else HQ */
        if (has_alpha || full_chroma) {
            ctx->profile = PRORES_PROFILE_4444;
            choice = "4:4:4:4 profile because of the used input colorspace";
        } else {
            ctx->profile = PRORES_PROFILE_HQ;
            choice = "HQ profile to keep best quality";
        }
        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
               "through -profile option.\n", choice);
    }

    if (!(av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA)) {
        ctx->alpha_bits = 0;
    } else {
        if (ctx->profile != PRORES_PROFILE_4444 &&
            ctx->profile != PRORES_PROFILE_4444XQ) {
            /* only the 4444 profiles keep alpha: warn and drop it */
            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
                   "encode alpha. Override with -profile if needed.\n");
            ctx->alpha_bits = 0;
        }
        if (ctx->alpha_bits & 7) {
            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
            return AVERROR(EINVAL);
        }
        avctx->bits_per_coded_sample = 32;
    }

    if (avctx->pix_fmt == AV_PIX_FMT_YUV422P10)
        ctx->chroma_factor = CFACTOR_Y422;
    else
        ctx->chroma_factor = CFACTOR_Y444;
    ctx->profile_info = prores_profile_info + ctx->profile;
    ctx->num_planes   = 3 + !!ctx->alpha_bits;

    /* geometry in macroblocks; interlaced pictures hold half of the rows */
    ctx->mb_width  = FFALIGN(avctx->width, 16) >> 4;
    ctx->mb_height = interlaced ? FFALIGN(avctx->height, 32) >> 5
                                : FFALIGN(avctx->height, 16) >> 4;

    /* full-width slices, plus one slice per set bit of the remainder */
    full_slices       = ctx->mb_width / mps;
    ctx->slices_width = full_slices +
                        av_popcount(ctx->mb_width - full_slices * mps);
    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
    ctx->pictures_per_frame = 1 + interlaced;

    if (ctx->quant_sel != -1) {
        /* explicitly selected matrix is used for both luma and chroma */
        ctx->quant_mat        = prores_quant_matrices[ctx->quant_sel];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->quant_sel];
    } else {
        ctx->quant_mat        = prores_quant_matrices[ctx->profile_info->quant];
        ctx->quant_chroma_mat = prores_quant_matrices[ctx->profile_info->quant_chroma];
    }

    if (strlen(ctx->vendor) != 4) {
        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
        return AVERROR_INVALIDDATA;
    }

    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
    if (ctx->force_quant) {
        /* fixed quantiser: a single table pair, bits per MB derived from it */
        int ls        = 0;
        int ls_chroma = 0;

        if (ctx->force_quant > 64) {
            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
            return AVERROR_INVALIDDATA;
        }

        for (j = 0; j < 64; j++) {
            ctx->quants[0][j]        = ctx->quant_mat[j] * ctx->force_quant;
            ctx->quants_chroma[0][j] = ctx->quant_chroma_mat[j] * ctx->force_quant;
            ls        += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
            ls_chroma += av_log2((1 << 11)  / ctx->quants_chroma[0][j]) * 2 + 1;
        }

        ctx->bits_per_mb = ls * 4 + ls_chroma * 4;
        if (ctx->chroma_factor == CFACTOR_Y444)
            ctx->bits_per_mb += ls_chroma * 4;
    } else {
        /* rate control: pick the bit budget and set up the trellis data */
        if (ctx->bits_per_mb) {
            if (ctx->bits_per_mb < 128) {
                av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            /* first size class able to hold the whole frame */
            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
                                           ctx->pictures_per_frame)
                    break;
            ctx->bits_per_mb = ctx->profile_info->br_tab[i];
            if (ctx->alpha_bits)
                ctx->bits_per_mb *= 20;
        }

        min_quant = ctx->profile_info->min_quant;
        max_quant = ctx->profile_info->max_quant;

        /* precompute the scaled matrices of every stored quantiser */
        for (i = min_quant; i < MAX_STORED_Q; i++)
            for (j = 0; j < 64; j++) {
                ctx->quants[i][j]        = ctx->quant_mat[j] * i;
                ctx->quants_chroma[i][j] = ctx->quant_chroma_mat[j] * i;
            }

        ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
        if (!ctx->slice_q) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
        if (!ctx->tdata) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        for (j = 0; j < avctx->thread_count; j++) {
            /* one trellis column per slice of a row plus a starting column */
            ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
                                            * TRELLIS_WIDTH
                                            * sizeof(*ctx->tdata->nodes));
            if (!ctx->tdata[j].nodes) {
                encode_close(avctx);
                return AVERROR(ENOMEM);
            }
            /* the starting column has no predecessor and no cost */
            for (i = min_quant; i <= max_quant + 1; i++) {
                ctx->tdata[j].nodes[i].prev_node = -1;
                ctx->tdata[j].nodes[i].bits      = 0;
                ctx->tdata[j].nodes[i].score     = 0;
            }
        }
    }

    /* one spare slice on top of the slices of all pictures */
    slice_count = ctx->pictures_per_frame * ctx->slices_per_picture + 1;
    ctx->frame_size_upper_bound = slice_count *
                                  (2 + 2 * ctx->num_planes +
                                   (mps * ctx->bits_per_mb) / 8)
                                  + 200;

    if (ctx->alpha_bits) {
        // The alpha plane is run-coded and might exceed the bit budget.
        ctx->frame_size_upper_bound += slice_count *
            /* pixels per slice * bits per pixel, rounded up to bytes */
            ((ctx->mbs_per_slice * 256 * (1 + ctx->alpha_bits + 1) + 7) >> 3);
    }

    avctx->codec_tag = ctx->profile_info->tag;

    av_log(avctx, AV_LOG_DEBUG,
           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
           interlaced ? "yes" : "no", ctx->bits_per_mb);
    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
           ctx->frame_size_upper_bound);

    return 0;
}
1368
1369
#define OFFSET(x) offsetof(ProresContext, x)
1370
#define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1371
1372
static const AVOption options[] = {
1373
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1374
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1375
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1376
        { .i64 = PRORES_PROFILE_AUTO },
1377
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
1378
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1379
        0, 0, VE, "profile" },
1380
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1381
        0, 0, VE, "profile" },
1382
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1383
        0, 0, VE, "profile" },
1384
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1385
        0, 0, VE, "profile" },
1386
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1387
        0, 0, VE, "profile" },
1388
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1389
        0, 0, VE, "profile" },
1390
    { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
1391
        0, 0, VE, "profile" },
1392
    { "vendor", "vendor ID", OFFSET(vendor),
1393
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1394
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1395
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1396
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1397
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1398
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1399
        0, 0, VE, "quant_mat" },
1400
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1401
        0, 0, VE, "quant_mat" },
1402
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1403
        0, 0, VE, "quant_mat" },
1404
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1405
        0, 0, VE, "quant_mat" },
1406
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1407
        0, 0, VE, "quant_mat" },
1408
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1409
        0, 0, VE, "quant_mat" },
1410
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1411
        { .i64 = 16 }, 0, 16, VE },
1412
    { NULL }
1413
};
1414
1415
static const AVClass proresenc_class = {
1416
    .class_name = "ProRes encoder",
1417
    .item_name  = av_default_item_name,
1418
    .option     = options,
1419
    .version    = LIBAVUTIL_VERSION_INT,
1420
};
1421
1422
AVCodec ff_prores_ks_encoder = {
1423
    .name           = "prores_ks",
1424
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1425
    .type           = AVMEDIA_TYPE_VIDEO,
1426
    .id             = AV_CODEC_ID_PRORES,
1427
    .priv_data_size = sizeof(ProresContext),
1428
    .init           = encode_init,
1429
    .close          = encode_close,
1430
    .encode2        = encode_frame,
1431
    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
1432
    .pix_fmts       = (const enum AVPixelFormat[]) {
1433
                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1434
                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1435
                      },
1436
    .priv_class     = &proresenc_class,
1437
    .profiles       = NULL_IF_CONFIG_SMALL(ff_prores_profiles),
1438
};