LCOV - code coverage report
Current view: top level - libavcodec - proresenc_kostya.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 456 649 70.3 %
Date: 2017-12-16 21:16:39 Functions: 16 22 72.7 %

          Line data    Source code
       1             : /*
       2             :  * Apple ProRes encoder
       3             :  *
       4             :  * Copyright (c) 2012 Konstantin Shishkov
       5             :  *
       6             :  * This encoder appears to be based on Anatoliy Wasserman's encoder,
       7             :  * considering the similarities in the bugs.
       8             :  *
       9             :  * This file is part of FFmpeg.
      10             :  *
      11             :  * FFmpeg is free software; you can redistribute it and/or
      12             :  * modify it under the terms of the GNU Lesser General Public
      13             :  * License as published by the Free Software Foundation; either
      14             :  * version 2.1 of the License, or (at your option) any later version.
      15             :  *
      16             :  * FFmpeg is distributed in the hope that it will be useful,
      17             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      18             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      19             :  * Lesser General Public License for more details.
      20             :  *
      21             :  * You should have received a copy of the GNU Lesser General Public
      22             :  * License along with FFmpeg; if not, write to the Free Software
      23             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      24             :  */
      25             : 
      26             : #include "libavutil/opt.h"
      27             : #include "libavutil/pixdesc.h"
      28             : #include "avcodec.h"
      29             : #include "fdctdsp.h"
      30             : #include "put_bits.h"
      31             : #include "bytestream.h"
      32             : #include "internal.h"
      33             : #include "proresdata.h"
      34             : 
      35             : #define CFACTOR_Y422 2 /* chroma factor named for 4:2:2; presumably stored in ctx->chroma_factor - confirm */
      36             : #define CFACTOR_Y444 3 /* encode_slice() codes chroma at full luma width only when chroma_factor equals this */
      37             : 
      38             : #define MAX_MBS_PER_SLICE 8 /* sizes the coefficient buffers: 64 * 4 * MAX_MBS_PER_SLICE entries per plane */
      39             : 
      40             : #define MAX_PLANES 4 /* first dimension of the blocks[][] coefficient buffers */
      41             : 
      42             : enum { /* profile selectors; the six non-AUTO values presumably index prores_profile_info[] - confirm */
      43             :     PRORES_PROFILE_AUTO  = -1,
      44             :     PRORES_PROFILE_PROXY = 0,
      45             :     PRORES_PROFILE_LT,
      46             :     PRORES_PROFILE_STANDARD,
      47             :     PRORES_PROFILE_HQ,
      48             :     PRORES_PROFILE_4444,
      49             :     PRORES_PROFILE_4444XQ,
      50             : };
      51             : 
      52             : enum { /* row indices into prores_quant_matrices[]; also the values of prores_profile.quant */
      53             :     QUANT_MAT_PROXY = 0,
      54             :     QUANT_MAT_LT,
      55             :     QUANT_MAT_STANDARD,
      56             :     QUANT_MAT_HQ,
      57             :     QUANT_MAT_DEFAULT,
      58             : };
      59             : 
      60             : static const uint8_t prores_quant_matrices[][64] = {
                           /*
                            * One 64-entry (8x8) base quantisation matrix per QUANT_MAT_* value;
                            * the rows appear in the same order as that enum.
                            */
      61             :     { // proxy
      62             :          4,  7,  9, 11, 13, 14, 15, 63,
      63             :          7,  7, 11, 12, 14, 15, 63, 63,
      64             :          9, 11, 13, 14, 15, 63, 63, 63,
      65             :         11, 11, 13, 14, 63, 63, 63, 63,
      66             :         11, 13, 14, 63, 63, 63, 63, 63,
      67             :         13, 14, 63, 63, 63, 63, 63, 63,
      68             :         13, 63, 63, 63, 63, 63, 63, 63,
      69             :         63, 63, 63, 63, 63, 63, 63, 63,
      70             :     },
      71             :     { // LT
      72             :          4,  5,  6,  7,  9, 11, 13, 15,
      73             :          5,  5,  7,  8, 11, 13, 15, 17,
      74             :          6,  7,  9, 11, 13, 15, 15, 17,
      75             :          7,  7,  9, 11, 13, 15, 17, 19,
      76             :          7,  9, 11, 13, 14, 16, 19, 23,
      77             :          9, 11, 13, 14, 16, 19, 23, 29,
      78             :          9, 11, 13, 15, 17, 21, 28, 35,
      79             :         11, 13, 16, 17, 21, 28, 35, 41,
      80             :     },
      81             :     { // standard
      82             :          4,  4,  5,  5,  6,  7,  7,  9,
      83             :          4,  4,  5,  6,  7,  7,  9,  9,
      84             :          5,  5,  6,  7,  7,  9,  9, 10,
      85             :          5,  5,  6,  7,  7,  9,  9, 10,
      86             :          5,  6,  7,  7,  8,  9, 10, 12,
      87             :          6,  7,  7,  8,  9, 10, 12, 15,
      88             :          6,  7,  7,  9, 10, 11, 14, 17,
      89             :          7,  7,  9, 10, 11, 14, 17, 21,
      90             :     },
      91             :     { // high quality
      92             :          4,  4,  4,  4,  4,  4,  4,  4,
      93             :          4,  4,  4,  4,  4,  4,  4,  4,
      94             :          4,  4,  4,  4,  4,  4,  4,  4,
      95             :          4,  4,  4,  4,  4,  4,  4,  5,
      96             :          4,  4,  4,  4,  4,  4,  5,  5,
      97             :          4,  4,  4,  4,  4,  5,  5,  6,
      98             :          4,  4,  4,  4,  5,  5,  6,  7,
      99             :          4,  4,  4,  4,  5,  6,  7,  7,
     100             :     },
     101             :     { // codec default
     102             :          4,  4,  4,  4,  4,  4,  4,  4,
     103             :          4,  4,  4,  4,  4,  4,  4,  4,
     104             :          4,  4,  4,  4,  4,  4,  4,  4,
     105             :          4,  4,  4,  4,  4,  4,  4,  4,
     106             :          4,  4,  4,  4,  4,  4,  4,  4,
     107             :          4,  4,  4,  4,  4,  4,  4,  4,
     108             :          4,  4,  4,  4,  4,  4,  4,  4,
     109             :          4,  4,  4,  4,  4,  4,  4,  4,
     110             :     },
     111             : };
     112             : 
     113             : #define NUM_MB_LIMITS 4 /* also the length of prores_profile.br_tab[] */
     114             : static const int prores_mb_limits[NUM_MB_LIMITS] = { /* ascending macroblock-count thresholds; presumably paired index-for-index with br_tab[] - confirm */
     115             :     1620, // up to 720x576
     116             :     2700, // up to 960x720
     117             :     6075, // up to 1440x1080
     118             :     9216, // up to 2048x1152
     119             : };
     120             : 
     121             : static const struct prores_profile {
                           /*
                            * Static description of one encoding profile. The six entries below are
                            * listed in the same order as the non-AUTO PRORES_PROFILE_* values.
                            */
     122             :     const char *full_name;             /* human-readable profile name */
     123             :     uint32_t    tag;                   /* four-character code built with MKTAG() */
     124             :     int         min_quant;             /* presumably the lowest quantiser the rate control may pick - confirm */
     125             :     int         max_quant;             /* presumably the highest quantiser the rate control may pick - confirm */
     126             :     int         br_tab[NUM_MB_LIMITS]; /* one value per prores_mb_limits[] size class; units not visible here */
     127             :     int         quant;                 /* QUANT_MAT_* row of prores_quant_matrices[] */
     128             : } prores_profile_info[6] = {
     129             :     {
     130             :         .full_name = "proxy",
     131             :         .tag       = MKTAG('a', 'p', 'c', 'o'),
     132             :         .min_quant = 4,
     133             :         .max_quant = 8,
     134             :         .br_tab    = { 300, 242, 220, 194 },
     135             :         .quant     = QUANT_MAT_PROXY,
     136             :     },
     137             :     {
     138             :         .full_name = "LT",
     139             :         .tag       = MKTAG('a', 'p', 'c', 's'),
     140             :         .min_quant = 1,
     141             :         .max_quant = 9,
     142             :         .br_tab    = { 720, 560, 490, 440 },
     143             :         .quant     = QUANT_MAT_LT,
     144             :     },
     145             :     {
     146             :         .full_name = "standard",
     147             :         .tag       = MKTAG('a', 'p', 'c', 'n'),
     148             :         .min_quant = 1,
     149             :         .max_quant = 6,
     150             :         .br_tab    = { 1050, 808, 710, 632 },
     151             :         .quant     = QUANT_MAT_STANDARD,
     152             :     },
     153             :     {
     154             :         .full_name = "high quality",
     155             :         .tag       = MKTAG('a', 'p', 'c', 'h'),
     156             :         .min_quant = 1,
     157             :         .max_quant = 6,
     158             :         .br_tab    = { 1566, 1216, 1070, 950 },
     159             :         .quant     = QUANT_MAT_HQ,
     160             :     },
     161             :     {
     162             :         .full_name = "4444",
     163             :         .tag       = MKTAG('a', 'p', '4', 'h'),
     164             :         .min_quant = 1,
     165             :         .max_quant = 6,
     166             :         .br_tab    = { 2350, 1828, 1600, 1425 },
     167             :         .quant     = QUANT_MAT_HQ,
     168             :     },
     169             :     {
     170             :         .full_name = "4444XQ",
     171             :         .tag       = MKTAG('a', 'p', '4', 'x'),
     172             :         .min_quant = 1,
     173             :         .max_quant = 6,
     174             :         .br_tab    = { 3525, 2742, 2400, 2137 },
     175             :         .quant     = QUANT_MAT_HQ,
     176             :     }
     177             : };
     178             : 
     179             : #define TRELLIS_WIDTH 16
     180             : #define SCORE_LIMIT   INT_MAX / 2 /* NOTE(review): expansion is not parenthesised; safe only where it is used as a whole operand */
     181             : 
     182             : struct TrellisNode { /* one node of the quantiser-search trellis; users are outside this excerpt */
     183             :     int prev_node;
     184             :     int quant;
     185             :     int bits;
     186             :     int score;
     187             : };
     188             : 
     189             : #define MAX_STORED_Q 16 /* number of matrices in ProresContext.quants[]; encode_slice() builds custom_q for quant >= this */
     190             : 
     191             : typedef struct ProresThreadData { /* scratch buffers mirroring those in ProresContext; presumably one instance per worker thread - confirm */
     192             :     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
     193             :     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
     194             :     int16_t custom_q[64];
     195             :     struct TrellisNode *nodes;
     196             : } ProresThreadData;
     197             : 
     198             : typedef struct ProresContext { /* encoder private state; encode_slice() obtains it from avctx->priv_data */
     199             :     AVClass *class;
     200             :     DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE]; /* coefficient scratch; encode_slice() uses blocks[0] for every plane */
     201             :     DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16]; /* 16x16 staging area for macroblocks that cross the picture edge */
     202             :     int16_t quants[MAX_STORED_Q][64]; /* stored matrices; encode_slice() picks quants[quant] when quant < MAX_STORED_Q */
     203             :     int16_t custom_q[64];             /* filled by encode_slice() as quant_mat[i] * quant for larger quantisers */
     204             :     const uint8_t *quant_mat;         /* base matrix scaled into custom_q */
     205             :     const uint8_t *scantable;         /* coefficient scan order handed to encode_acs() */
     206             : 
     207             :     void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
     208             :                  ptrdiff_t linesize, int16_t *block); /* per-8x8-block transform called by get_slice_data() */
     209             :     FDCTDSPContext fdsp;
     210             : 
     211             :     const AVFrame *pic;
     212             :     int mb_width, mb_height;
     213             :     int mbs_per_slice;
     214             :     int num_chroma_blocks, chroma_factor; /* chroma_factor is compared against CFACTOR_Y444 in encode_slice() */
     215             :     int slices_width;
     216             :     int slices_per_picture;
     217             :     int pictures_per_frame; // 1 for progressive, 2 for interlaced
     218             :     int cur_picture_idx;    /* combined with top_field_first to pick the starting line when interlaced */
     219             :     int num_planes;         /* encode_slice() loops over planes 0..num_planes-1; index 3 takes the alpha path */
     220             :     int bits_per_mb;
     221             :     int force_quant;        /* when non-zero encode_slice() always uses quants[0] */
     222             :     int alpha_bits;         /* alpha sample depth; the visible code special-cases 8 */
     223             :     int warn;
     224             : 
     225             :     char *vendor;
     226             :     int quant_sel;
     227             : 
     228             :     int frame_size_upper_bound;
     229             : 
     230             :     int profile;
     231             :     const struct prores_profile *profile_info;
     232             : 
     233             :     int *slice_q;
     234             : 
     235             :     ProresThreadData *tdata;
     236             : } ProresContext;
     237             : 
     238       66600 : static void get_slice_data(ProresContext *ctx, const uint16_t *src,
     239             :                            ptrdiff_t linesize, int x, int y, int w, int h,
     240             :                            int16_t *blocks, uint16_t *emu_buf,
     241             :                            int mbs_per_slice, int blocks_per_mb, int is_chroma)
     242             : {
                           /*
                            * Transform one slice of a single plane into coefficient blocks.
                            *
                            * src points at sample (x, y) of a plane that is w x h samples, with
                            * linesize given in bytes. For each of the mbs_per_slice macroblocks,
                            * ctx->fdct is called once per 8x8 block and blocks advances by 64
                            * coefficients per call. A macroblock is 4 * blocks_per_mb samples
                            * wide and 16 rows high.
                            *
                            * Macroblocks that start at or beyond the right edge are not
                            * transformed: the rest of the output is zeroed and the function
                            * returns. Macroblocks that only partly fit are first copied into
                            * emu_buf (rows of 16 samples) with the last valid column and row
                            * replicated.
                            */
     243             :     const uint16_t *esrc;
     244       66600 :     const int mb_width = 4 * blocks_per_mb;
     245             :     ptrdiff_t elinesize;
     246             :     int i, j, k;
     247             : 
     248      425700 :     for (i = 0; i < mbs_per_slice; i++, src += mb_width) {
     249      359100 :         if (x >= w) {
     250           0 :             memset(blocks, 0, 64 * (mbs_per_slice - i) * blocks_per_mb
     251             :                               * sizeof(*blocks));
     252           0 :             return;
     253             :         }
     254      359100 :         if (x + mb_width <= w && y + 16 <= h) {
                                   /* macroblock lies fully inside the plane: read it in place */
     255      357600 :             esrc      = src;
     256      357600 :             elinesize = linesize;
     257             :         } else {
     258             :             int bw, bh, pix;
     259             : 
     260        1500 :             esrc      = emu_buf;
     261        1500 :             elinesize = 16 * sizeof(*emu_buf);
     262             : 
     263        1500 :             bw = FFMIN(w - x, mb_width);
     264        1500 :             bh = FFMIN(h - y, 16);
     265             : 
                                   /* copy the valid rows and extend each to the right with its last sample */
     266       12900 :             for (j = 0; j < bh; j++) {
     267       22800 :                 memcpy(emu_buf + j * 16,
     268       11400 :                        (const uint8_t*)src + j * linesize,
     269             :                        bw * sizeof(*src));
     270       11400 :                 pix = emu_buf[j * 16 + bw - 1];
     271      106600 :                 for (k = bw; k < mb_width; k++)
     272       95200 :                     emu_buf[j * 16 + k] = pix;
     273             :             }
                                   /* fill the missing rows with copies of the last valid row */
     274       14100 :             for (; j < 16; j++)
     275       37800 :                 memcpy(emu_buf + j * 16,
     276       25200 :                        emu_buf + (bh - 1) * 16,
     277             :                        mb_width * sizeof(*emu_buf));
     278             :         }
                               /*
                                * elinesize is in bytes while esrc is a uint16_t pointer, so
                                * "esrc + elinesize * 4" moves down 8 rows; "+ 8" moves right 8 samples.
                                */
     279      359100 :         if (!is_chroma) {
                                   /* non-chroma order: top-left, top-right, bottom-left, bottom-right */
     280      119700 :             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
     281      119700 :             blocks += 64;
     282      119700 :             if (blocks_per_mb > 2) {
     283      119700 :                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
     284      119700 :                 blocks += 64;
     285             :             }
     286      119700 :             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
     287      119700 :             blocks += 64;
     288      119700 :             if (blocks_per_mb > 2) {
     289      119700 :                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
     290      119700 :                 blocks += 64;
     291             :             }
     292             :         } else {
                                   /* chroma order: left column top then bottom, then the right column */
     293      239400 :             ctx->fdct(&ctx->fdsp, esrc, elinesize, blocks);
     294      239400 :             blocks += 64;
     295      239400 :             ctx->fdct(&ctx->fdsp, esrc + elinesize * 4, elinesize, blocks);
     296      239400 :             blocks += 64;
     297      239400 :             if (blocks_per_mb > 2) {
     298           0 :                 ctx->fdct(&ctx->fdsp, esrc + 8, elinesize, blocks);
     299           0 :                 blocks += 64;
     300           0 :                 ctx->fdct(&ctx->fdsp, esrc + elinesize * 4 + 8, elinesize, blocks);
     301           0 :                 blocks += 64;
     302             :             }
     303             :         }
     304             : 
     305      359100 :         x += mb_width;
     306             :     }
     307             : }
     308             : 
     309           0 : static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
     310             :                            ptrdiff_t linesize, int x, int y, int w, int h,
     311             :                            int16_t *blocks, int mbs_per_slice, int abits)
     312             : {
                           /*
                            * Copy one slice of the alpha plane into blocks as 16 rows of
                            * 16 * mbs_per_slice samples.
                            *
                            * Only the part of the slice inside the w x h plane is read from src.
                            * Each sample is rescaled: shifted right by 2 when abits is 8,
                            * otherwise expanded with (v << 6) | (v >> 4). This presumably assumes
                            * 10-bit input - confirm. Columns past the plane edge repeat the last
                            * valid sample of the row, and rows past the bottom repeat the
                            * previous output row. ctx is not used.
                            */
     313           0 :     const int slice_width = 16 * mbs_per_slice;
     314             :     int i, j, copy_w, copy_h;
     315             : 
     316           0 :     copy_w = FFMIN(w - x, slice_width);
     317           0 :     copy_h = FFMIN(h - y, 16);
     318           0 :     for (i = 0; i < copy_h; i++) {
     319           0 :         memcpy(blocks, src, copy_w * sizeof(*src));
     320           0 :         if (abits == 8)
     321           0 :             for (j = 0; j < copy_w; j++)
     322           0 :                 blocks[j] >>= 2;
     323             :         else
     324           0 :             for (j = 0; j < copy_w; j++)
     325           0 :                 blocks[j] = (blocks[j] << 6) | (blocks[j] >> 4);
     326           0 :         for (j = copy_w; j < slice_width; j++)
     327           0 :             blocks[j] = blocks[copy_w - 1];
     328           0 :         blocks += slice_width;
     329           0 :         src    += linesize >> 1; /* linesize is in bytes, src steps in 16-bit samples */
     330             :     }
     331           0 :     for (; i < 16; i++) {
     332           0 :         memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
     333           0 :         blocks += slice_width;
     334             :     }
     335           0 : }
     336             : 
     337             : /**
     338             :  * Write an unsigned rice/exp golomb codeword.
                        *
                        * The codebook byte packs three fields: bits 0-1 hold switch_bits - 1,
                        * bits 2-4 the exp-Golomb order and bits 5 and up the Rice order.
                        * Values below switch_bits << rice_order are written as a Rice code,
                        * all others as an exp-Golomb code.
                        *
                        * @param pb       bit writer receiving the codeword
                        * @param codebook packed codebook descriptor
                        * @param val      value to encode; compared as unsigned against the
                        *                 switch threshold
     339             :  */
     340    26885174 : static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
     341             : {
     342             :     unsigned int rice_order, exp_order, switch_bits, switch_val;
     343             :     int exponent;
     344             : 
     345             :     /* number of prefix bits to switch between Rice and expGolomb */
     346    26885174 :     switch_bits = (codebook & 3) + 1;
     347    26885174 :     rice_order  =  codebook >> 5;       /* rice code order */
     348    26885174 :     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
     349             : 
     350    26885174 :     switch_val  = switch_bits << rice_order;
     351             : 
     352    26885174 :     if (val >= switch_val) {
                               /* exp-Golomb: a run of zero bits, then val in exponent + 1 bits */
     353     8074309 :         val -= switch_val - (1 << exp_order);
     354     8074309 :         exponent = av_log2(val);
     355             : 
     356     8074309 :         put_bits(pb, exponent - exp_order + switch_bits, 0);
     357     8074309 :         put_bits(pb, exponent + 1, val);
     358             :     } else {
                               /* Rice: quotient as zero bits, a terminating 1, then rice_order low bits */
     359    18810865 :         exponent = val >> rice_order;
     360             : 
     361    18810865 :         if (exponent)
     362     4838901 :             put_bits(pb, exponent, 0);
     363    18810865 :         put_bits(pb, 1, 1);
     364    18810865 :         if (rice_order)
     365     1904515 :             put_sbits(pb, rice_order, val);
     366             :     }
     367    26885174 : }
     368             : 
     369             : #define GET_SIGN(x)  ((x) >> 31) /* 0 for non-negative, -1 for negative; assumes 32-bit int and arithmetic right shift */
     370             : #define MAKE_CODE(x) ((((x)) * 2) ^ GET_SIGN(x)) /* signed to unsigned mapping: x >= 0 gives 2x, x < 0 gives -2x - 1 */
     371             : 
     372       33300 : static void encode_dcs(PutBitContext *pb, int16_t *blocks,
     373             :                        int blocks_per_slice, int scale)
     374             : {
                           /*
                            * Write the DC coefficient of every block in the slice.
                            *
                            * Each DC is (blocks[0] - 0x4000) / scale, where blocks steps by 64 per
                            * block. The first DC is coded directly with FIRST_DC_CB. Every later
                            * one is coded as the difference from the previous DC, negated when the
                            * previous difference was negative. The codebook for the next
                            * difference is derived from the code just written, capped at index 3.
                            */
     375             :     int i;
     376       33300 :     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
     377             : 
     378       33300 :     prev_dc = (blocks[0] - 0x4000) / scale;
     379       33300 :     encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));
     380       33300 :     sign     = 0;
     381       33300 :     codebook = 3;
     382       33300 :     blocks  += 64;
     383             : 
     384      478800 :     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
     385      445500 :         dc       = (blocks[0] - 0x4000) / scale;
     386      445500 :         delta    = dc - prev_dc;
     387      445500 :         new_sign = GET_SIGN(delta);
     388      445500 :         delta    = (delta ^ sign) - sign; /* negate delta when the previous delta was negative */
     389      445500 :         code     = MAKE_CODE(delta);
     390      445500 :         encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);
     391      445500 :         codebook = (code + (code & 1)) >> 1;
     392      445500 :         codebook = FFMIN(codebook, 3);
     393      445500 :         sign     = new_sign;
     394      445500 :         prev_dc  = dc;
     395             :     }
     396       33300 : }
     397             : 
     398       33300 : static void encode_acs(PutBitContext *pb, int16_t *blocks,
     399             :                        int blocks_per_slice,
     400             :                        int plane_size_factor,
     401             :                        const uint8_t *scan, const int16_t *qmat)
     402             : {
                           /*
                            * Write the AC coefficients of a slice as (run, level, sign) triples.
                            *
                            * Coefficients are visited position by position: for each scan index
                            * 1..63 the coefficient at scan[i] of every block in the slice is
                            * divided by qmat[scan[i]]. Zero results extend the current run; a
                            * non-zero result emits the run, abs(level) - 1 and one sign bit. The
                            * run and level codebooks for the next triple are picked from the run
                            * and level just written, clamped to 15 and 9. A trailing run of zeros
                            * is not written. plane_size_factor is not used.
                            */
     403             :     int idx, i;
     404             :     int run, level, run_cb, lev_cb;
     405             :     int max_coeffs, abs_level;
     406             : 
     407       33300 :     max_coeffs = blocks_per_slice << 6;
     408       33300 :     run_cb     = ff_prores_run_to_cb_index[4];
     409       33300 :     lev_cb     = ff_prores_lev_to_cb_index[2];
     410       33300 :     run        = 0;
     411             : 
     412     2131200 :     for (i = 1; i < 64; i++) {
     413    32262300 :         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
     414    30164400 :             level = blocks[idx] / qmat[scan[i]];
     415    30164400 :             if (level) {
     416    13203187 :                 abs_level = FFABS(level);
     417    13203187 :                 encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
     418    13203187 :                 encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
     419             :                                     abs_level - 1);
     420    13203187 :                 put_sbits(pb, 1, GET_SIGN(level));
     421             : 
     422    13203187 :                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
     423    13203187 :                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
     424    13203187 :                 run    = 0;
     425             :             } else {
     426    16961213 :                 run++;
     427             :             }
     428             :         }
     429             :     }
     430       33300 : }
     431             : 
     432       33300 : static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
     433             :                               const uint16_t *src, ptrdiff_t linesize,
     434             :                               int mbs_per_slice, int16_t *blocks,
     435             :                               int blocks_per_mb, int plane_size_factor,
     436             :                               const int16_t *qmat)
     437             : {
                           /*
                            * Entropy-code one already transformed plane of a slice: DC
                            * coefficients first (scaled by qmat[0]), then AC coefficients in
                            * ctx->scantable order. The bit writer is flushed afterwards.
                            * src and linesize are not used.
                            *
                            * Returns the number of bytes added to pb.
                            */
     438             :     int blocks_per_slice, saved_pos;
     439             : 
     440       33300 :     saved_pos = put_bits_count(pb);
     441       33300 :     blocks_per_slice = mbs_per_slice * blocks_per_mb;
     442             : 
     443       33300 :     encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
     444       33300 :     encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
     445             :                ctx->scantable, qmat);
     446       33300 :     flush_put_bits(pb);
     447             : 
     448       33300 :     return (put_bits_count(pb) - saved_pos) >> 3;
     449             : }
     450             : 
     451           0 : static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
     452             : {
                           /*
                            * Write the change from alpha value prev to cur.
                            *
                            * The difference is reduced modulo 2^abits and the top dsize values are
                            * folded to negative numbers. A zero difference, or one larger than
                            * dsize, is written as a 1 bit followed by the difference in abits
                            * bits. Any other difference is written as a 0 bit, abs(diff) - 1 in
                            * dbits - 1 bits, and a sign bit.
                            */
     453           0 :     const int dbits = (abits == 8) ? 4 : 7;
     454           0 :     const int dsize = 1 << dbits - 1; /* parses as 1 << (dbits - 1): '-' binds tighter than '<<' */
     455           0 :     int diff = cur - prev;
     456             : 
     457           0 :     diff = av_mod_uintp2(diff, abits);
     458           0 :     if (diff >= (1 << abits) - dsize)
     459           0 :         diff -= 1 << abits;
     460           0 :     if (diff < -dsize || diff > dsize || !diff) {
     461           0 :         put_bits(pb, 1, 1);
     462           0 :         put_bits(pb, abits, diff);
     463             :     } else {
     464           0 :         put_bits(pb, 1, 0);
     465           0 :         put_bits(pb, dbits - 1, FFABS(diff) - 1);
     466           0 :         put_bits(pb, 1, diff < 0);
     467             :     }
     468           0 : }
     469             : 
     470           0 : static void put_alpha_run(PutBitContext *pb, int run)
     471             : {
                           /*
                            * Write a count of repeated alpha samples. A zero run is a single
                            * 1 bit. A non-zero run is a 0 bit followed by the count in 4 bits
                            * when it is below 0x10, or in 15 bits otherwise.
                            */
     472           0 :     if (run) {
     473           0 :         put_bits(pb, 1, 0);
     474           0 :         if (run < 0x10)
     475           0 :             put_bits(pb, 4, run);
     476             :         else
     477           0 :             put_bits(pb, 15, run);
     478             :     } else {
     479           0 :         put_bits(pb, 1, 1);
     480             :     }
     481           0 : }
     482             : 
     483             : // todo alpha quantisation for high quants
     484           0 : static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
     485             :                               int mbs_per_slice, uint16_t *blocks,
     486             :                               int quant)
     487             : {
                           /*
                            * Code the mbs_per_slice * 256 alpha samples of a slice.
                            *
                            * The first sample is written as a difference from the all-ones value
                            * for ctx->alpha_bits. After that, each change of value is written as
                            * the length of the preceding run of unchanged samples followed by the
                            * difference. A non-empty run left at the end is written too. quant is
                            * not used.
                            *
                            * Returns the number of bytes added to pb after flushing.
                            */
     488           0 :     const int abits = ctx->alpha_bits;
     489           0 :     const int mask  = (1 << abits) - 1;
     490           0 :     const int num_coeffs = mbs_per_slice * 256;
     491           0 :     int saved_pos = put_bits_count(pb);
     492           0 :     int prev = mask, cur;
     493           0 :     int idx = 0;
     494           0 :     int run = 0;
     495             : 
     496           0 :     cur = blocks[idx++];
     497           0 :     put_alpha_diff(pb, cur, prev, abits);
     498           0 :     prev = cur;
     499             :     do {
     500           0 :         cur = blocks[idx++];
     501           0 :         if (cur != prev) {
     502           0 :             put_alpha_run (pb, run);
     503           0 :             put_alpha_diff(pb, cur, prev, abits);
     504           0 :             prev = cur;
     505           0 :             run  = 0;
     506             :         } else {
     507           0 :             run++;
     508             :         }
     509           0 :     } while (idx < num_coeffs);
     510           0 :     if (run)
     511           0 :         put_alpha_run(pb, run);
     512           0 :     flush_put_bits(pb);
     513           0 :     return (put_bits_count(pb) - saved_pos) >> 3;
     514             : }
     515             : 
     516       11100 : static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
     517             :                         PutBitContext *pb,
     518             :                         int sizes[4], int x, int y, int quant,
     519             :                         int mbs_per_slice)
     520             : {
                           /*
                            * Encode every plane of one slice into pb.
                            *
                            * x and y are the slice position in macroblock units. sizes[i] receives
                            * the byte size of plane i. Planes 0-2 go through get_slice_data() and
                            * encode_slice_plane(); plane 3 goes through the alpha path.
                            *
                            * The quantisation matrix is ctx->quants[0] when force_quant is set,
                            * ctx->quants[quant] when quant < MAX_STORED_Q, and otherwise
                            * ctx->custom_q rebuilt here as quant_mat[i] * quant.
                            *
                            * Returns the total byte size of the slice, or AVERROR_BUG when the
                            * bit writer has run past the end of its buffer.
                            */
     521       11100 :     ProresContext *ctx = avctx->priv_data;
     522             :     int i, xp, yp;
     523       11100 :     int total_size = 0;
     524             :     const uint16_t *src;
     525       11100 :     int slice_width_factor = av_log2(mbs_per_slice);
     526             :     int num_cblocks, pwidth, line_add;
     527             :     ptrdiff_t linesize;
     528             :     int plane_factor, is_chroma;
     529             :     int16_t *qmat; /* matches the int16_t element type of ctx->quants[] and ctx->custom_q */
     530             : 
                           /* for interlaced content pick the first line of the field being coded */
     531       11100 :     if (ctx->pictures_per_frame == 1)
     532       11100 :         line_add = 0;
     533             :     else
     534           0 :         line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
     535             : 
     536       11100 :     if (ctx->force_quant) {
     537           0 :         qmat = ctx->quants[0];
     538       11100 :     } else if (quant < MAX_STORED_Q) {
     539        8353 :         qmat = ctx->quants[quant];
     540             :     } else {
     541        2747 :         qmat = ctx->custom_q;
     542      178555 :         for (i = 0; i < 64; i++)
     543      175808 :             qmat[i] = ctx->quant_mat[i] * quant;
     544             :     }
     545             : 
     546       44400 :     for (i = 0; i < ctx->num_planes; i++) {
     547       33300 :         is_chroma    = (i == 1 || i == 2);
     548       33300 :         plane_factor = slice_width_factor + 2;
     549       33300 :         if (is_chroma)
     550       22200 :             plane_factor += ctx->chroma_factor - 3;
     551       33300 :         if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
                                   /* full-width plane: 16 samples and 4 blocks per macroblock */
     552       11100 :             xp          = x << 4;
     553       11100 :             yp          = y << 4;
     554       11100 :             num_cblocks = 4;
     555       11100 :             pwidth      = avctx->width;
     556             :         } else {
                                   /* half-width chroma: 8 samples and 2 blocks per macroblock */
     557       22200 :             xp          = x << 3;
     558       22200 :             yp          = y << 4;
     559       22200 :             num_cblocks = 2;
     560       22200 :             pwidth      = avctx->width >> 1;
     561             :         }
     562             : 
                               /* the stride spans pictures_per_frame frame lines, so fields are read separately */
     563       33300 :         linesize = pic->linesize[i] * ctx->pictures_per_frame;
     564       99900 :         src = (const uint16_t*)(pic->data[i] + yp * linesize +
     565       66600 :                                 line_add * pic->linesize[i]) + xp;
     566             : 
     567       33300 :         if (i < 3) {
     568       33300 :             get_slice_data(ctx, src, linesize, xp, yp,
     569       33300 :                            pwidth, avctx->height / ctx->pictures_per_frame,
     570       33300 :                            ctx->blocks[0], ctx->emu_buf,
     571             :                            mbs_per_slice, num_cblocks, is_chroma);
     572       33300 :             sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
     573       33300 :                                           mbs_per_slice, ctx->blocks[0],
     574             :                                           num_cblocks, plane_factor,
     575             :                                           qmat);
     576             :         } else {
     577           0 :             get_alpha_data(ctx, src, linesize, xp, yp,
     578           0 :                            pwidth, avctx->height / ctx->pictures_per_frame,
     579           0 :                            ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
     580           0 :             sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
     581           0 :                                           ctx->blocks[0], quant);
     582             :         }
     583       33300 :         total_size += sizes[i];
     584       33300 :         if (put_bits_left(pb) < 0) {
     585           0 :             av_log(avctx, AV_LOG_ERROR,
     586             :                    "Underestimated required buffer size.\n");
     587           0 :             return AVERROR_BUG;
     588             :         }
     589             :     }
     590       11100 :     return total_size;
     591             : }
     592             : 
     593   390320122 : static inline int estimate_vlc(unsigned codebook, int val)
     594             : {
     595             :     unsigned int rice_order, exp_order, switch_bits, switch_val;
     596             :     int exponent;
     597             : 
     598             :     /* number of prefix bits to switch between Rice and expGolomb */
     599   390320122 :     switch_bits = (codebook & 3) + 1;
     600   390320122 :     rice_order  =  codebook >> 5;       /* rice code order */
     601   390320122 :     exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
     602             : 
     603   390320122 :     switch_val  = switch_bits << rice_order;
     604             : 
     605   390320122 :     if (val >= switch_val) {
     606   135366530 :         val -= switch_val - (1 << exp_order);
     607   135366530 :         exponent = av_log2(val);
     608             : 
     609   135366530 :         return exponent * 2 - exp_order + switch_bits + 1;
     610             :     } else {
     611   254953592 :         return (val >> rice_order) + rice_order + 1;
     612             :     }
     613             : }
     614             : 
     615      379686 : static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
     616             :                         int scale)
     617             : {
     618             :     int i;
     619      379686 :     int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
     620             :     int bits;
     621             : 
     622      379686 :     prev_dc  = (blocks[0] - 0x4000) / scale;
     623      379686 :     bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
     624      379686 :     sign     = 0;
     625      379686 :     codebook = 3;
     626      379686 :     blocks  += 64;
     627      379686 :     *error  += FFABS(blocks[0] - 0x4000) % scale;
     628             : 
     629     5449672 :     for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
     630     5069986 :         dc       = (blocks[0] - 0x4000) / scale;
     631     5069986 :         *error  += FFABS(blocks[0] - 0x4000) % scale;
     632     5069986 :         delta    = dc - prev_dc;
     633     5069986 :         new_sign = GET_SIGN(delta);
     634     5069986 :         delta    = (delta ^ sign) - sign;
     635     5069986 :         code     = MAKE_CODE(delta);
     636     5069986 :         bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
     637     5069986 :         codebook = (code + (code & 1)) >> 1;
     638     5069986 :         codebook = FFMIN(codebook, 3);
     639     5069986 :         sign     = new_sign;
     640     5069986 :         prev_dc  = dc;
     641             :     }
     642             : 
     643      379686 :     return bits;
     644             : }
     645             : 
     646      379686 : static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
     647             :                         int plane_size_factor,
     648             :                         const uint8_t *scan, const int16_t *qmat)
     649             : {
     650             :     int idx, i;
     651             :     int run, level, run_cb, lev_cb;
     652             :     int max_coeffs, abs_level;
     653      379686 :     int bits = 0;
     654             : 
     655      379686 :     max_coeffs = blocks_per_slice << 6;
     656      379686 :     run_cb     = ff_prores_run_to_cb_index[4];
     657      379686 :     lev_cb     = ff_prores_lev_to_cb_index[2];
     658      379686 :     run        = 0;
     659             : 
     660    24299904 :     for (i = 1; i < 64; i++) {
     661   367249554 :         for (idx = scan[i]; idx < max_coeffs; idx += 64) {
     662   343329336 :             level   = blocks[idx] / qmat[scan[i]];
     663   343329336 :             *error += FFABS(blocks[idx]) % qmat[scan[i]];
     664   343329336 :             if (level) {
     665   192435225 :                 abs_level = FFABS(level);
     666   192435225 :                 bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
     667   384870450 :                 bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
     668   192435225 :                                      abs_level - 1) + 1;
     669             : 
     670   192435225 :                 run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
     671   192435225 :                 lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
     672   192435225 :                 run    = 0;
     673             :             } else {
     674   150894111 :                 run++;
     675             :             }
     676             :         }
     677             :     }
     678             : 
     679      379686 :     return bits;
     680             : }
     681             : 
     682      379686 : static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
     683             :                                 const uint16_t *src, ptrdiff_t linesize,
     684             :                                 int mbs_per_slice,
     685             :                                 int blocks_per_mb, int plane_size_factor,
     686             :                                 const int16_t *qmat, ProresThreadData *td)
     687             : {
     688             :     int blocks_per_slice;
     689             :     int bits;
     690             : 
     691      379686 :     blocks_per_slice = mbs_per_slice * blocks_per_mb;
     692             : 
     693      379686 :     bits  = estimate_dcs(error, td->blocks[plane], blocks_per_slice, qmat[0]);
     694      379686 :     bits += estimate_acs(error, td->blocks[plane], blocks_per_slice,
     695             :                          plane_size_factor, ctx->scantable, qmat);
     696             : 
     697      379686 :     return FFALIGN(bits, 8);
     698             : }
     699             : 
     700           0 : static int est_alpha_diff(int cur, int prev, int abits)
     701             : {
     702           0 :     const int dbits = (abits == 8) ? 4 : 7;
     703           0 :     const int dsize = 1 << dbits - 1;
     704           0 :     int diff = cur - prev;
     705             : 
     706           0 :     diff = av_mod_uintp2(diff, abits);
     707           0 :     if (diff >= (1 << abits) - dsize)
     708           0 :         diff -= 1 << abits;
     709           0 :     if (diff < -dsize || diff > dsize || !diff)
     710           0 :         return abits + 1;
     711             :     else
     712           0 :         return dbits + 1;
     713             : }
     714             : 
     715           0 : static int estimate_alpha_plane(ProresContext *ctx, int *error,
     716             :                                 const uint16_t *src, ptrdiff_t linesize,
     717             :                                 int mbs_per_slice, int quant,
     718             :                                 int16_t *blocks)
     719             : {
     720           0 :     const int abits = ctx->alpha_bits;
     721           0 :     const int mask  = (1 << abits) - 1;
     722           0 :     const int num_coeffs = mbs_per_slice * 256;
     723           0 :     int prev = mask, cur;
     724           0 :     int idx = 0;
     725           0 :     int run = 0;
     726             :     int bits;
     727             : 
     728           0 :     *error = 0;
     729           0 :     cur = blocks[idx++];
     730           0 :     bits = est_alpha_diff(cur, prev, abits);
     731           0 :     prev = cur;
     732             :     do {
     733           0 :         cur = blocks[idx++];
     734           0 :         if (cur != prev) {
     735           0 :             if (!run)
     736           0 :                 bits++;
     737           0 :             else if (run < 0x10)
     738           0 :                 bits += 4;
     739             :             else
     740           0 :                 bits += 15;
     741           0 :             bits += est_alpha_diff(cur, prev, abits);
     742           0 :             prev = cur;
     743           0 :             run  = 0;
     744             :         } else {
     745           0 :             run++;
     746             :         }
     747           0 :     } while (idx < num_coeffs);
     748             : 
     749           0 :     if (run) {
     750           0 :         if (run < 0x10)
     751           0 :             bits += 4;
     752             :         else
     753           0 :             bits += 15;
     754             :     }
     755             : 
     756           0 :     return bits;
     757             : }
     758             : 
     759       11100 : static int find_slice_quant(AVCodecContext *avctx,
     760             :                             int trellis_node, int x, int y, int mbs_per_slice,
     761             :                             ProresThreadData *td)
     762             : {
                           /*
                            * Evaluate every candidate quantiser for the slice at macroblock
                            * position (x, y) and fill one column of the rate-control trellis.
                            *
                            * trellis_node is the index of the first node of this slice's column
                            * in td->nodes; the column of the previous slice is expected at
                            * trellis_node - TRELLIS_WIDTH (it is only read here, so it must have
                            * been set up by an earlier call or by the caller).
                            *
                            * Steps:
                            *  1. fetch the slice samples of every plane into td->blocks[]
                            *  2. estimate bits and distortion for each quantiser in
                            *     [min_quant, max_quant]
                            *  3. build one extra "overquant" candidate stored at max_quant + 1
                            *  4. connect each candidate to the best node of the previous column
                            *
                            * Returns the index (into td->nodes) of the node in this column with
                            * the lowest score; on ties the higher quantiser index wins.
                            */
     763       11100 :     ProresContext *ctx = avctx->priv_data;
     764             :     int i, q, pq, xp, yp;
     765             :     const uint16_t *src;
     766       11100 :     int slice_width_factor = av_log2(mbs_per_slice);
     767             :     int num_cblocks[MAX_PLANES], pwidth;
     768             :     int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
     769       11100 :     const int min_quant = ctx->profile_info->min_quant;
     770       11100 :     const int max_quant = ctx->profile_info->max_quant;
     771             :     int error, bits, bits_limit;
     772             :     int mbs, prev, cur, new_score;
     773             :     int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
     774             :     int overquant;
                           /* NOTE(review): qmat is uint16_t * but is passed to
                            * estimate_slice_plane(), which takes const int16_t * -- the
                            * signedness of the pointee differs; confirm and align the types. */
     775             :     uint16_t *qmat;
     776             :     int linesize[4], line_add;
     777             : 
                           /* line_add is the extra line offset applied to the plane start;
                            * it is non-zero only when a frame holds more than one picture. */
     778       11100 :     if (ctx->pictures_per_frame == 1)
     779       11100 :         line_add = 0;
     780             :     else
     781           0 :         line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
                           /* macroblocks of this row up to and including the current slice;
                            * used below for the cumulative bit budget */
     782       11100 :     mbs = x + mbs_per_slice;
     783             : 
                           /* Step 1: per-plane geometry and sample fetch. */
     784       44400 :     for (i = 0; i < ctx->num_planes; i++) {
     785       33300 :         is_chroma[i]    = (i == 1 || i == 2);
     786       33300 :         plane_factor[i] = slice_width_factor + 2;
     787       33300 :         if (is_chroma[i])
     788       22200 :             plane_factor[i] += ctx->chroma_factor - 3;
                               /* non-chroma planes, and chroma when chroma_factor is
                                * CFACTOR_Y444, use 16 samples and 4 blocks per macroblock;
                                * other chroma uses 8 samples horizontally and 2 blocks */
     789       33300 :         if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
     790       11100 :             xp             = x << 4;
     791       11100 :             yp             = y << 4;
     792       11100 :             num_cblocks[i] = 4;
     793       11100 :             pwidth         = avctx->width;
     794             :         } else {
     795       22200 :             xp             = x << 3;
     796       22200 :             yp             = y << 4;
     797       22200 :             num_cblocks[i] = 2;
     798       22200 :             pwidth         = avctx->width >> 1;
     799             :         }
     800             : 
                               /* the byte offset is computed first, then xp is added in units
                                * of 16-bit samples */
     801       33300 :         linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
     802       99900 :         src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
     803       66600 :                                  line_add * ctx->pic->linesize[i]) + xp;
     804             : 
                               /* planes 0..2 go through get_slice_data(), plane 3 (alpha)
                                * through get_alpha_data(); both write into td->blocks[i] */
     805       33300 :         if (i < 3) {
     806       66600 :             get_slice_data(ctx, src, linesize[i], xp, yp,
     807       33300 :                            pwidth, avctx->height / ctx->pictures_per_frame,
     808       33300 :                            td->blocks[i], td->emu_buf,
     809             :                            mbs_per_slice, num_cblocks[i], is_chroma[i]);
     810             :         } else {
     811           0 :             get_alpha_data(ctx, src, linesize[i], xp, yp,
     812           0 :                            pwidth, avctx->height / ctx->pictures_per_frame,
     813           0 :                            td->blocks[i], mbs_per_slice, ctx->alpha_bits);
     814             :         }
     815             :     }
     816             : 
                           /* Reset this column: one node per quantiser plus the extra
                            * overquant node at max_quant + 1, none linked to a predecessor. */
     817       88800 :     for (q = min_quant; q < max_quant + 2; q++) {
     818       77700 :         td->nodes[trellis_node + q].prev_node = -1;
     819       77700 :         td->nodes[trellis_node + q].quant     = q;
     820             :     }
     821             : 
                           /* Step 2: size and distortion for each regular quantiser.
                            * NOTE(review): src still points at the last plane fetched above;
                            * estimate_slice_plane() does not use it. */
     822             :     // todo: maybe perform coarser quantising to fit into frame size when needed
     823       77700 :     for (q = min_quant; q <= max_quant; q++) {
     824       66600 :         bits  = 0;
     825       66600 :         error = 0;
                               /* colour planes only; the alpha plane, if any, is handled below */
     826      266400 :         for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
     827      399600 :             bits += estimate_slice_plane(ctx, &error, i,
     828      199800 :                                          src, linesize[i],
     829             :                                          mbs_per_slice,
     830             :                                          num_cblocks[i], plane_factor[i],
     831      199800 :                                          ctx->quants[q], td);
     832             :         }
     833       66600 :         if (ctx->alpha_bits)
     834           0 :             bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
     835           0 :                                          mbs_per_slice, q, td->blocks[3]);
                               /* a slice estimated above 65000 bytes is marked unusable
                                * (presumably to stay within a 16-bit size field -- confirm) */
     836       66600 :         if (bits > 65000 * 8)
     837           0 :             error = SCORE_LIMIT;
     838             : 
     839       66600 :         slice_bits[q]  = bits;
     840       66600 :         slice_score[q] = error;
     841             :     }
                           /* Step 3: the overquant candidate at index max_quant + 1.
                            * If max_quant already fits the per-slice budget, reuse its size with
                            * a score one higher; otherwise raise the quantiser (up to 127) until
                            * the estimate fits the budget. */
     842       11100 :     if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
     843        3643 :         slice_bits[max_quant + 1]  = slice_bits[max_quant];
     844        3643 :         slice_score[max_quant + 1] = slice_score[max_quant] + 1;
     845        3643 :         overquant = max_quant;
     846             :     } else {
     847       59962 :         for (q = max_quant + 1; q < 128; q++) {
     848       59962 :             bits  = 0;
     849       59962 :             error = 0;
                                   /* quantisers below MAX_STORED_Q have a ready-made matrix;
                                    * above that the matrix is built into td->custom_q */
     850       59962 :             if (q < MAX_STORED_Q) {
     851       44939 :                 qmat = ctx->quants[q];
     852             :             } else {
     853       15023 :                 qmat = td->custom_q;
     854      976495 :                 for (i = 0; i < 64; i++)
     855      961472 :                     qmat[i] = ctx->quant_mat[i] * q;
     856             :             }
     857      239848 :             for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
     858      359772 :                 bits += estimate_slice_plane(ctx, &error, i,
     859      179886 :                                              src, linesize[i],
     860             :                                              mbs_per_slice,
     861             :                                              num_cblocks[i], plane_factor[i],
     862             :                                              qmat, td);
     863             :             }
     864       59962 :             if (ctx->alpha_bits)
     865           0 :                 bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
     866           0 :                                              mbs_per_slice, q, td->blocks[3]);
     867       59962 :             if (bits <= ctx->bits_per_mb * mbs_per_slice)
     868        7457 :                 break;
     869             :         }
     870             : 
                               /* if the loop ran out without fitting, q is 128 and bits/error
                                * hold the values computed for q == 127 */
     871        7457 :         slice_bits[max_quant + 1]  = bits;
     872        7457 :         slice_score[max_quant + 1] = error;
     873        7457 :         overquant = q;
     874             :     }
     875       11100 :     td->nodes[trellis_node + max_quant + 1].quant = overquant;
     876             : 
                           /* Step 4: link every candidate of this column to a node of the
                            * previous column. bits_limit is the cumulative budget for all
                            * macroblocks of the row up to the end of this slice. */
     877       11100 :     bits_limit = mbs * ctx->bits_per_mb;
     878       88800 :     for (pq = min_quant; pq < max_quant + 2; pq++) {
     879       77700 :         prev = trellis_node - TRELLIS_WIDTH + pq;
     880             : 
     881      621600 :         for (q = min_quant; q < max_quant + 2; q++) {
     882      543900 :             cur = trellis_node + q;
     883             : 
     884      543900 :             bits  = td->nodes[prev].bits + slice_bits[q];
     885      543900 :             error = slice_score[q];
                                   /* paths that exceed the cumulative budget are marked unusable */
     886      543900 :             if (bits > bits_limit)
     887      438632 :                 error = SCORE_LIMIT;
     888             : 
                                   /* SCORE_LIMIT is kept as-is instead of being summed */
     889      543900 :             if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
     890       98480 :                 new_score = td->nodes[prev].score + error;
     891             :             else
     892      445420 :                 new_score = SCORE_LIMIT;
                                   /* take this predecessor if the node is still unlinked or the
                                    * new score is not worse (ties go to the later predecessor) */
     893     1010100 :             if (td->nodes[cur].prev_node == -1 ||
     894      466200 :                 td->nodes[cur].score >= new_score) {
     895             : 
     896      505564 :                 td->nodes[cur].bits      = bits;
     897      505564 :                 td->nodes[cur].score     = new_score;
     898      505564 :                 td->nodes[cur].prev_node = prev;
     899             :             }
     900             :         }
     901             :     }
     902             : 
                           /* Pick the node with the lowest score in this column; "<=" makes a
                            * later (higher index) node win on equal scores. */
     903       11100 :     error = td->nodes[trellis_node + min_quant].score;
     904       11100 :     pq    = trellis_node + min_quant;
     905       77700 :     for (q = min_quant + 1; q < max_quant + 2; q++) {
     906       66600 :         if (td->nodes[trellis_node + q].score <= error) {
     907       52033 :             error = td->nodes[trellis_node + q].score;
     908       52033 :             pq    = trellis_node + q;
     909             :         }
     910             :     }
     911             : 
     912       11100 :     return pq;
     913             : }
     914             : 
     915        2850 : static int find_quant_thread(AVCodecContext *avctx, void *arg,
     916             :                              int jobnr, int threadnr)
     917             : {
     918        2850 :     ProresContext *ctx = avctx->priv_data;
     919        2850 :     ProresThreadData *td = ctx->tdata + threadnr;
     920        2850 :     int mbs_per_slice = ctx->mbs_per_slice;
     921        2850 :     int x, y = jobnr, mb, q = 0;
     922             : 
     923       13950 :     for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
     924       28050 :         while (ctx->mb_width - x < mbs_per_slice)
     925        5850 :             mbs_per_slice >>= 1;
     926       11100 :         q = find_slice_quant(avctx,
     927       11100 :                              (mb + 1) * TRELLIS_WIDTH, x, y,
     928             :                              mbs_per_slice, td);
     929             :     }
     930             : 
     931       13950 :     for (x = ctx->slices_width - 1; x >= 0; x--) {
     932       11100 :         ctx->slice_q[x + y * ctx->slices_width] = td->nodes[q].quant;
     933       11100 :         q = td->nodes[q].prev_node;
     934             :     }
     935             : 
     936        2850 :     return 0;
     937             : }
     938             : 
     939         200 : static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     940             :                         const AVFrame *pic, int *got_packet)
     941             : {
     942         200 :     ProresContext *ctx = avctx->priv_data;
     943             :     uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
     944             :     uint8_t *picture_size_pos;
     945             :     PutBitContext pb;
     946         200 :     int x, y, i, mb, q = 0;
     947         200 :     int sizes[4] = { 0 };
     948         200 :     int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
     949             :     int frame_size, picture_size, slice_size;
     950             :     int pkt_size, ret;
     951         200 :     int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
     952             :     uint8_t frame_flags;
     953             : 
     954         200 :     ctx->pic = pic;
     955         200 :     pkt_size = ctx->frame_size_upper_bound;
     956             : 
     957         200 :     if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
     958           0 :         return ret;
     959             : 
     960         200 :     orig_buf = pkt->data;
     961             : 
     962             :     // frame atom
     963         200 :     orig_buf += 4;                              // frame size
     964         200 :     bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
     965         200 :     buf = orig_buf;
     966             : 
     967             :     // frame header
     968         200 :     tmp = buf;
     969         200 :     buf += 2;                                   // frame header size will be stored here
     970         200 :     bytestream_put_be16  (&buf, 0);             // version 1
     971         200 :     bytestream_put_buffer(&buf, ctx->vendor, 4);
     972         200 :     bytestream_put_be16  (&buf, avctx->width);
     973         200 :     bytestream_put_be16  (&buf, avctx->height);
     974             : 
     975         200 :     frame_flags = ctx->chroma_factor << 6;
     976         200 :     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
     977           0 :         frame_flags |= pic->top_field_first ? 0x04 : 0x08;
     978         200 :     bytestream_put_byte  (&buf, frame_flags);
     979             : 
     980         200 :     bytestream_put_byte  (&buf, 0);             // reserved
     981         200 :     bytestream_put_byte  (&buf, pic->color_primaries);
     982         200 :     bytestream_put_byte  (&buf, pic->color_trc);
     983         200 :     bytestream_put_byte  (&buf, pic->colorspace);
     984         200 :     bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
     985         200 :     bytestream_put_byte  (&buf, 0);             // reserved
     986         200 :     if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
     987         200 :         bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
     988             :         // luma quantisation matrix
     989       13000 :         for (i = 0; i < 64; i++)
     990       12800 :             bytestream_put_byte(&buf, ctx->quant_mat[i]);
     991             :         // chroma quantisation matrix
     992       13000 :         for (i = 0; i < 64; i++)
     993       12800 :             bytestream_put_byte(&buf, ctx->quant_mat[i]);
     994             :     } else {
     995           0 :         bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
     996             :     }
     997         200 :     bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
     998             : 
     999         600 :     for (ctx->cur_picture_idx = 0;
    1000         400 :          ctx->cur_picture_idx < ctx->pictures_per_frame;
    1001         200 :          ctx->cur_picture_idx++) {
    1002             :         // picture header
    1003         200 :         picture_size_pos = buf + 1;
    1004         200 :         bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
    1005         200 :         buf += 4;                                   // picture data size will be stored here
    1006         200 :         bytestream_put_be16  (&buf, ctx->slices_per_picture);
    1007         200 :         bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
    1008             : 
    1009             :         // seek table - will be filled during slice encoding
    1010         200 :         slice_sizes = buf;
    1011         200 :         buf += ctx->slices_per_picture * 2;
    1012             : 
    1013             :         // slices
    1014         200 :         if (!ctx->force_quant) {
    1015         200 :             ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
    1016             :                                   ctx->mb_height);
    1017         200 :             if (ret)
    1018           0 :                 return ret;
    1019             :         }
    1020             : 
    1021        3050 :         for (y = 0; y < ctx->mb_height; y++) {
    1022        2850 :             int mbs_per_slice = ctx->mbs_per_slice;
    1023       13950 :             for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
    1024       22200 :                 q = ctx->force_quant ? ctx->force_quant
    1025       11100 :                                      : ctx->slice_q[mb + y * ctx->slices_width];
    1026             : 
    1027       28050 :                 while (ctx->mb_width - x < mbs_per_slice)
    1028        5850 :                     mbs_per_slice >>= 1;
    1029             : 
    1030       11100 :                 bytestream_put_byte(&buf, slice_hdr_size << 3);
    1031       11100 :                 slice_hdr = buf;
    1032       11100 :                 buf += slice_hdr_size - 1;
    1033       11100 :                 if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
    1034           0 :                     uint8_t *start = pkt->data;
    1035             :                     // Recompute new size according to max_slice_size
    1036             :                     // and deduce delta
    1037           0 :                     int delta = 200 + (ctx->pictures_per_frame *
    1038           0 :                                 ctx->slices_per_picture + 1) *
    1039             :                                 max_slice_size - pkt_size;
    1040             : 
    1041           0 :                     delta = FFMAX(delta, 2 * max_slice_size);
    1042           0 :                     ctx->frame_size_upper_bound += delta;
    1043             : 
    1044           0 :                     if (!ctx->warn) {
    1045           0 :                         avpriv_request_sample(avctx,
    1046             :                                               "Packet too small: is %i,"
    1047             :                                               " needs %i (slice: %i). "
    1048             :                                               "Correct allocation",
    1049             :                                               pkt_size, delta, max_slice_size);
    1050           0 :                         ctx->warn = 1;
    1051             :                     }
    1052             : 
    1053           0 :                     ret = av_grow_packet(pkt, delta);
    1054           0 :                     if (ret < 0)
    1055           0 :                         return ret;
    1056             : 
    1057           0 :                     pkt_size += delta;
    1058             :                     // restore pointers
    1059           0 :                     orig_buf         = pkt->data + (orig_buf         - start);
    1060           0 :                     buf              = pkt->data + (buf              - start);
    1061           0 :                     picture_size_pos = pkt->data + (picture_size_pos - start);
    1062           0 :                     slice_sizes      = pkt->data + (slice_sizes      - start);
    1063           0 :                     slice_hdr        = pkt->data + (slice_hdr        - start);
    1064           0 :                     tmp              = pkt->data + (tmp              - start);
    1065             :                 }
    1066       11100 :                 init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
    1067       11100 :                 ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
    1068             :                                    mbs_per_slice);
    1069       11100 :                 if (ret < 0)
    1070           0 :                     return ret;
    1071             : 
    1072       11100 :                 bytestream_put_byte(&slice_hdr, q);
    1073       11100 :                 slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
    1074       33300 :                 for (i = 0; i < ctx->num_planes - 1; i++) {
    1075       22200 :                     bytestream_put_be16(&slice_hdr, sizes[i]);
    1076       22200 :                     slice_size += sizes[i];
    1077             :                 }
    1078       11100 :                 bytestream_put_be16(&slice_sizes, slice_size);
    1079       11100 :                 buf += slice_size - slice_hdr_size;
    1080       11100 :                 if (max_slice_size < slice_size)
    1081         216 :                     max_slice_size = slice_size;
    1082             :             }
    1083             :         }
    1084             : 
    1085         200 :         picture_size = buf - (picture_size_pos - 1);
    1086         200 :         bytestream_put_be32(&picture_size_pos, picture_size);
    1087             :     }
    1088             : 
    1089         200 :     orig_buf -= 8;
    1090         200 :     frame_size = buf - orig_buf;
    1091         200 :     bytestream_put_be32(&orig_buf, frame_size);
    1092             : 
    1093         200 :     pkt->size   = frame_size;
    1094         200 :     pkt->flags |= AV_PKT_FLAG_KEY;
    1095         200 :     *got_packet = 1;
    1096             : 
    1097         200 :     return 0;
    1098             : }
    1099             : 
    1100           4 : static av_cold int encode_close(AVCodecContext *avctx)
    1101             : {
    1102           4 :     ProresContext *ctx = avctx->priv_data;
    1103             :     int i;
    1104             : 
    1105           4 :     if (ctx->tdata) {
    1106           8 :         for (i = 0; i < avctx->thread_count; i++)
    1107           4 :             av_freep(&ctx->tdata[i].nodes);
    1108             :     }
    1109           4 :     av_freep(&ctx->tdata);
    1110           4 :     av_freep(&ctx->slice_q);
    1111             : 
    1112           4 :     return 0;
    1113             : }
    1114             : 
    1115      957600 : static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
    1116             :                         ptrdiff_t linesize, int16_t *block)
    1117             : {
    1118             :     int x, y;
    1119      957600 :     const uint16_t *tsrc = src;
    1120             : 
    1121     8618400 :     for (y = 0; y < 8; y++) {
    1122    68947200 :         for (x = 0; x < 8; x++)
    1123    61286400 :             block[y * 8 + x] = tsrc[x];
    1124     7660800 :         tsrc += linesize >> 1;
    1125             :     }
    1126      957600 :     fdsp->fdct(block);
    1127      957600 : }
    1128             : 
    1129           4 : static av_cold int encode_init(AVCodecContext *avctx)
    1130             : {
    1131           4 :     ProresContext *ctx = avctx->priv_data;
    1132             :     int mps;
    1133             :     int i, j;
    1134             :     int min_quant, max_quant;
    1135           4 :     int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);
    1136             : 
    1137           4 :     avctx->bits_per_raw_sample = 10;
    1138             : #if FF_API_CODED_FRAME
    1139             : FF_DISABLE_DEPRECATION_WARNINGS
    1140           4 :     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    1141           4 :     avctx->coded_frame->key_frame = 1;
    1142             : FF_ENABLE_DEPRECATION_WARNINGS
    1143             : #endif
    1144             : 
    1145           4 :     ctx->fdct      = prores_fdct;
    1146           4 :     ctx->scantable = interlaced ? ff_prores_interlaced_scan
    1147           4 :                                 : ff_prores_progressive_scan;
    1148           4 :     ff_fdctdsp_init(&ctx->fdsp, avctx);
    1149             : 
    1150           4 :     mps = ctx->mbs_per_slice;
    1151           4 :     if (mps & (mps - 1)) {
    1152           0 :         av_log(avctx, AV_LOG_ERROR,
    1153             :                "there should be an integer power of two MBs per slice\n");
    1154           0 :         return AVERROR(EINVAL);
    1155             :     }
    1156           4 :     if (ctx->profile == PRORES_PROFILE_AUTO) {
    1157           0 :         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    1158           0 :         ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
    1159           0 :                         !(desc->log2_chroma_w + desc->log2_chroma_h))
    1160           0 :                      ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
    1161           0 :         av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
    1162           0 :                "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
    1163             :                ? "4:4:4:4 profile because of the used input colorspace"
    1164             :                : "HQ profile to keep best quality");
    1165             :     }
    1166           4 :     if (av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_ALPHA) {
    1167           0 :         if (ctx->profile != PRORES_PROFILE_4444 &&
    1168           0 :             ctx->profile != PRORES_PROFILE_4444XQ) {
    1169             :             // force alpha and warn
    1170           0 :             av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
    1171             :                    "encode alpha. Override with -profile if needed.\n");
    1172           0 :             ctx->alpha_bits = 0;
    1173             :         }
    1174           0 :         if (ctx->alpha_bits & 7) {
    1175           0 :             av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
    1176           0 :             return AVERROR(EINVAL);
    1177             :         }
    1178           0 :         avctx->bits_per_coded_sample = 32;
    1179             :     } else {
    1180           4 :         ctx->alpha_bits = 0;
    1181             :     }
    1182             : 
    1183           8 :     ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
    1184             :                          ? CFACTOR_Y422
    1185           4 :                          : CFACTOR_Y444;
    1186           4 :     ctx->profile_info  = prores_profile_info + ctx->profile;
    1187           4 :     ctx->num_planes    = 3 + !!ctx->alpha_bits;
    1188             : 
    1189           4 :     ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;
    1190             : 
    1191           4 :     if (interlaced)
    1192           0 :         ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
    1193             :     else
    1194           4 :         ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
    1195             : 
    1196           4 :     ctx->slices_width  = ctx->mb_width / mps;
    1197           4 :     ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
    1198           4 :     ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
    1199           4 :     ctx->pictures_per_frame = 1 + interlaced;
    1200             : 
    1201           4 :     if (ctx->quant_sel == -1)
    1202           4 :         ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
    1203             :     else
    1204           0 :         ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];
    1205             : 
    1206           4 :     if (strlen(ctx->vendor) != 4) {
    1207           0 :         av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
    1208           0 :         return AVERROR_INVALIDDATA;
    1209             :     }
    1210             : 
    1211           4 :     ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
    1212           4 :     if (!ctx->force_quant) {
    1213           4 :         if (!ctx->bits_per_mb) {
    1214           4 :             for (i = 0; i < NUM_MB_LIMITS - 1; i++)
    1215           8 :                 if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
    1216           4 :                                            ctx->pictures_per_frame)
    1217           4 :                     break;
    1218           4 :             ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
    1219           4 :             if (ctx->alpha_bits)
    1220           0 :                 ctx->bits_per_mb *= 20;
    1221           0 :         } else if (ctx->bits_per_mb < 128) {
    1222           0 :             av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
    1223           0 :             return AVERROR_INVALIDDATA;
    1224             :         }
    1225             : 
    1226           4 :         min_quant = ctx->profile_info->min_quant;
    1227           4 :         max_quant = ctx->profile_info->max_quant;
    1228          64 :         for (i = min_quant; i < MAX_STORED_Q; i++) {
    1229        3900 :             for (j = 0; j < 64; j++)
    1230        3840 :                 ctx->quants[i][j] = ctx->quant_mat[j] * i;
    1231             :         }
    1232             : 
    1233           4 :         ctx->slice_q = av_malloc(ctx->slices_per_picture * sizeof(*ctx->slice_q));
    1234           4 :         if (!ctx->slice_q) {
    1235           0 :             encode_close(avctx);
    1236           0 :             return AVERROR(ENOMEM);
    1237             :         }
    1238             : 
    1239           4 :         ctx->tdata = av_mallocz(avctx->thread_count * sizeof(*ctx->tdata));
    1240           4 :         if (!ctx->tdata) {
    1241           0 :             encode_close(avctx);
    1242           0 :             return AVERROR(ENOMEM);
    1243             :         }
    1244             : 
    1245           8 :         for (j = 0; j < avctx->thread_count; j++) {
    1246           4 :             ctx->tdata[j].nodes = av_malloc((ctx->slices_width + 1)
    1247             :                                             * TRELLIS_WIDTH
    1248             :                                             * sizeof(*ctx->tdata->nodes));
    1249           4 :             if (!ctx->tdata[j].nodes) {
    1250           0 :                 encode_close(avctx);
    1251           0 :                 return AVERROR(ENOMEM);
    1252             :             }
    1253          32 :             for (i = min_quant; i < max_quant + 2; i++) {
    1254          28 :                 ctx->tdata[j].nodes[i].prev_node = -1;
    1255          28 :                 ctx->tdata[j].nodes[i].bits      = 0;
    1256          28 :                 ctx->tdata[j].nodes[i].score     = 0;
    1257             :             }
    1258             :         }
    1259             :     } else {
    1260           0 :         int ls = 0;
    1261             : 
    1262           0 :         if (ctx->force_quant > 64) {
    1263           0 :             av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
    1264           0 :             return AVERROR_INVALIDDATA;
    1265             :         }
    1266             : 
    1267           0 :         for (j = 0; j < 64; j++) {
    1268           0 :             ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
    1269           0 :             ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
    1270             :         }
    1271             : 
    1272           0 :         ctx->bits_per_mb = ls * 8;
    1273           0 :         if (ctx->chroma_factor == CFACTOR_Y444)
    1274           0 :             ctx->bits_per_mb += ls * 4;
    1275             :     }
    1276             : 
    1277          12 :     ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
    1278          12 :                                    ctx->slices_per_picture + 1) *
    1279           8 :                                   (2 + 2 * ctx->num_planes +
    1280           4 :                                    (mps * ctx->bits_per_mb) / 8)
    1281           4 :                                   + 200;
    1282             : 
    1283           4 :     if (ctx->alpha_bits) {
    1284             :          // The alpha plane is run-coded and might exceed the bit budget.
    1285           0 :          ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
    1286           0 :                                          ctx->slices_per_picture + 1) *
    1287           0 :          /* num pixels per slice */     (ctx->mbs_per_slice * 256 *
    1288           0 :          /* bits per pixel */            (1 + ctx->alpha_bits + 1) + 7 >> 3);
    1289             :     }
    1290             : 
    1291           4 :     avctx->codec_tag   = ctx->profile_info->tag;
    1292             : 
    1293           8 :     av_log(avctx, AV_LOG_DEBUG,
    1294             :            "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
    1295           4 :            ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
    1296             :            interlaced ? "yes" : "no", ctx->bits_per_mb);
    1297           4 :     av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
    1298             :            ctx->frame_size_upper_bound);
    1299             : 
    1300           4 :     return 0;
    1301             : }
    1302             : 
    1303             : #define OFFSET(x) offsetof(ProresContext, x)
    1304             : #define VE     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
    1305             : 
    1306             : static const AVOption options[] = {
    1307             :     { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
    1308             :         AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
    1309             :     { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
    1310             :         { .i64 = PRORES_PROFILE_AUTO },
    1311             :         PRORES_PROFILE_AUTO, PRORES_PROFILE_4444XQ, VE, "profile" },
    1312             :     { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
    1313             :         0, 0, VE, "profile" },
    1314             :     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
    1315             :         0, 0, VE, "profile" },
    1316             :     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
    1317             :         0, 0, VE, "profile" },
    1318             :     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
    1319             :         0, 0, VE, "profile" },
    1320             :     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
    1321             :         0, 0, VE, "profile" },
    1322             :     { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
    1323             :         0, 0, VE, "profile" },
    1324             :     { "4444xq",        NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444XQ },
    1325             :         0, 0, VE, "profile" },
    1326             :     { "vendor", "vendor ID", OFFSET(vendor),
    1327             :         AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
    1328             :     { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
    1329             :         AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
    1330             :     { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
    1331             :         { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
    1332             :     { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
    1333             :         0, 0, VE, "quant_mat" },
    1334             :     { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
    1335             :         0, 0, VE, "quant_mat" },
    1336             :     { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
    1337             :         0, 0, VE, "quant_mat" },
    1338             :     { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
    1339             :         0, 0, VE, "quant_mat" },
    1340             :     { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
    1341             :         0, 0, VE, "quant_mat" },
    1342             :     { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
    1343             :         0, 0, VE, "quant_mat" },
    1344             :     { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
    1345             :         { .i64 = 16 }, 0, 16, VE },
    1346             :     { NULL }
    1347             : };
    1348             : 
    1349             : static const AVClass proresenc_class = {
    1350             :     .class_name = "ProRes encoder",
    1351             :     .item_name  = av_default_item_name,
    1352             :     .option     = options,
    1353             :     .version    = LIBAVUTIL_VERSION_INT,
    1354             : };
    1355             : 
    1356             : AVCodec ff_prores_ks_encoder = {
    1357             :     .name           = "prores_ks",
    1358             :     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
    1359             :     .type           = AVMEDIA_TYPE_VIDEO,
    1360             :     .id             = AV_CODEC_ID_PRORES,
    1361             :     .priv_data_size = sizeof(ProresContext),
    1362             :     .init           = encode_init,
    1363             :     .close          = encode_close,
    1364             :     .encode2        = encode_frame,
    1365             :     .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
    1366             :     .pix_fmts       = (const enum AVPixelFormat[]) {
    1367             :                           AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
    1368             :                           AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
    1369             :                       },
    1370             :     .priv_class     = &proresenc_class,
    1371             : };

Generated by: LCOV version 1.13