LCOV - code coverage report
Current view: top level - libavcodec - dnxhdenc.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 681 801 85.0 %
Date: 2017-12-11 04:34:20 Functions: 29 30 96.7 %

          Line data    Source code
       1             : /*
       2             :  * VC3/DNxHD encoder
       3             :  * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
       4             :  * Copyright (c) 2011 MirriAd Ltd
       5             :  *
       6             :  * VC-3 encoder funded by the British Broadcasting Corporation
       7             :  * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
       8             :  *
       9             :  * This file is part of FFmpeg.
      10             :  *
      11             :  * FFmpeg is free software; you can redistribute it and/or
      12             :  * modify it under the terms of the GNU Lesser General Public
      13             :  * License as published by the Free Software Foundation; either
      14             :  * version 2.1 of the License, or (at your option) any later version.
      15             :  *
      16             :  * FFmpeg is distributed in the hope that it will be useful,
      17             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      18             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      19             :  * Lesser General Public License for more details.
      20             :  *
      21             :  * You should have received a copy of the GNU Lesser General Public
      22             :  * License along with FFmpeg; if not, write to the Free Software
      23             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      24             :  */
      25             : 
      26             : #include "libavutil/attributes.h"
      27             : #include "libavutil/internal.h"
      28             : #include "libavutil/opt.h"
      29             : #include "libavutil/timer.h"
      30             : 
      31             : #include "avcodec.h"
      32             : #include "blockdsp.h"
      33             : #include "fdctdsp.h"
      34             : #include "internal.h"
      35             : #include "mpegvideo.h"
      36             : #include "pixblockdsp.h"
      37             : #include "profiles.h"
      38             : #include "dnxhdenc.h"
      39             : 
// The largest value that will not lead to overflow for 10-bit samples.
// Used as the fixed-point shift of the 10-bit quantization matrices: the
// matrices are built with (1 << (DNX10BIT_QMAT_SHIFT + 1)) as numerator and
// the quantizer shifts the product right by DNX10BIT_QMAT_SHIFT.
#define DNX10BIT_QMAT_SHIFT 18
#define RC_VARIANCE 1 // use variance or ssd for fast rc
// Number of fractional bits in ctx->lambda (the initial lambda is
// 2 << LAMBDA_FRAC_BITS, i.e. qscale 2).
#define LAMBDA_FRAC_BITS 10
      44             : 
      45             : #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
      46             : static const AVOption options[] = {
      47             :     { "nitris_compat", "encode with Avid Nitris compatibility",
      48             :         offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
      49             :     { "ibias", "intra quant bias",
      50             :         offsetof(DNXHDEncContext, intra_quant_bias), AV_OPT_TYPE_INT,
      51             :         { .i64 = 0 }, INT_MIN, INT_MAX, VE },
      52             :     { "profile",       NULL, offsetof(DNXHDEncContext, profile), AV_OPT_TYPE_INT,
      53             :         { .i64 = FF_PROFILE_DNXHD },
      54             :         FF_PROFILE_DNXHD, FF_PROFILE_DNXHR_444, VE, "profile" },
      55             :     { "dnxhd",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHD },
      56             :         0, 0, VE, "profile" },
      57             :     { "dnxhr_444", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_444 },
      58             :         0, 0, VE, "profile" },
      59             :     { "dnxhr_hqx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQX },
      60             :         0, 0, VE, "profile" },
      61             :     { "dnxhr_hq",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_HQ },
      62             :         0, 0, VE, "profile" },
      63             :     { "dnxhr_sq",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_SQ },
      64             :         0, 0, VE, "profile" },
      65             :     { "dnxhr_lb",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_DNXHR_LB },
      66             :         0, 0, VE, "profile" },
      67             :     { NULL }
      68             : };
      69             : 
/* AVClass describing the encoder's private context: it names the class
 * "dnxhd" and attaches the options[] table defined above. */
static const AVClass dnxhd_class = {
    .class_name = "dnxhd",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
      76             : 
      77       97920 : static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block,
      78             :                                           const uint8_t *pixels,
      79             :                                           ptrdiff_t line_size)
      80             : {
      81             :     int i;
      82      489600 :     for (i = 0; i < 4; i++) {
      83      391680 :         block[0] = pixels[0];
      84      391680 :         block[1] = pixels[1];
      85      391680 :         block[2] = pixels[2];
      86      391680 :         block[3] = pixels[3];
      87      391680 :         block[4] = pixels[4];
      88      391680 :         block[5] = pixels[5];
      89      391680 :         block[6] = pixels[6];
      90      391680 :         block[7] = pixels[7];
      91      391680 :         pixels  += line_size;
      92      391680 :         block   += 8;
      93             :     }
      94       97920 :     memcpy(block,      block -  8, sizeof(*block) * 8);
      95       97920 :     memcpy(block +  8, block - 16, sizeof(*block) * 8);
      96       97920 :     memcpy(block + 16, block - 24, sizeof(*block) * 8);
      97       97920 :     memcpy(block + 24, block - 32, sizeof(*block) * 8);
      98       97920 : }
      99             : 
     100             : static av_always_inline
     101       43200 : void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
     102             :                                     const uint8_t *pixels,
     103             :                                     ptrdiff_t line_size)
     104             : {
     105       43200 :     memcpy(block + 0 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
     106       43200 :     memcpy(block + 7 * 8, pixels + 0 * line_size, 8 * sizeof(*block));
     107       43200 :     memcpy(block + 1 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
     108       43200 :     memcpy(block + 6 * 8, pixels + 1 * line_size, 8 * sizeof(*block));
     109       43200 :     memcpy(block + 2 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
     110       43200 :     memcpy(block + 5 * 8, pixels + 2 * line_size, 8 * sizeof(*block));
     111       43200 :     memcpy(block + 3 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
     112       43200 :     memcpy(block + 4 * 8, pixels + 3 * line_size, 8 * sizeof(*block));
     113       43200 : }
     114             : 
/**
 * Forward-DCT and quantize one 8x8 block (quantizer installed for the 4:4:4
 * and DNxHR HQX profiles).
 *
 * @param ctx      encoder context providing the DCT, scan table, quantization
 *                 matrices, intra_quant_bias and max_qcoeff
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block index; n < 4 selects q_intra_matrix, anything else
 *                 selects q_chroma_intra_matrix
 * @param qscale   quantizer scale, used as row index into the matrices
 * @param overflow set to non-zero when the OR of all quantized magnitudes
 *                 exceeds ctx->max_qcoeff
 * @return scan position of the last non-zero coefficient (0 if only DC)
 *
 * NOTE(review): this routine uses a fixed 16-bit shift while the 10-bit
 * matrices built in this file use DNX10BIT_QMAT_SHIFT + 1 -- confirm the
 * matrix scaling matches for the profiles that select this function.
 */
static int dnxhd_10bit_dct_quantize_444(MpegEncContext *ctx, int16_t *block,
                                        int n, int qscale, int *overflow)
{
    int i, j, level, last_non_zero, start_i;
    const int *qmat;
    const uint8_t *scantable= ctx->intra_scantable.scantable;
    int bias;
    int max = 0;
    unsigned int threshold1, threshold2;

    ctx->fdsp.fdct(block);

    /* DC coefficient: divide by 4 with rounding; it is not quantized below
     * because the loops start at scan position 1. */
    block[0] = (block[0] + 2) >> 2;
    start_i = 1;
    last_non_zero = 0;
    qmat = n < 4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
    /* Scale the bias up by 8 bits so it lines up with the 16-bit shift. */
    bias= ctx->intra_quant_bias * (1 << (16 - 8));
    /* With these thresholds, (unsigned)(level + threshold1) > threshold2 is a
     * single-compare test for |level| > threshold1, i.e. for "the coefficient
     * quantizes to something non-zero". */
    threshold1 = (1 << 16) - bias - 1;
    threshold2 = (threshold1 << 1);

    /* Scan backwards from the highest frequency: zero every coefficient that
     * quantizes to 0 and stop at the first one that does not. */
    for (i = 63; i >= start_i; i--) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if (((unsigned)(level + threshold1)) > threshold2) {
            last_non_zero = i;
            break;
        } else{
            block[j]=0;
        }
    }

    /* Quantize the remaining coefficients in scan order. */
    for (i = start_i; i <= last_non_zero; i++) {
        j = scantable[i];
        level = block[j] * qmat[j];

        if (((unsigned)(level + threshold1)) > threshold2) {
            /* In both branches 'level' ends up holding the magnitude; the
             * sign is re-applied only when storing into block[j]. */
            if (level > 0) {
                level = (bias + level) >> 16;
                block[j] = level;
            } else{
                level = (bias - level) >> 16;
                block[j] = -level;
            }
            /* OR of all magnitudes: cheap upper-bound style overflow probe. */
            max |= level;
        } else {
            block[j] = 0;
        }
    }
    *overflow = ctx->max_qcoeff < max; //overflow might have happened

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
        ff_block_permute(block, ctx->idsp.idct_permutation,
                         scantable, last_non_zero);

    return last_non_zero;
}
     173             : 
     174     3945600 : static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
     175             :                                     int n, int qscale, int *overflow)
     176             : {
     177     3945600 :     const uint8_t *scantable= ctx->intra_scantable.scantable;
     178     3945600 :     const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
     179     3945600 :     int last_non_zero = 0;
     180             :     int i;
     181             : 
     182     3945600 :     ctx->fdsp.fdct(block);
     183             : 
     184             :     // Divide by 4 with rounding, to compensate scaling of DCT coefficients
     185     3945600 :     block[0] = (block[0] + 2) >> 2;
     186             : 
     187   252518400 :     for (i = 1; i < 64; ++i) {
     188   248572800 :         int j = scantable[i];
     189   248572800 :         int sign = FF_SIGNBIT(block[j]);
     190   248572800 :         int level = (block[j] ^ sign) - sign;
     191   248572800 :         level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT;
     192   248572800 :         block[j] = (level ^ sign) - sign;
     193   248572800 :         if (level)
     194    32824223 :             last_non_zero = i;
     195             :     }
     196             : 
     197             :     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
     198     3945600 :     if (ctx->idsp.perm_type != FF_IDCT_PERM_NONE)
     199           0 :         ff_block_permute(block, ctx->idsp.idct_permutation,
     200             :                          scantable, last_non_zero);
     201             : 
     202     3945600 :     return last_non_zero;
     203             : }
     204             : 
     205          73 : static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
     206             : {
     207             :     int i, j, level, run;
     208          73 :     int max_level = 1 << (ctx->bit_depth + 2);
     209             : 
     210          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->vlc_codes,
     211             :                       max_level, 4 * sizeof(*ctx->vlc_codes), fail);
     212          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->vlc_bits,
     213             :                       max_level, 4 * sizeof(*ctx->vlc_bits), fail);
     214          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes,
     215             :                       63 * 2, fail);
     216          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits,
     217             :                       63, fail);
     218             : 
     219          73 :     ctx->vlc_codes += max_level * 2;
     220          73 :     ctx->vlc_bits  += max_level * 2;
     221      192585 :     for (level = -max_level; level < max_level; level++) {
     222      577536 :         for (run = 0; run < 2; run++) {
     223      385024 :             int index = (level << 1) | run;
     224      385024 :             int sign, offset = 0, alevel = level;
     225             : 
     226      385024 :             MASK_ABS(sign, alevel);
     227      385024 :             if (alevel > 64) {
     228      366190 :                 offset  = (alevel - 1) >> 6;
     229      366190 :                 alevel -= offset << 6;
     230             :             }
     231    65137020 :             for (j = 0; j < 257; j++) {
     232    65136947 :                 if (ctx->cid_table->ac_info[2*j+0] >> 1 == alevel &&
     233     1163352 :                     (!offset || (ctx->cid_table->ac_info[2*j+1] & 1) && offset) &&
     234      390917 :                     (!run    || (ctx->cid_table->ac_info[2*j+1] & 2) && run)) {
     235             :                     av_assert1(!ctx->vlc_codes[index]);
     236      384951 :                     if (alevel) {
     237      769756 :                         ctx->vlc_codes[index] =
     238      769756 :                             (ctx->cid_table->ac_codes[j] << 1) | (sign & 1);
     239      384878 :                         ctx->vlc_bits[index] = ctx->cid_table->ac_bits[j] + 1;
     240             :                     } else {
     241          73 :                         ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j];
     242          73 :                         ctx->vlc_bits[index]  = ctx->cid_table->ac_bits[j];
     243             :                     }
     244      384951 :                     break;
     245             :                 }
     246             :             }
     247      385024 :             av_assert0(!alevel || j < 257);
     248      385024 :             if (offset) {
     249      732380 :                 ctx->vlc_codes[index] =
     250      732380 :                     (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset;
     251      366190 :                 ctx->vlc_bits[index] += ctx->cid_table->index_bits;
     252             :             }
     253             :         }
     254             :     }
     255        4599 :     for (i = 0; i < 62; i++) {
     256        4526 :         int run = ctx->cid_table->run[i];
     257        4526 :         av_assert0(run < 63);
     258        4526 :         ctx->run_codes[run] = ctx->cid_table->run_codes[i];
     259        4526 :         ctx->run_bits[run]  = ctx->cid_table->run_bits[i];
     260             :     }
     261          73 :     return 0;
     262           0 : fail:
     263           0 :     return AVERROR(ENOMEM);
     264             : }
     265             : 
     266          73 : static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
     267             : {
     268             :     // init first elem to 1 to avoid div by 0 in convert_matrix
     269          73 :     uint16_t weight_matrix[64] = { 1, }; // convert_matrix needs uint16_t*
     270             :     int qscale, i;
     271          73 :     const uint8_t *luma_weight_table   = ctx->cid_table->luma_weight;
     272          73 :     const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
     273             : 
     274          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l,
     275             :                       (ctx->m.avctx->qmax + 1), 64 * sizeof(int), fail);
     276          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c,
     277             :                       (ctx->m.avctx->qmax + 1), 64 * sizeof(int), fail);
     278          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16,
     279             :                       (ctx->m.avctx->qmax + 1), 64 * 2 * sizeof(uint16_t),
     280             :                       fail);
     281          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16,
     282             :                       (ctx->m.avctx->qmax + 1), 64 * 2 * sizeof(uint16_t),
     283             :                       fail);
     284             : 
     285          73 :     if (ctx->bit_depth == 8) {
     286        4224 :         for (i = 1; i < 64; i++) {
     287        4158 :             int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
     288        4158 :             weight_matrix[j] = ctx->cid_table->luma_weight[i];
     289             :         }
     290          66 :         ff_convert_matrix(&ctx->m, ctx->qmatrix_l, ctx->qmatrix_l16,
     291             :                           weight_matrix, ctx->intra_quant_bias, 1,
     292          66 :                           ctx->m.avctx->qmax, 1);
     293        4224 :         for (i = 1; i < 64; i++) {
     294        4158 :             int j = ctx->m.idsp.idct_permutation[ff_zigzag_direct[i]];
     295        4158 :             weight_matrix[j] = ctx->cid_table->chroma_weight[i];
     296             :         }
     297          66 :         ff_convert_matrix(&ctx->m, ctx->qmatrix_c, ctx->qmatrix_c16,
     298             :                           weight_matrix, ctx->intra_quant_bias, 1,
     299          66 :                           ctx->m.avctx->qmax, 1);
     300             : 
     301       42250 :         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
     302     2741960 :             for (i = 0; i < 64; i++) {
     303     2699776 :                 ctx->qmatrix_l[qscale][i]      <<= 2;
     304     2699776 :                 ctx->qmatrix_c[qscale][i]      <<= 2;
     305     2699776 :                 ctx->qmatrix_l16[qscale][0][i] <<= 2;
     306     2699776 :                 ctx->qmatrix_l16[qscale][1][i] <<= 2;
     307     2699776 :                 ctx->qmatrix_c16[qscale][0][i] <<= 2;
     308     2699776 :                 ctx->qmatrix_c16[qscale][1][i] <<= 2;
     309             :             }
     310             :         }
     311             :     } else {
     312             :         // 10-bit
     313          63 :         for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
     314        3584 :             for (i = 1; i < 64; i++) {
     315        3528 :                 int j = ff_zigzag_direct[i];
     316             : 
     317             :                 /* The quantization formula from the VC-3 standard is:
     318             :                  * quantized = sign(block[i]) * floor(abs(block[i]/s) * p /
     319             :                  *             (qscale * weight_table[i]))
     320             :                  * Where p is 32 for 8-bit samples and 8 for 10-bit ones.
     321             :                  * The s factor compensates scaling of DCT coefficients done by
     322             :                  * the DCT routines, and therefore is not present in standard.
     323             :                  * It's 8 for 8-bit samples and 4 for 10-bit ones.
     324             :                  * We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be:
     325             :                  *     ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) /
     326             :                  *     (qscale * weight_table[i])
     327             :                  * For 10-bit samples, p / s == 2 */
     328        3528 :                 ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
     329        3528 :                                             (qscale * luma_weight_table[i]);
     330        3528 :                 ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) /
     331        3528 :                                             (qscale * chroma_weight_table[i]);
     332             :             }
     333             :         }
     334             :     }
     335             : 
     336          73 :     ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16;
     337          73 :     ctx->m.q_chroma_intra_matrix   = ctx->qmatrix_c;
     338          73 :     ctx->m.q_intra_matrix16        = ctx->qmatrix_l16;
     339          73 :     ctx->m.q_intra_matrix          = ctx->qmatrix_l;
     340             : 
     341          73 :     return 0;
     342           0 : fail:
     343           0 :     return AVERROR(ENOMEM);
     344             : }
     345             : 
     346          73 : static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx)
     347             : {
     348          73 :     FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->mb_rc, (ctx->m.avctx->qmax + 1),
     349             :                           ctx->m.mb_num * sizeof(RCEntry), fail);
     350          73 :     if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) {
     351          58 :         FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->mb_cmp,
     352             :                           ctx->m.mb_num, sizeof(RCCMPEntry), fail);
     353          58 :         FF_ALLOCZ_ARRAY_OR_GOTO(ctx->m.avctx, ctx->mb_cmp_tmp,
     354             :                           ctx->m.mb_num, sizeof(RCCMPEntry), fail);
     355             :     }
     356         219 :     ctx->frame_bits = (ctx->coding_unit_size -
     357         146 :                        ctx->data_offset - 4 - ctx->min_padding) * 8;
     358          73 :     ctx->qscale = 1;
     359          73 :     ctx->lambda = 2 << LAMBDA_FRAC_BITS; // qscale 2
     360          73 :     return 0;
     361           0 : fail:
     362           0 :     return AVERROR(ENOMEM);
     363             : }
     364             : 
     365          73 : static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     366             : {
     367          73 :     DNXHDEncContext *ctx = avctx->priv_data;
     368             :     int i, index, ret;
     369             : 
     370          73 :     switch (avctx->pix_fmt) {
     371          66 :     case AV_PIX_FMT_YUV422P:
     372          66 :         ctx->bit_depth = 8;
     373          66 :         break;
     374           7 :     case AV_PIX_FMT_YUV422P10:
     375             :     case AV_PIX_FMT_YUV444P10:
     376             :     case AV_PIX_FMT_GBRP10:
     377           7 :         ctx->bit_depth = 10;
     378           7 :         break;
     379           0 :     default:
     380           0 :         av_log(avctx, AV_LOG_ERROR,
     381             :                "pixel format is incompatible with DNxHD\n");
     382           0 :         return AVERROR(EINVAL);
     383             :     }
     384             : 
     385          73 :     if ((ctx->profile == FF_PROFILE_DNXHR_444 && (avctx->pix_fmt != AV_PIX_FMT_YUV444P10 &&
     386          73 :                                                   avctx->pix_fmt != AV_PIX_FMT_GBRP10)) ||
     387         219 :         (ctx->profile != FF_PROFILE_DNXHR_444 && (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 ||
     388          73 :                                                   avctx->pix_fmt == AV_PIX_FMT_GBRP10))) {
     389           0 :         av_log(avctx, AV_LOG_ERROR,
     390             :                "pixel format is incompatible with DNxHD profile\n");
     391           0 :         return AVERROR(EINVAL);
     392             :     }
     393             : 
     394          73 :     if (ctx->profile == FF_PROFILE_DNXHR_HQX && avctx->pix_fmt != AV_PIX_FMT_YUV422P10) {
     395           0 :         av_log(avctx, AV_LOG_ERROR,
     396             :                "pixel format is incompatible with DNxHR HQX profile\n");
     397           0 :         return AVERROR(EINVAL);
     398             :     }
     399             : 
     400         134 :     if ((ctx->profile == FF_PROFILE_DNXHR_LB ||
     401         114 :          ctx->profile == FF_PROFILE_DNXHR_SQ ||
     402          93 :          ctx->profile == FF_PROFILE_DNXHR_HQ) && avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
     403           0 :         av_log(avctx, AV_LOG_ERROR,
     404             :                "pixel format is incompatible with DNxHR LB/SQ/HQ profile\n");
     405           0 :         return AVERROR(EINVAL);
     406             :     }
     407             : 
     408          73 :     ctx->is_444 = ctx->profile == FF_PROFILE_DNXHR_444;
     409          73 :     avctx->profile = ctx->profile;
     410          73 :     ctx->cid = ff_dnxhd_find_cid(avctx, ctx->bit_depth);
     411          73 :     if (!ctx->cid) {
     412           0 :         av_log(avctx, AV_LOG_ERROR,
     413             :                "video parameters incompatible with DNxHD. Valid DNxHD profiles:\n");
     414           0 :         ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR);
     415           0 :         return AVERROR(EINVAL);
     416             :     }
     417          73 :     av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
     418             : 
     419          73 :     if (ctx->cid >= 1270 && ctx->cid <= 1274)
     420          40 :         avctx->codec_tag = MKTAG('A','V','d','h');
     421             : 
     422          73 :     if (avctx->width < 256 || avctx->height < 120) {
     423           0 :         av_log(avctx, AV_LOG_ERROR,
     424             :                "Input dimensions too small, input must be at least 256x120\n");
     425           0 :         return AVERROR(EINVAL);
     426             :     }
     427             : 
     428          73 :     index = ff_dnxhd_get_cid_table(ctx->cid);
     429          73 :     av_assert0(index >= 0);
     430             : 
     431          73 :     ctx->cid_table = &ff_dnxhd_cid_table[index];
     432             : 
     433          73 :     ctx->m.avctx    = avctx;
     434          73 :     ctx->m.mb_intra = 1;
     435          73 :     ctx->m.h263_aic = 1;
     436             : 
     437          73 :     avctx->bits_per_raw_sample = ctx->bit_depth;
     438             : 
     439          73 :     ff_blockdsp_init(&ctx->bdsp, avctx);
     440          73 :     ff_fdctdsp_init(&ctx->m.fdsp, avctx);
     441          73 :     ff_mpv_idct_init(&ctx->m);
     442          73 :     ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
     443          73 :     ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
     444          73 :     ff_dct_encode_init(&ctx->m);
     445             : 
     446          73 :     if (ctx->profile != FF_PROFILE_DNXHD)
     447          40 :         ff_videodsp_init(&ctx->m.vdsp, ctx->bit_depth);
     448             : 
     449          73 :     if (!ctx->m.dct_quantize)
     450           0 :         ctx->m.dct_quantize = ff_dct_quantize_c;
     451             : 
     452          73 :     if (ctx->is_444 || ctx->profile == FF_PROFILE_DNXHR_HQX) {
     453           0 :         ctx->m.dct_quantize     = dnxhd_10bit_dct_quantize_444;
     454           0 :         ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
     455           0 :         ctx->block_width_l2     = 4;
     456          73 :     } else if (ctx->bit_depth == 10) {
     457           7 :         ctx->m.dct_quantize     = dnxhd_10bit_dct_quantize;
     458           7 :         ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
     459           7 :         ctx->block_width_l2     = 4;
     460             :     } else {
     461          66 :         ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
     462          66 :         ctx->block_width_l2     = 3;
     463             :     }
     464             : 
     465             :     if (ARCH_X86)
     466          73 :         ff_dnxhdenc_init_x86(ctx);
     467             : 
     468          73 :     ctx->m.mb_height = (avctx->height + 15) / 16;
     469          73 :     ctx->m.mb_width  = (avctx->width  + 15) / 16;
     470             : 
     471          73 :     if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
     472          11 :         ctx->interlaced   = 1;
     473          11 :         ctx->m.mb_height /= 2;
     474             :     }
     475             : 
     476          73 :     ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;
     477             : 
     478          73 :     if (ctx->cid_table->frame_size == DNXHD_VARIABLE) {
     479          40 :         ctx->frame_size = ff_dnxhd_get_hr_frame_size(ctx->cid,
     480             :                                                      avctx->width, avctx->height);
     481          40 :         av_assert0(ctx->frame_size >= 0);
     482          40 :         ctx->coding_unit_size = ctx->frame_size;
     483             :     } else {
     484          33 :         ctx->frame_size = ctx->cid_table->frame_size;
     485          33 :         ctx->coding_unit_size = ctx->cid_table->coding_unit_size;
     486             :     }
     487             : 
     488          73 :     if (ctx->m.mb_height > 68)
     489          12 :         ctx->data_offset = 0x170 + (ctx->m.mb_height << 2);
     490             :     else
     491          61 :         ctx->data_offset = 0x280;
     492             : 
     493             :     // XXX tune lbias/cbias
     494          73 :     if ((ret = dnxhd_init_qmat(ctx, ctx->intra_quant_bias, 0)) < 0)
     495           0 :         return ret;
     496             : 
     497             :     /* Avid Nitris hardware decoder requires a minimum amount of padding
     498             :      * in the coding unit payload */
     499          73 :     if (ctx->nitris_compat)
     500           0 :         ctx->min_padding = 1600;
     501             : 
     502          73 :     if ((ret = dnxhd_init_vlc(ctx)) < 0)
     503           0 :         return ret;
     504          73 :     if ((ret = dnxhd_init_rc(ctx)) < 0)
     505           0 :         return ret;
     506             : 
     507          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size,
     508             :                       ctx->m.mb_height * sizeof(uint32_t), fail);
     509          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs,
     510             :                       ctx->m.mb_height * sizeof(uint32_t), fail);
     511          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits,
     512             :                       ctx->m.mb_num * sizeof(uint16_t), fail);
     513          73 :     FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale,
     514             :                       ctx->m.mb_num * sizeof(uint8_t), fail);
     515             : 
     516             : #if FF_API_CODED_FRAME
     517             : FF_DISABLE_DEPRECATION_WARNINGS
     518          73 :     avctx->coded_frame->key_frame = 1;
     519          73 :     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     520             : FF_ENABLE_DEPRECATION_WARNINGS
     521             : #endif
     522             : 
     523          73 :     if (avctx->active_thread_type == FF_THREAD_SLICE) {
     524           0 :         if (avctx->thread_count > MAX_THREADS) {
     525           0 :             av_log(avctx, AV_LOG_ERROR, "too many threads\n");
     526           0 :             return AVERROR(EINVAL);
     527             :         }
     528             :     }
     529             : 
     530          73 :     if (avctx->qmax <= 1) {
     531           0 :         av_log(avctx, AV_LOG_ERROR, "qmax must be at least 2\n");
     532           0 :         return AVERROR(EINVAL);
     533             :     }
     534             : 
     535          73 :     ctx->thread[0] = ctx;
     536          73 :     if (avctx->active_thread_type == FF_THREAD_SLICE) {
     537           0 :         for (i = 1; i < avctx->thread_count; i++) {
     538           0 :             ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
     539           0 :             memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));
     540             :         }
     541             :     }
     542             : 
     543          73 :     return 0;
     544           0 : fail:  // for FF_ALLOCZ_OR_GOTO
     545           0 :     return AVERROR(ENOMEM);
     546             : }
     547             : 
     548         380 : static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
     549             : {
     550         380 :     DNXHDEncContext *ctx = avctx->priv_data;
     551             : 
     552         380 :     memset(buf, 0, ctx->data_offset);
     553             : 
     554             :     // * write prefix */
     555         380 :     AV_WB16(buf + 0x02, ctx->data_offset);
     556         380 :     if (ctx->cid >= 1270 && ctx->cid <= 1274)
     557         200 :         buf[4] = 0x03;
     558             :     else
     559         180 :         buf[4] = 0x01;
     560             : 
     561         380 :     buf[5] = ctx->interlaced ? ctx->cur_field + 2 : 0x01;
     562         380 :     buf[6] = 0x80; // crc flag off
     563         380 :     buf[7] = 0xa0; // reserved
     564         380 :     AV_WB16(buf + 0x18, avctx->height >> ctx->interlaced); // ALPF
     565         380 :     AV_WB16(buf + 0x1a, avctx->width);  // SPL
     566         380 :     AV_WB16(buf + 0x1d, avctx->height >> ctx->interlaced); // NAL
     567             : 
     568         380 :     buf[0x21] = ctx->bit_depth == 10 ? 0x58 : 0x38;
     569         380 :     buf[0x22] = 0x88 + (ctx->interlaced << 2);
     570         380 :     AV_WB32(buf + 0x28, ctx->cid); // CID
     571         380 :     buf[0x2c] = (!ctx->interlaced << 7) | (ctx->is_444 << 6) | (avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
     572             : 
     573         380 :     buf[0x5f] = 0x01; // UDL
     574             : 
     575         380 :     buf[0x167] = 0x02; // reserved
     576         380 :     AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
     577         380 :     AV_WB16(buf + 0x16c, ctx->m.mb_height); // Ns
     578         380 :     buf[0x16f] = 0x10; // reserved
     579             : 
     580         380 :     ctx->msip = buf + 0x170;
     581         380 :     return 0;
     582             : }
     583             : 
     584    26918560 : static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
     585             : {
     586             :     int nbits;
     587    26918560 :     if (diff < 0) {
     588    12317800 :         nbits = av_log2_16bit(-2 * diff);
     589    12317800 :         diff--;
     590             :     } else {
     591    14600760 :         nbits = av_log2_16bit(2 * diff);
     592             :     }
     593    26918560 :     put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
     594    26918560 :              (ctx->cid_table->dc_codes[nbits] << nbits) +
     595    26918560 :              av_mod_uintp2(diff, nbits));
     596    26918560 : }
     597             : 
     598             : static av_always_inline
     599    26918560 : void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block,
     600             :                         int last_index, int n)
     601             : {
     602    26918560 :     int last_non_zero = 0;
     603             :     int slevel, i, j;
     604             : 
     605    26918560 :     dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
     606    26918560 :     ctx->m.last_dc[n] = block[0];
     607             : 
     608   323734349 :     for (i = 1; i <= last_index; i++) {
     609   296815789 :         j = ctx->m.intra_scantable.permutated[i];
     610   296815789 :         slevel = block[j];
     611   296815789 :         if (slevel) {
     612   111021078 :             int run_level = i - last_non_zero - 1;
     613   111021078 :             int rlevel = (slevel << 1) | !!run_level;
     614   111021078 :             put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]);
     615   111021078 :             if (run_level)
     616    36001894 :                 put_bits(&ctx->m.pb, ctx->run_bits[run_level],
     617    36001894 :                          ctx->run_codes[run_level]);
     618   111021078 :             last_non_zero = i;
     619             :         }
     620             :     }
     621    26918560 :     put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB
     622    26918560 : }
     623             : 
     624             : static av_always_inline
     625     3024000 : void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n,
     626             :                         int qscale, int last_index)
     627             : {
     628             :     const uint8_t *weight_matrix;
     629             :     int level;
     630             :     int i;
     631             : 
     632     3024000 :     if (ctx->is_444) {
     633           0 :         weight_matrix = ((n % 6) < 2) ? ctx->cid_table->luma_weight
     634           0 :                                       : ctx->cid_table->chroma_weight;
     635             :     } else {
     636     7560000 :         weight_matrix = (n & 2) ? ctx->cid_table->chroma_weight
     637     4536000 :                                 : ctx->cid_table->luma_weight;
     638             :     }
     639             : 
     640    35797315 :     for (i = 1; i <= last_index; i++) {
     641    32773315 :         int j = ctx->m.intra_scantable.permutated[i];
     642    32773315 :         level = block[j];
     643    32773315 :         if (level) {
     644    16774641 :             if (level < 0) {
     645     8458215 :                 level = (1 - 2 * level) * qscale * weight_matrix[i];
     646     8458215 :                 if (ctx->bit_depth == 10) {
     647           0 :                     if (weight_matrix[i] != 8)
     648           0 :                         level += 8;
     649           0 :                     level >>= 4;
     650             :                 } else {
     651     8458215 :                     if (weight_matrix[i] != 32)
     652     6479585 :                         level += 32;
     653     8458215 :                     level >>= 6;
     654             :                 }
     655     8458215 :                 level = -level;
     656             :             } else {
     657     8316426 :                 level = (2 * level + 1) * qscale * weight_matrix[i];
     658     8316426 :                 if (ctx->bit_depth == 10) {
     659           0 :                     if (weight_matrix[i] != 8)
     660           0 :                         level += 8;
     661           0 :                     level >>= 4;
     662             :                 } else {
     663     8316426 :                     if (weight_matrix[i] != 32)
     664     6813970 :                         level += 32;
     665     8316426 :                     level >>= 6;
     666             :                 }
     667             :             }
     668    16774641 :             block[j] = level;
     669             :         }
     670             :     }
     671     3024000 : }
     672             : 
     673     3024000 : static av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block)
     674             : {
     675     3024000 :     int score = 0;
     676             :     int i;
     677   196560000 :     for (i = 0; i < 64; i++)
     678   193536000 :         score += (block[i] - qblock[i]) * (block[i] - qblock[i]);
     679     3024000 :     return score;
     680             : }
     681             : 
     682             : static av_always_inline
     683    51881432 : int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index)
     684             : {
     685    51881432 :     int last_non_zero = 0;
     686    51881432 :     int bits = 0;
     687             :     int i, j, level;
     688   515019637 :     for (i = 1; i <= last_index; i++) {
     689   463138205 :         j = ctx->m.intra_scantable.permutated[i];
     690   463138205 :         level = block[j];
     691   463138205 :         if (level) {
     692   191817206 :             int run_level = i - last_non_zero - 1;
     693   575451618 :             bits += ctx->vlc_bits[(level << 1) |
     694   383634412 :                     !!run_level] + ctx->run_bits[run_level];
     695   191817206 :             last_non_zero = i;
     696             :         }
     697             :     }
     698    51881432 :     return bits;
     699             : }
     700             : 
     701             : static av_always_inline
     702     9849999 : void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
     703             : {
     704     9849999 :     const int bs = ctx->block_width_l2;
     705     9849999 :     const int bw = 1 << bs;
     706     9849999 :     int dct_y_offset = ctx->dct_y_offset;
     707     9849999 :     int dct_uv_offset = ctx->dct_uv_offset;
     708     9849999 :     int linesize = ctx->m.linesize;
     709     9849999 :     int uvlinesize = ctx->m.uvlinesize;
     710    19699998 :     const uint8_t *ptr_y = ctx->thread[0]->src[0] +
     711     9849999 :                            ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs + 1);
     712    19699998 :     const uint8_t *ptr_u = ctx->thread[0]->src[1] +
     713     9849999 :                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
     714    19699998 :     const uint8_t *ptr_v = ctx->thread[0]->src[2] +
     715     9849999 :                            ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs + ctx->is_444);
     716     9849999 :     PixblockDSPContext *pdsp = &ctx->m.pdsp;
     717     9849999 :     VideoDSPContext *vdsp = &ctx->m.vdsp;
     718             : 
     719    17053151 :     if (ctx->bit_depth != 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
     720     7225663 :                                                            (mb_y << 4) + 16 > ctx->m.avctx->height)) {
     721       22511 :         int y_w = ctx->m.avctx->width  - (mb_x << 4);
     722       22511 :         int y_h = ctx->m.avctx->height - (mb_y << 4);
     723       22511 :         int uv_w = (y_w + 1) / 2;
     724       22511 :         int uv_h = y_h;
     725       22511 :         linesize = 16;
     726       22511 :         uvlinesize = 8;
     727             : 
     728       22511 :         vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
     729             :                                linesize, ctx->m.linesize,
     730             :                                linesize, 16,
     731             :                                0, 0, y_w, y_h);
     732       22511 :         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
     733             :                                uvlinesize, ctx->m.uvlinesize,
     734             :                                uvlinesize, 16,
     735             :                                0, 0, uv_w, uv_h);
     736       22511 :         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
     737             :                                uvlinesize, ctx->m.uvlinesize,
     738             :                                uvlinesize, 16,
     739             :                                0, 0, uv_w, uv_h);
     740             : 
     741       22511 :         dct_y_offset =  bw * linesize;
     742       22511 :         dct_uv_offset = bw * uvlinesize;
     743       22511 :         ptr_y = &ctx->edge_buf_y[0];
     744       22511 :         ptr_u = &ctx->edge_buf_uv[0][0];
     745       22511 :         ptr_v = &ctx->edge_buf_uv[1][0];
     746     9827488 :     } else if (ctx->bit_depth == 10 && vdsp->emulated_edge_mc && ((mb_x << 4) + 16 > ctx->m.avctx->width ||
     747           0 :                                                                   (mb_y << 4) + 16 > ctx->m.avctx->height)) {
     748           0 :         int y_w = ctx->m.avctx->width  - (mb_x << 4);
     749           0 :         int y_h = ctx->m.avctx->height - (mb_y << 4);
     750           0 :         int uv_w = ctx->is_444 ? y_w : (y_w + 1) / 2;
     751           0 :         int uv_h = y_h;
     752           0 :         linesize = 32;
     753           0 :         uvlinesize = 16 + 16 * ctx->is_444;
     754             : 
     755           0 :         vdsp->emulated_edge_mc(&ctx->edge_buf_y[0], ptr_y,
     756             :                                linesize, ctx->m.linesize,
     757             :                                linesize / 2, 16,
     758             :                                0, 0, y_w, y_h);
     759           0 :         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[0][0], ptr_u,
     760             :                                uvlinesize, ctx->m.uvlinesize,
     761             :                                uvlinesize / 2, 16,
     762             :                                0, 0, uv_w, uv_h);
     763           0 :         vdsp->emulated_edge_mc(&ctx->edge_buf_uv[1][0], ptr_v,
     764             :                                uvlinesize, ctx->m.uvlinesize,
     765             :                                uvlinesize / 2, 16,
     766             :                                0, 0, uv_w, uv_h);
     767             : 
     768           0 :         dct_y_offset =  bw * linesize / 2;
     769           0 :         dct_uv_offset = bw * uvlinesize / 2;
     770           0 :         ptr_y = &ctx->edge_buf_y[0];
     771           0 :         ptr_u = &ctx->edge_buf_uv[0][0];
     772           0 :         ptr_v = &ctx->edge_buf_uv[1][0];
     773             :     }
     774             : 
     775     9849999 :     if (!ctx->is_444) {
     776     9849999 :         pdsp->get_pixels(ctx->blocks[0], ptr_y,      linesize);
     777     9849999 :         pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
     778     9849999 :         pdsp->get_pixels(ctx->blocks[2], ptr_u,      uvlinesize);
     779     9849999 :         pdsp->get_pixels(ctx->blocks[3], ptr_v,      uvlinesize);
     780             : 
     781     9849999 :         if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
     782      113120 :             if (ctx->interlaced) {
     783       35280 :                 ctx->get_pixels_8x4_sym(ctx->blocks[4],
     784             :                                         ptr_y + dct_y_offset,
     785             :                                         linesize);
     786       70560 :                 ctx->get_pixels_8x4_sym(ctx->blocks[5],
     787       35280 :                                         ptr_y + dct_y_offset + bw,
     788             :                                         linesize);
     789       35280 :                 ctx->get_pixels_8x4_sym(ctx->blocks[6],
     790             :                                         ptr_u + dct_uv_offset,
     791             :                                         uvlinesize);
     792       35280 :                 ctx->get_pixels_8x4_sym(ctx->blocks[7],
     793             :                                         ptr_v + dct_uv_offset,
     794             :                                         uvlinesize);
     795             :             } else {
     796       21280 :                 ctx->bdsp.clear_block(ctx->blocks[4]);
     797       21280 :                 ctx->bdsp.clear_block(ctx->blocks[5]);
     798       21280 :                 ctx->bdsp.clear_block(ctx->blocks[6]);
     799       21280 :                 ctx->bdsp.clear_block(ctx->blocks[7]);
     800             :             }
     801             :         } else {
     802     9793439 :             pdsp->get_pixels(ctx->blocks[4],
     803             :                              ptr_y + dct_y_offset, linesize);
     804    19586878 :             pdsp->get_pixels(ctx->blocks[5],
     805     9793439 :                              ptr_y + dct_y_offset + bw, linesize);
     806     9793439 :             pdsp->get_pixels(ctx->blocks[6],
     807             :                              ptr_u + dct_uv_offset, uvlinesize);
     808     9793439 :             pdsp->get_pixels(ctx->blocks[7],
     809             :                              ptr_v + dct_uv_offset, uvlinesize);
     810             :         }
     811             :     } else {
     812           0 :         pdsp->get_pixels(ctx->blocks[0], ptr_y,      linesize);
     813           0 :         pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, linesize);
     814           0 :         pdsp->get_pixels(ctx->blocks[6], ptr_y + dct_y_offset, linesize);
     815           0 :         pdsp->get_pixels(ctx->blocks[7], ptr_y + dct_y_offset + bw, linesize);
     816             : 
     817           0 :         pdsp->get_pixels(ctx->blocks[2], ptr_u,      uvlinesize);
     818           0 :         pdsp->get_pixels(ctx->blocks[3], ptr_u + bw, uvlinesize);
     819           0 :         pdsp->get_pixels(ctx->blocks[8], ptr_u + dct_uv_offset, uvlinesize);
     820           0 :         pdsp->get_pixels(ctx->blocks[9], ptr_u + dct_uv_offset + bw, uvlinesize);
     821             : 
     822           0 :         pdsp->get_pixels(ctx->blocks[4], ptr_v,      uvlinesize);
     823           0 :         pdsp->get_pixels(ctx->blocks[5], ptr_v + bw, uvlinesize);
     824           0 :         pdsp->get_pixels(ctx->blocks[10], ptr_v + dct_uv_offset, uvlinesize);
     825           0 :         pdsp->get_pixels(ctx->blocks[11], ptr_v + dct_uv_offset + bw, uvlinesize);
     826             :     }
     827     9849999 : }
     828             : 
     829             : static av_always_inline
     830    78799992 : int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
     831             : {
     832             :     int x;
     833             : 
     834    78799992 :     if (ctx->is_444) {
     835           0 :         x = (i >> 1) % 3;
     836             :     } else {
     837             :         const static uint8_t component[8]={0,0,1,2,0,0,1,2};
     838    78799992 :         x = component[i];
     839             :     }
     840    78799992 :     return x;
     841             : }
     842             : 
     843       41557 : static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg,
     844             :                                   int jobnr, int threadnr)
     845             : {
     846       41557 :     DNXHDEncContext *ctx = avctx->priv_data;
     847       41557 :     int mb_y = jobnr, mb_x;
     848       41557 :     int qscale = ctx->qscale;
     849       41557 :     LOCAL_ALIGNED_16(int16_t, block, [64]);
     850       41557 :     ctx = ctx->thread[threadnr];
     851             : 
     852       41557 :     ctx->m.last_dc[0] =
     853       41557 :     ctx->m.last_dc[1] =
     854       41557 :     ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
     855             : 
     856     6526736 :     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
     857     6485179 :         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
     858     6485179 :         int ssd     = 0;
     859     6485179 :         int ac_bits = 0;
     860     6485179 :         int dc_bits = 0;
     861             :         int i;
     862             : 
     863     6485179 :         dnxhd_get_blocks(ctx, mb_x, mb_y);
     864             : 
     865    58366611 :         for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
     866    51881432 :             int16_t *src_block = ctx->blocks[i];
     867             :             int overflow, nbits, diff, last_index;
     868    51881432 :             int n = dnxhd_switch_matrix(ctx, i);
     869             : 
     870    51881432 :             memcpy(block, src_block, 64 * sizeof(*block));
     871   155644296 :             last_index = ctx->m.dct_quantize(&ctx->m, block,
     872   103762864 :                                              ctx->is_444 ? 4 * (n > 0): 4 & (2*i),
     873             :                                              qscale, &overflow);
     874    51881432 :             ac_bits   += dnxhd_calc_ac_bits(ctx, block, last_index);
     875             : 
     876    51881432 :             diff = block[0] - ctx->m.last_dc[n];
     877    51881432 :             if (diff < 0)
     878    23766663 :                 nbits = av_log2_16bit(-2 * diff);
     879             :             else
     880    28114769 :                 nbits = av_log2_16bit(2 * diff);
     881             : 
     882             :             av_assert1(nbits < ctx->bit_depth + 4);
     883    51881432 :             dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
     884             : 
     885    51881432 :             ctx->m.last_dc[n] = block[0];
     886             : 
     887    51881432 :             if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) {
     888     3024000 :                 dnxhd_unquantize_c(ctx, block, i, qscale, last_index);
     889     3024000 :                 ctx->m.idsp.idct(block);
     890     3024000 :                 ssd += dnxhd_ssd_block(block, src_block);
     891             :             }
     892             :         }
     893     6485179 :         ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].ssd  = ssd;
     894    12970358 :         ctx->mb_rc[(qscale * ctx->m.mb_num) + mb].bits = ac_bits + dc_bits + 12 +
     895     6485179 :                                      (1 + ctx->is_444) * 8 * ctx->vlc_bits[0];
     896             :     }
     897       41557 :     return 0;
     898             : }
     899             : 
     900       20765 : static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg,
     901             :                                int jobnr, int threadnr)
     902             : {
     903       20765 :     DNXHDEncContext *ctx = avctx->priv_data;
     904       20765 :     int mb_y = jobnr, mb_x;
     905       20765 :     ctx = ctx->thread[threadnr];
     906       20765 :     init_put_bits(&ctx->m.pb, (uint8_t *)arg + ctx->data_offset + ctx->slice_offs[jobnr],
     907       20765 :                   ctx->slice_size[jobnr]);
     908             : 
     909       20765 :     ctx->m.last_dc[0] =
     910       20765 :     ctx->m.last_dc[1] =
     911       20765 :     ctx->m.last_dc[2] = 1 << (ctx->bit_depth + 2);
     912     3385585 :     for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
     913     3364820 :         unsigned mb = mb_y * ctx->m.mb_width + mb_x;
     914     3364820 :         int qscale = ctx->mb_qscale[mb];
     915             :         int i;
     916             : 
     917     3364820 :         put_bits(&ctx->m.pb, 11, qscale);
     918     3364820 :         put_bits(&ctx->m.pb, 1, avctx->pix_fmt == AV_PIX_FMT_YUV444P10);
     919             : 
     920     3364820 :         dnxhd_get_blocks(ctx, mb_x, mb_y);
     921             : 
     922    30283380 :         for (i = 0; i < 8 + 4 * ctx->is_444; i++) {
     923    26918560 :             int16_t *block = ctx->blocks[i];
     924    26918560 :             int overflow, n = dnxhd_switch_matrix(ctx, i);
     925    80755680 :             int last_index = ctx->m.dct_quantize(&ctx->m, block,
     926    53837120 :                                                  ctx->is_444 ? (((i >> 1) % 3) < 1 ? 0 : 4): 4 & (2*i),
     927             :                                                  qscale, &overflow);
     928             :             // START_TIMER;
     929    26918560 :             dnxhd_encode_block(ctx, block, last_index, n);
     930             :             // STOP_TIMER("encode_block");
     931             :         }
     932             :     }
     933       20765 :     if (put_bits_count(&ctx->m.pb) & 31)
     934       20137 :         put_bits(&ctx->m.pb, 32 - (put_bits_count(&ctx->m.pb) & 31), 0);
     935       20765 :     flush_put_bits(&ctx->m.pb);
     936       20765 :     return 0;
     937             : }
     938             : 
     939         380 : static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx)
     940             : {
     941             :     int mb_y, mb_x;
     942         380 :     int offset = 0;
     943       21145 :     for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) {
     944             :         int thread_size;
     945       20765 :         ctx->slice_offs[mb_y] = offset;
     946       20765 :         ctx->slice_size[mb_y] = 0;
     947     3385585 :         for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
     948     3364820 :             unsigned mb = mb_y * ctx->m.mb_width + mb_x;
     949     3364820 :             ctx->slice_size[mb_y] += ctx->mb_bits[mb];
     950             :         }
     951       20765 :         ctx->slice_size[mb_y]   = (ctx->slice_size[mb_y] + 31) & ~31;
     952       20765 :         ctx->slice_size[mb_y] >>= 3;
     953       20765 :         thread_size = ctx->slice_size[mb_y];
     954       20765 :         offset += thread_size;
     955             :     }
     956         380 : }
     957             : 
     958       11520 : static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg,
     959             :                                int jobnr, int threadnr)
     960             : {
     961       11520 :     DNXHDEncContext *ctx = avctx->priv_data;
     962       11520 :     int mb_y = jobnr, mb_x, x, y;
     963       11740 :     int partial_last_row = (mb_y == ctx->m.mb_height - 1) &&
     964         220 :                            ((avctx->height >> ctx->interlaced) & 0xF);
     965             : 
     966       11520 :     ctx = ctx->thread[threadnr];
     967       11520 :     if (ctx->bit_depth == 8) {
     968       10955 :         uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize);
     969     1930655 :         for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
     970     1919700 :             unsigned mb = mb_y * ctx->m.mb_width + mb_x;
     971             :             int sum;
     972             :             int varc;
     973             : 
     974     1919700 :             if (!partial_last_row && mb_x * 16 <= avctx->width - 16 && (avctx->width % 16) == 0) {
     975     1901720 :                 sum  = ctx->m.mpvencdsp.pix_sum(pix, ctx->m.linesize);
     976     1901720 :                 varc = ctx->m.mpvencdsp.pix_norm1(pix, ctx->m.linesize);
     977             :             } else {
     978       17980 :                 int bw = FFMIN(avctx->width - 16 * mb_x, 16);
     979       17980 :                 int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
     980       17980 :                 sum = varc = 0;
     981      244175 :                 for (y = 0; y < bh; y++) {
     982     3800030 :                     for (x = 0; x < bw; x++) {
     983     3573835 :                         uint8_t val = pix[x + y * ctx->m.linesize];
     984     3573835 :                         sum  += val;
     985     3573835 :                         varc += val * val;
     986             :                     }
     987             :                 }
     988             :             }
     989     1919700 :             varc = (varc - (((unsigned) sum * sum) >> 8) + 128) >> 8;
     990             : 
     991     1919700 :             ctx->mb_cmp[mb].value = varc;
     992     1919700 :             ctx->mb_cmp[mb].mb    = mb;
     993             :         }
     994             :     } else { // 10-bit
     995         565 :         const int linesize = ctx->m.linesize >> 1;
     996       59365 :         for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
     997      117600 :             uint16_t *pix = (uint16_t *)ctx->thread[0]->src[0] +
     998       58800 :                             ((mb_y << 4) * linesize) + (mb_x << 4);
     999       58800 :             unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
    1000       58800 :             int sum = 0;
    1001       58800 :             int sqsum = 0;
    1002       58800 :             int bw = FFMIN(avctx->width - 16 * mb_x, 16);
    1003       58800 :             int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16);
    1004             :             int mean, sqmean;
    1005             :             int i, j;
    1006             :             // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
    1007      994800 :             for (i = 0; i < bh; ++i) {
    1008    15912000 :                 for (j = 0; j < bw; ++j) {
    1009             :                     // Turn 16-bit pixels into 10-bit ones.
    1010    14976000 :                     const int sample = (unsigned) pix[j] >> 6;
    1011    14976000 :                     sum   += sample;
    1012    14976000 :                     sqsum += sample * sample;
    1013             :                     // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
    1014             :                 }
    1015      936000 :                 pix += linesize;
    1016             :             }
    1017       58800 :             mean = sum >> 8; // 16*16 == 2^8
    1018       58800 :             sqmean = sqsum >> 8;
    1019       58800 :             ctx->mb_cmp[mb].value = sqmean - mean * mean;
    1020       58800 :             ctx->mb_cmp[mb].mb    = mb;
    1021             :         }
    1022             :     }
    1023       11520 :     return 0;
    1024             : }
    1025             : 
    1026          15 : static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
    1027             : {
    1028             :     int lambda, up_step, down_step;
    1029          15 :     int last_lower = INT_MAX, last_higher = 0;
    1030             :     int x, y, q;
    1031             : 
    1032         120 :     for (q = 1; q < avctx->qmax; q++) {
    1033         105 :         ctx->qscale = q;
    1034         105 :         avctx->execute2(avctx, dnxhd_calc_bits_thread,
    1035             :                         NULL, NULL, ctx->m.mb_height);
    1036             :     }
    1037          15 :     up_step = down_step = 2 << LAMBDA_FRAC_BITS;
    1038          15 :     lambda  = ctx->lambda;
    1039             : 
    1040          70 :     for (;;) {
    1041          85 :         int bits = 0;
    1042          85 :         int end  = 0;
    1043          85 :         if (lambda == last_higher) {
    1044           5 :             lambda++;
    1045           5 :             end = 1; // need to set final qscales/bits
    1046             :         }
    1047        3862 :         for (y = 0; y < ctx->m.mb_height; y++) {
    1048      308529 :             for (x = 0; x < ctx->m.mb_width; x++) {
    1049      304720 :                 unsigned min = UINT_MAX;
    1050      304720 :                 int qscale = 1;
    1051      304720 :                 int mb     = y * ctx->m.mb_width + x;
    1052      304720 :                 int rc = 0;
    1053     2437760 :                 for (q = 1; q < avctx->qmax; q++) {
    1054     2133040 :                     int i = (q*ctx->m.mb_num) + mb;
    1055     4266080 :                     unsigned score = ctx->mb_rc[i].bits * lambda +
    1056     2133040 :                                      ((unsigned) ctx->mb_rc[i].ssd << LAMBDA_FRAC_BITS);
    1057     2133040 :                     if (score < min) {
    1058      493989 :                         min    = score;
    1059      493989 :                         qscale = q;
    1060      493989 :                         rc = i;
    1061             :                     }
    1062             :                 }
    1063      304720 :                 bits += ctx->mb_rc[rc].bits;
    1064      304720 :                 ctx->mb_qscale[mb] = qscale;
    1065      304720 :                 ctx->mb_bits[mb]   = ctx->mb_rc[rc].bits;
    1066             :             }
    1067        3809 :             bits = (bits + 31) & ~31; // padding
    1068        3809 :             if (bits > ctx->frame_bits)
    1069          32 :                 break;
    1070             :         }
    1071          85 :         if (end) {
    1072           5 :             if (bits > ctx->frame_bits)
    1073           0 :                 return AVERROR(EINVAL);
    1074           5 :             break;
    1075             :         }
    1076          80 :         if (bits < ctx->frame_bits) {
    1077          48 :             last_lower = FFMIN(lambda, last_lower);
    1078          48 :             if (last_higher != 0)
    1079          25 :                 lambda = (lambda+last_higher)>>1;
    1080             :             else
    1081          23 :                 lambda -= down_step;
    1082          48 :             down_step = FFMIN((int64_t)down_step*5, INT_MAX);
    1083          48 :             up_step = 1<<LAMBDA_FRAC_BITS;
    1084          48 :             lambda = FFMAX(1, lambda);
    1085          48 :             if (lambda == last_lower)
    1086          10 :                 break;
    1087             :         } else {
    1088          32 :             last_higher = FFMAX(lambda, last_higher);
    1089          32 :             if (last_lower != INT_MAX)
    1090          32 :                 lambda = (lambda+last_lower)>>1;
    1091           0 :             else if ((int64_t)lambda + up_step > INT_MAX)
    1092           0 :                 return AVERROR(EINVAL);
    1093             :             else
    1094           0 :                 lambda += up_step;
    1095          32 :             up_step = FFMIN((int64_t)up_step*5, INT_MAX);
    1096          32 :             down_step = 1<<LAMBDA_FRAC_BITS;
    1097             :         }
    1098             :     }
    1099          15 :     ctx->lambda = lambda;
    1100          15 :     return 0;
    1101             : }
    1102             : 
    1103         365 : static int dnxhd_find_qscale(DNXHDEncContext *ctx)
    1104             : {
    1105         365 :     int bits = 0;
    1106         365 :     int up_step = 1;
    1107         365 :     int down_step = 1;
    1108         365 :     int last_higher = 0;
    1109         365 :     int last_lower = INT_MAX;
    1110             :     int qscale;
    1111             :     int x, y;
    1112             : 
    1113         365 :     qscale = ctx->qscale;
    1114             :     for (;;) {
    1115        1017 :         bits = 0;
    1116         691 :         ctx->qscale = qscale;
    1117             :         // XXX avoid recalculating bits
    1118         691 :         ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread,
    1119             :                                NULL, NULL, ctx->m.mb_height);
    1120       35231 :         for (y = 0; y < ctx->m.mb_height; y++) {
    1121     5794959 :             for (x = 0; x < ctx->m.mb_width; x++)
    1122     5760121 :                 bits += ctx->mb_rc[(qscale*ctx->m.mb_num) + (y*ctx->m.mb_width+x)].bits;
    1123       34838 :             bits = (bits+31)&~31; // padding
    1124       34838 :             if (bits > ctx->frame_bits)
    1125         298 :                 break;
    1126             :         }
    1127         691 :         if (bits < ctx->frame_bits) {
    1128         393 :             if (qscale == 1)
    1129         145 :                 return 1;
    1130         248 :             if (last_higher == qscale - 1) {
    1131         195 :                 qscale = last_higher;
    1132         195 :                 break;
    1133             :             }
    1134          53 :             last_lower = FFMIN(qscale, last_lower);
    1135          53 :             if (last_higher != 0)
    1136          38 :                 qscale = (qscale + last_higher) >> 1;
    1137             :             else
    1138          15 :                 qscale -= down_step++;
    1139          53 :             if (qscale < 1)
    1140           0 :                 qscale = 1;
    1141          53 :             up_step = 1;
    1142             :         } else {
    1143         298 :             if (last_lower == qscale + 1)
    1144          25 :                 break;
    1145         273 :             last_higher = FFMAX(qscale, last_higher);
    1146         273 :             if (last_lower != INT_MAX)
    1147           5 :                 qscale = (qscale + last_lower) >> 1;
    1148             :             else
    1149         268 :                 qscale += up_step++;
    1150         273 :             down_step = 1;
    1151         273 :             if (qscale >= ctx->m.avctx->qmax)
    1152           0 :                 return AVERROR(EINVAL);
    1153             :         }
    1154             :     }
    1155         220 :     ctx->qscale = qscale;
    1156         220 :     return 0;
    1157             : }
    1158             : 
    1159             : #define BUCKET_BITS 8
    1160             : #define RADIX_PASSES 4
    1161             : #define NBUCKETS (1 << BUCKET_BITS)
    1162             : 
    1163    11871000 : static inline int get_bucket(int value, int shift)
    1164             : {
    1165    11871000 :     value >>= shift;
    1166    11871000 :     value  &= NBUCKETS - 1;
    1167    11871000 :     return NBUCKETS - 1 - value;
    1168             : }
    1169             : 
    1170         220 : static void radix_count(const RCCMPEntry *data, int size,
    1171             :                         int buckets[RADIX_PASSES][NBUCKETS])
    1172             : {
    1173             :     int i, j;
    1174         220 :     memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS);
    1175     1978720 :     for (i = 0; i < size; i++) {
    1176     1978500 :         int v = data[i].value;
    1177     9892500 :         for (j = 0; j < RADIX_PASSES; j++) {
    1178     7914000 :             buckets[j][get_bucket(v, 0)]++;
    1179     7914000 :             v >>= BUCKET_BITS;
    1180             :         }
    1181             :         av_assert1(!v);
    1182             :     }
    1183        1100 :     for (j = 0; j < RADIX_PASSES; j++) {
    1184         880 :         int offset = size;
    1185      226160 :         for (i = NBUCKETS - 1; i >= 0; i--)
    1186      225280 :             buckets[j][i] = offset -= buckets[j][i];
    1187             :         av_assert1(!buckets[j][0]);
    1188             :     }
    1189         220 : }
    1190             : 
    1191         440 : static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data,
    1192             :                             int size, int buckets[NBUCKETS], int pass)
    1193             : {
    1194         440 :     int shift = pass * BUCKET_BITS;
    1195             :     int i;
    1196     3957440 :     for (i = 0; i < size; i++) {
    1197     3957000 :         int v   = get_bucket(data[i].value, shift);
    1198     3957000 :         int pos = buckets[v]++;
    1199     3957000 :         dst[pos] = data[i];
    1200             :     }
    1201         440 : }
    1202             : 
    1203         220 : static void radix_sort(RCCMPEntry *data, RCCMPEntry *tmp, int size)
    1204             : {
    1205             :     int buckets[RADIX_PASSES][NBUCKETS];
    1206         220 :     radix_count(data, size, buckets);
    1207         220 :     radix_sort_pass(tmp, data, size, buckets[0], 0);
    1208         220 :     radix_sort_pass(data, tmp, size, buckets[1], 1);
    1209         220 :     if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) {
    1210           0 :         radix_sort_pass(tmp, data, size, buckets[2], 2);
    1211           0 :         radix_sort_pass(data, tmp, size, buckets[3], 3);
    1212             :     }
    1213         220 : }
    1214             : 
    1215         365 : static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
    1216             : {
    1217         365 :     int max_bits = 0;
    1218             :     int ret, x, y;
    1219         365 :     if ((ret = dnxhd_find_qscale(ctx)) < 0)
    1220           0 :         return ret;
    1221       20455 :     for (y = 0; y < ctx->m.mb_height; y++) {
    1222     3330910 :         for (x = 0; x < ctx->m.mb_width; x++) {
    1223     3310820 :             int mb = y * ctx->m.mb_width + x;
    1224     3310820 :             int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
    1225             :             int delta_bits;
    1226     3310820 :             ctx->mb_qscale[mb] = ctx->qscale;
    1227     3310820 :             ctx->mb_bits[mb] = ctx->mb_rc[rc].bits;
    1228     3310820 :             max_bits += ctx->mb_rc[rc].bits;
    1229             :             if (!RC_VARIANCE) {
    1230             :                 delta_bits = ctx->mb_rc[rc].bits -
    1231             :                              ctx->mb_rc[rc + ctx->m.mb_num].bits;
    1232             :                 ctx->mb_cmp[mb].mb = mb;
    1233             :                 ctx->mb_cmp[mb].value =
    1234             :                     delta_bits ? ((ctx->mb_rc[rc].ssd -
    1235             :                                    ctx->mb_rc[rc + ctx->m.mb_num].ssd) * 100) /
    1236             :                                   delta_bits
    1237             :                                : INT_MIN; // avoid increasing qscale
    1238             :             }
    1239             :         }
    1240       20090 :         max_bits += 31; // worst padding
    1241             :     }
    1242         365 :     if (!ret) {
    1243             :         if (RC_VARIANCE)
    1244         220 :             avctx->execute2(avctx, dnxhd_mb_var_thread,
    1245             :                             NULL, NULL, ctx->m.mb_height);
    1246         220 :         radix_sort(ctx->mb_cmp, ctx->mb_cmp_tmp, ctx->m.mb_num);
    1247      647025 :         for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
    1248      646805 :             int mb = ctx->mb_cmp[x].mb;
    1249      646805 :             int rc = (ctx->qscale * ctx->m.mb_num ) + mb;
    1250     1293610 :             max_bits -= ctx->mb_rc[rc].bits -
    1251      646805 :                         ctx->mb_rc[rc + ctx->m.mb_num].bits;
    1252      646805 :             ctx->mb_qscale[mb] = ctx->qscale + 1;
    1253      646805 :             ctx->mb_bits[mb]   = ctx->mb_rc[rc + ctx->m.mb_num].bits;
    1254             :         }
    1255             :     }
    1256         365 :     return 0;
    1257             : }
    1258             : 
    1259         325 : static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
    1260             : {
    1261             :     int i;
    1262             : 
    1263         650 :     for (i = 0; i < ctx->m.avctx->thread_count; i++) {
    1264         325 :         ctx->thread[i]->m.linesize    = frame->linesize[0] << ctx->interlaced;
    1265         325 :         ctx->thread[i]->m.uvlinesize  = frame->linesize[1] << ctx->interlaced;
    1266         325 :         ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
    1267         325 :         ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;
    1268             :     }
    1269             : 
    1270             : #if FF_API_CODED_FRAME
    1271             : FF_DISABLE_DEPRECATION_WARNINGS
    1272         325 :     ctx->m.avctx->coded_frame->interlaced_frame = frame->interlaced_frame;
    1273             : FF_ENABLE_DEPRECATION_WARNINGS
    1274             : #endif
    1275         325 :     ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
    1276         325 : }
    1277             : 
    1278         325 : static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
    1279             :                                 const AVFrame *frame, int *got_packet)
    1280             : {
    1281         325 :     DNXHDEncContext *ctx = avctx->priv_data;
    1282         325 :     int first_field = 1;
    1283             :     int offset, i, ret;
    1284             :     uint8_t *buf;
    1285             : 
    1286         325 :     if ((ret = ff_alloc_packet2(avctx, pkt, ctx->frame_size, 0)) < 0)
    1287           0 :         return ret;
    1288         325 :     buf = pkt->data;
    1289             : 
    1290         325 :     dnxhd_load_picture(ctx, frame);
    1291             : 
    1292         380 : encode_coding_unit:
    1293        1520 :     for (i = 0; i < 3; i++) {
    1294        1140 :         ctx->src[i] = frame->data[i];
    1295        1140 :         if (ctx->interlaced && ctx->cur_field)
    1296         165 :             ctx->src[i] += frame->linesize[i];
    1297             :     }
    1298             : 
    1299         380 :     dnxhd_write_header(avctx, buf);
    1300             : 
    1301         380 :     if (avctx->mb_decision == FF_MB_DECISION_RD)
    1302          15 :         ret = dnxhd_encode_rdo(avctx, ctx);
    1303             :     else
    1304         365 :         ret = dnxhd_encode_fast(avctx, ctx);
    1305         380 :     if (ret < 0) {
    1306           0 :         av_log(avctx, AV_LOG_ERROR,
    1307             :                "picture could not fit ratecontrol constraints, increase qmax\n");
    1308           0 :         return ret;
    1309             :     }
    1310             : 
    1311         380 :     dnxhd_setup_threads_slices(ctx);
    1312             : 
    1313         380 :     offset = 0;
    1314       21145 :     for (i = 0; i < ctx->m.mb_height; i++) {
    1315       20765 :         AV_WB32(ctx->msip + i * 4, offset);
    1316       20765 :         offset += ctx->slice_size[i];
    1317             :         av_assert1(!(ctx->slice_size[i] & 3));
    1318             :     }
    1319             : 
    1320         380 :     avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
    1321             : 
    1322             :     av_assert1(ctx->data_offset + offset + 4 <= ctx->coding_unit_size);
    1323         380 :     memset(buf + ctx->data_offset + offset, 0,
    1324         380 :            ctx->coding_unit_size - 4 - offset - ctx->data_offset);
    1325             : 
    1326         380 :     AV_WB32(buf + ctx->coding_unit_size - 4, 0x600DC0DE); // EOF
    1327             : 
    1328         380 :     if (ctx->interlaced && first_field) {
    1329          55 :         first_field     = 0;
    1330          55 :         ctx->cur_field ^= 1;
    1331          55 :         buf            += ctx->coding_unit_size;
    1332          55 :         goto encode_coding_unit;
    1333             :     }
    1334             : 
    1335             : #if FF_API_CODED_FRAME
    1336             : FF_DISABLE_DEPRECATION_WARNINGS
    1337         325 :     avctx->coded_frame->quality = ctx->qscale * FF_QP2LAMBDA;
    1338             : FF_ENABLE_DEPRECATION_WARNINGS
    1339             : #endif
    1340             : 
    1341         325 :     ff_side_data_set_encoder_stats(pkt, ctx->qscale * FF_QP2LAMBDA, NULL, 0, AV_PICTURE_TYPE_I);
    1342             : 
    1343         325 :     pkt->flags |= AV_PKT_FLAG_KEY;
    1344         325 :     *got_packet = 1;
    1345         325 :     return 0;
    1346             : }
    1347             : 
    1348          73 : static av_cold int dnxhd_encode_end(AVCodecContext *avctx)
    1349             : {
    1350          73 :     DNXHDEncContext *ctx = avctx->priv_data;
    1351          73 :     int max_level        = 1 << (ctx->bit_depth + 2);
    1352             :     int i;
    1353             : 
    1354          73 :     av_free(ctx->vlc_codes - max_level * 2);
    1355          73 :     av_free(ctx->vlc_bits - max_level * 2);
    1356          73 :     av_freep(&ctx->run_codes);
    1357          73 :     av_freep(&ctx->run_bits);
    1358             : 
    1359          73 :     av_freep(&ctx->mb_bits);
    1360          73 :     av_freep(&ctx->mb_qscale);
    1361          73 :     av_freep(&ctx->mb_rc);
    1362          73 :     av_freep(&ctx->mb_cmp);
    1363          73 :     av_freep(&ctx->mb_cmp_tmp);
    1364          73 :     av_freep(&ctx->slice_size);
    1365          73 :     av_freep(&ctx->slice_offs);
    1366             : 
    1367          73 :     av_freep(&ctx->qmatrix_c);
    1368          73 :     av_freep(&ctx->qmatrix_l);
    1369          73 :     av_freep(&ctx->qmatrix_c16);
    1370          73 :     av_freep(&ctx->qmatrix_l16);
    1371             : 
    1372          82 :     for (i = 1; i < avctx->thread_count; i++)
    1373           9 :         av_freep(&ctx->thread[i]);
    1374             : 
    1375          73 :     return 0;
    1376             : }
    1377             : 
/* Option defaults for this encoder; the list ends with a NULL entry. */
static const AVCodecDefault dnxhd_defaults[] = {
    { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */
    { NULL },
};
    1382             : 
/*
 * Codec descriptor for the VC3/DNxHD encoder: wires up the init, encode and
 * close callbacks defined in this file and lists the accepted pixel formats.
 */
AVCodec ff_dnxhd_encoder = {
    .name           = "dnxhd",
    .long_name      = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_DNXHD,
    .priv_data_size = sizeof(DNXHDEncContext),
    .init           = dnxhd_encode_init,
    .encode2        = dnxhd_encode_picture,
    .close          = dnxhd_encode_end,
    .capabilities   = AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_INTRA_ONLY,
    /* Accepted input formats; the list ends with AV_PIX_FMT_NONE. */
    .pix_fmts       = (const enum AVPixelFormat[]) {
        AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV422P10,
        AV_PIX_FMT_YUV444P10,
        AV_PIX_FMT_GBRP10,
        AV_PIX_FMT_NONE
    },
    .priv_class     = &dnxhd_class,
    .defaults       = dnxhd_defaults,
    .profiles       = NULL_IF_CONFIG_SMALL(ff_dnxhd_profiles),
};

Generated by: LCOV version 1.13