| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Opus encoder | ||
| 3 | * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <float.h> | ||
| 23 | |||
| 24 | #include "encode.h" | ||
| 25 | #include "enc.h" | ||
| 26 | #include "pvq.h" | ||
| 27 | #include "enc_psy.h" | ||
| 28 | #include "tab.h" | ||
| 29 | |||
| 30 | #include "libavutil/channel_layout.h" | ||
| 31 | #include "libavutil/float_dsp.h" | ||
| 32 | #include "libavutil/mem.h" | ||
| 33 | #include "libavutil/mem_internal.h" | ||
| 34 | #include "libavutil/opt.h" | ||
| 35 | #include "bytestream.h" | ||
| 36 | #include "audio_frame_queue.h" | ||
| 37 | #include "codec_internal.h" | ||
| 38 | |||
| 39 | typedef struct OpusEncContext { | ||
| 40 | AVClass *av_class; | ||
| 41 | OpusEncOptions options; | ||
| 42 | OpusPsyContext psyctx; | ||
| 43 | AVCodecContext *avctx; | ||
| 44 | AudioFrameQueue afq; | ||
| 45 | AVFloatDSPContext *dsp; | ||
| 46 | AVTXContext *tx[CELT_BLOCK_NB]; | ||
| 47 | av_tx_fn tx_fn[CELT_BLOCK_NB]; | ||
| 48 | CeltPVQ *pvq; | ||
| 49 | struct FFBufQueue bufqueue; | ||
| 50 | |||
| 51 | uint8_t enc_id[64]; | ||
| 52 | int enc_id_bits; | ||
| 53 | |||
| 54 | OpusPacketInfo packet; | ||
| 55 | |||
| 56 | int channels; | ||
| 57 | |||
| 58 | CeltFrame *frame; | ||
| 59 | OpusRangeCoder *rc; | ||
| 60 | |||
| 61 | /* Actual energy the decoder will have */ | ||
| 62 | float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]; | ||
| 63 | |||
| 64 | DECLARE_ALIGNED(32, float, scratch)[2048]; | ||
| 65 | } OpusEncContext; | ||
| 66 | |||
| 67 | ✗ | static void opus_write_extradata(AVCodecContext *avctx) | |
| 68 | { | ||
| 69 | ✗ | uint8_t *bs = avctx->extradata; | |
| 70 | |||
| 71 | ✗ | bytestream_put_buffer(&bs, "OpusHead", 8); | |
| 72 | ✗ | bytestream_put_byte (&bs, 0x1); | |
| 73 | ✗ | bytestream_put_byte (&bs, avctx->ch_layout.nb_channels); | |
| 74 | ✗ | bytestream_put_le16 (&bs, avctx->initial_padding); | |
| 75 | ✗ | bytestream_put_le32 (&bs, avctx->sample_rate); | |
| 76 | ✗ | bytestream_put_le16 (&bs, 0x0); | |
| 77 | ✗ | bytestream_put_byte (&bs, 0x0); /* Default layout */ | |
| 78 | ✗ | } | |
| 79 | |||
| 80 | ✗ | static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed) | |
| 81 | { | ||
| 82 | ✗ | int tmp = 0x0, extended_toc = 0; | |
| 83 | static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = { | ||
| 84 | /* Silk Hybrid Celt Layer */ | ||
| 85 | /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */ | ||
| 86 | { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */ | ||
| 87 | { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */ | ||
| 88 | { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */ | ||
| 89 | { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */ | ||
| 90 | { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */ | ||
| 91 | { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */ | ||
| 92 | }; | ||
| 93 | ✗ | int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth]; | |
| 94 | ✗ | *fsize_needed = 0; | |
| 95 | ✗ | if (!cfg) | |
| 96 | ✗ | return 1; | |
| 97 | ✗ | if (s->packet.frames == 2) { /* 2 packets */ | |
| 98 | ✗ | if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */ | |
| 99 | ✗ | tmp = 0x1; | |
| 100 | } else { /* different size */ | ||
| 101 | ✗ | tmp = 0x2; | |
| 102 | ✗ | *fsize_needed = 1; /* put frame sizes in the packet */ | |
| 103 | } | ||
| 104 | ✗ | } else if (s->packet.frames > 2) { | |
| 105 | ✗ | tmp = 0x3; | |
| 106 | ✗ | extended_toc = 1; | |
| 107 | } | ||
| 108 | ✗ | tmp |= (s->channels > 1) << 2; /* Stereo or mono */ | |
| 109 | ✗ | tmp |= (cfg - 1) << 3; /* codec configuration */ | |
| 110 | ✗ | *toc++ = tmp; | |
| 111 | ✗ | if (extended_toc) { | |
| 112 | ✗ | for (int i = 0; i < (s->packet.frames - 1); i++) | |
| 113 | ✗ | *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits); | |
| 114 | ✗ | tmp = (*fsize_needed) << 7; /* vbr flag */ | |
| 115 | ✗ | tmp |= (0) << 6; /* padding flag */ | |
| 116 | ✗ | tmp |= s->packet.frames; | |
| 117 | ✗ | *toc++ = tmp; | |
| 118 | } | ||
| 119 | ✗ | *size = 1 + extended_toc; | |
| 120 | ✗ | return 0; | |
| 121 | } | ||
| 122 | |||
| 123 | ✗ | static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f) | |
| 124 | { | ||
| 125 | ✗ | AVFrame *cur = NULL; | |
| 126 | ✗ | const int subframesize = s->avctx->frame_size; | |
| 127 | ✗ | int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize; | |
| 128 | |||
| 129 | ✗ | cur = ff_bufqueue_get(&s->bufqueue); | |
| 130 | |||
| 131 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 132 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 133 | ✗ | const void *input = cur->extended_data[ch]; | |
| 134 | ✗ | size_t bps = av_get_bytes_per_sample(cur->format); | |
| 135 | ✗ | memcpy(b->overlap, input, bps*cur->nb_samples); | |
| 136 | } | ||
| 137 | |||
| 138 | ✗ | av_frame_free(&cur); | |
| 139 | |||
| 140 | ✗ | for (int sf = 0; sf < subframes; sf++) { | |
| 141 | ✗ | if (sf != (subframes - 1)) | |
| 142 | ✗ | cur = ff_bufqueue_get(&s->bufqueue); | |
| 143 | else | ||
| 144 | ✗ | cur = ff_bufqueue_peek(&s->bufqueue, 0); | |
| 145 | |||
| 146 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 147 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 148 | ✗ | const void *input = cur->extended_data[ch]; | |
| 149 | ✗ | const size_t bps = av_get_bytes_per_sample(cur->format); | |
| 150 | ✗ | const size_t left = (subframesize - cur->nb_samples)*bps; | |
| 151 | ✗ | const size_t len = FFMIN(subframesize, cur->nb_samples)*bps; | |
| 152 | ✗ | memcpy(&b->samples[sf*subframesize], input, len); | |
| 153 | ✗ | memset(&b->samples[cur->nb_samples], 0, left); | |
| 154 | } | ||
| 155 | |||
| 156 | /* Last frame isn't popped off and freed yet - we need it for overlap */ | ||
| 157 | ✗ | if (sf != (subframes - 1)) | |
| 158 | ✗ | av_frame_free(&cur); | |
| 159 | } | ||
| 160 | ✗ | } | |
| 161 | |||
| 162 | /* Apply the pre emphasis filter */ | ||
| 163 | ✗ | static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f) | |
| 164 | { | ||
| 165 | ✗ | const int subframesize = s->avctx->frame_size; | |
| 166 | ✗ | const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize; | |
| 167 | ✗ | const float c = ff_opus_deemph_weights[0]; | |
| 168 | |||
| 169 | /* Filter overlap */ | ||
| 170 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 171 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 172 | ✗ | float m = b->emph_coeff; | |
| 173 | ✗ | for (int i = 0; i < CELT_OVERLAP; i++) { | |
| 174 | ✗ | float sample = b->overlap[i]; | |
| 175 | ✗ | b->overlap[i] = sample - m; | |
| 176 | ✗ | m = sample * c; | |
| 177 | } | ||
| 178 | ✗ | b->emph_coeff = m; | |
| 179 | } | ||
| 180 | |||
| 181 | /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */ | ||
| 182 | ✗ | for (int sf = 0; sf < subframes; sf++) { | |
| 183 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 184 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 185 | ✗ | float m = b->emph_coeff; | |
| 186 | ✗ | for (int i = 0; i < subframesize; i++) { | |
| 187 | ✗ | float sample = b->samples[sf*subframesize + i]; | |
| 188 | ✗ | b->samples[sf*subframesize + i] = sample - m; | |
| 189 | ✗ | m = sample * c; | |
| 190 | } | ||
| 191 | ✗ | if (sf != (subframes - 1)) | |
| 192 | ✗ | b->emph_coeff = m; | |
| 193 | } | ||
| 194 | } | ||
| 195 | ✗ | } | |
| 196 | |||
| 197 | /* Create the window and do the mdct */ | ||
| 198 | ✗ | static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f) | |
| 199 | { | ||
| 200 | ✗ | float *win = s->scratch, *temp = s->scratch + 1920; | |
| 201 | |||
| 202 | ✗ | if (f->transient) { | |
| 203 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 204 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 205 | ✗ | float *src1 = b->overlap; | |
| 206 | ✗ | for (int t = 0; t < f->blocks; t++) { | |
| 207 | ✗ | float *src2 = &b->samples[CELT_OVERLAP*t]; | |
| 208 | ✗ | s->dsp->vector_fmul(win, src1, ff_celt_window, 128); | |
| 209 | ✗ | s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2, | |
| 210 | ff_celt_window_padded, 128); | ||
| 211 | ✗ | src1 = src2; | |
| 212 | ✗ | s->tx_fn[0](s->tx[0], b->coeffs + t, win, sizeof(float)*f->blocks); | |
| 213 | } | ||
| 214 | } | ||
| 215 | } else { | ||
| 216 | ✗ | int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1); | |
| 217 | ✗ | int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1; | |
| 218 | ✗ | memset(win, 0, wlen*sizeof(float)); | |
| 219 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 220 | ✗ | CeltBlock *b = &f->block[ch]; | |
| 221 | |||
| 222 | /* Overlap */ | ||
| 223 | ✗ | s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128); | |
| 224 | ✗ | memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float)); | |
| 225 | |||
| 226 | /* Samples, flat top window */ | ||
| 227 | ✗ | memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float)); | |
| 228 | |||
| 229 | /* Samples, windowed */ | ||
| 230 | ✗ | s->dsp->vector_fmul_reverse(temp, b->samples + rwin, | |
| 231 | ff_celt_window_padded, 128); | ||
| 232 | ✗ | memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float)); | |
| 233 | |||
| 234 | ✗ | s->tx_fn[f->size](s->tx[f->size], b->coeffs, win, sizeof(float)); | |
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 239 | ✗ | CeltBlock *block = &f->block[ch]; | |
| 240 | ✗ | for (int i = 0; i < CELT_MAX_BANDS; i++) { | |
| 241 | ✗ | float ener = 0.0f; | |
| 242 | ✗ | int band_offset = ff_celt_freq_bands[i] << f->size; | |
| 243 | ✗ | int band_size = ff_celt_freq_range[i] << f->size; | |
| 244 | ✗ | float *coeffs = &block->coeffs[band_offset]; | |
| 245 | |||
| 246 | ✗ | for (int j = 0; j < band_size; j++) | |
| 247 | ✗ | ener += coeffs[j]*coeffs[j]; | |
| 248 | |||
| 249 | ✗ | block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON; | |
| 250 | ✗ | ener = 1.0f/block->lin_energy[i]; | |
| 251 | |||
| 252 | ✗ | for (int j = 0; j < band_size; j++) | |
| 253 | ✗ | coeffs[j] *= ener; | |
| 254 | |||
| 255 | ✗ | block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i]; | |
| 256 | |||
| 257 | /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */ | ||
| 258 | ✗ | block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE); | |
| 259 | } | ||
| 260 | } | ||
| 261 | ✗ | } | |
| 262 | |||
| 263 | ✗ | static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc) | |
| 264 | { | ||
| 265 | ✗ | int tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed; | |
| 266 | ✗ | int bits = f->transient ? 2 : 4; | |
| 267 | |||
| 268 | ✗ | tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits)); | |
| 269 | |||
| 270 | ✗ | for (int i = f->start_band; i < f->end_band; i++) { | |
| 271 | ✗ | if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) { | |
| 272 | ✗ | const int tbit = (diff ^ 1) == f->tf_change[i]; | |
| 273 | ✗ | ff_opus_rc_enc_log(rc, tbit, bits); | |
| 274 | ✗ | diff ^= tbit; | |
| 275 | ✗ | tf_changed |= diff; | |
| 276 | } | ||
| 277 | ✗ | bits = f->transient ? 4 : 5; | |
| 278 | } | ||
| 279 | |||
| 280 | ✗ | if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] != | |
| 281 | ✗ | ff_celt_tf_select[f->size][f->transient][1][tf_changed]) { | |
| 282 | ✗ | ff_opus_rc_enc_log(rc, f->tf_select, 1); | |
| 283 | ✗ | tf_select = f->tf_select; | |
| 284 | } | ||
| 285 | |||
| 286 | ✗ | for (int i = f->start_band; i < f->end_band; i++) | |
| 287 | ✗ | f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]]; | |
| 288 | ✗ | } | |
| 289 | |||
| 290 | ✗ | static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f) | |
| 291 | { | ||
| 292 | ✗ | float gain = f->pf_gain; | |
| 293 | ✗ | int txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset; | |
| 294 | |||
| 295 | ✗ | ff_opus_rc_enc_log(rc, f->pfilter, 1); | |
| 296 | ✗ | if (!f->pfilter) | |
| 297 | ✗ | return; | |
| 298 | |||
| 299 | /* Octave */ | ||
| 300 | ✗ | txval = FFMIN(octave, 6); | |
| 301 | ✗ | ff_opus_rc_enc_uint(rc, txval, 6); | |
| 302 | ✗ | octave = txval; | |
| 303 | /* Period */ | ||
| 304 | ✗ | txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1); | |
| 305 | ✗ | ff_opus_rc_put_raw(rc, period, 4 + octave); | |
| 306 | ✗ | period = txval + (16 << octave) - 1; | |
| 307 | /* Gain */ | ||
| 308 | ✗ | txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7); | |
| 309 | ✗ | ff_opus_rc_put_raw(rc, txval, 3); | |
| 310 | ✗ | gain = 0.09375f * (txval + 1); | |
| 311 | /* Tapset */ | ||
| 312 | ✗ | if ((opus_rc_tell(rc) + 2) <= f->framebits) | |
| 313 | ✗ | ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset); | |
| 314 | else | ||
| 315 | ✗ | tapset = 0; | |
| 316 | /* Finally create the coeffs */ | ||
| 317 | ✗ | for (int i = 0; i < 2; i++) { | |
| 318 | ✗ | CeltBlock *block = &f->block[i]; | |
| 319 | |||
| 320 | ✗ | block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD); | |
| 321 | ✗ | block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0]; | |
| 322 | ✗ | block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1]; | |
| 323 | ✗ | block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2]; | |
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | ✗ | static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f, | |
| 328 | float last_energy[][CELT_MAX_BANDS], int intra) | ||
| 329 | { | ||
| 330 | ✗ | float alpha, beta, prev[2] = { 0, 0 }; | |
| 331 | ✗ | const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra]; | |
| 332 | |||
| 333 | /* Inter is really just differential coding */ | ||
| 334 | ✗ | if (opus_rc_tell(rc) + 3 <= f->framebits) | |
| 335 | ✗ | ff_opus_rc_enc_log(rc, intra, 3); | |
| 336 | else | ||
| 337 | ✗ | intra = 0; | |
| 338 | |||
| 339 | ✗ | if (intra) { | |
| 340 | ✗ | alpha = 0.0f; | |
| 341 | ✗ | beta = 1.0f - (4915.0f/32768.0f); | |
| 342 | } else { | ||
| 343 | ✗ | alpha = ff_celt_alpha_coef[f->size]; | |
| 344 | ✗ | beta = ff_celt_beta_coef[f->size]; | |
| 345 | } | ||
| 346 | |||
| 347 | ✗ | for (int i = f->start_band; i < f->end_band; i++) { | |
| 348 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 349 | ✗ | CeltBlock *block = &f->block[ch]; | |
| 350 | ✗ | const int left = f->framebits - opus_rc_tell(rc); | |
| 351 | ✗ | const float last = FFMAX(-9.0f, last_energy[ch][i]); | |
| 352 | ✗ | float diff = block->energy[i] - prev[ch] - last*alpha; | |
| 353 | ✗ | int q_en = lrintf(diff); | |
| 354 | ✗ | if (left >= 15) { | |
| 355 | ✗ | ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6); | |
| 356 | ✗ | } else if (left >= 2) { | |
| 357 | ✗ | q_en = av_clip(q_en, -1, 1); | |
| 358 | ✗ | ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small); | |
| 359 | ✗ | } else if (left >= 1) { | |
| 360 | ✗ | q_en = av_clip(q_en, -1, 0); | |
| 361 | ✗ | ff_opus_rc_enc_log(rc, (q_en & 1), 1); | |
| 362 | ✗ | } else q_en = -1; | |
| 363 | |||
| 364 | ✗ | block->error_energy[i] = q_en - diff; | |
| 365 | ✗ | prev[ch] += beta * q_en; | |
| 366 | } | ||
| 367 | } | ||
| 368 | ✗ | } | |
| 369 | |||
| 370 | ✗ | static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc, | |
| 371 | float last_energy[][CELT_MAX_BANDS]) | ||
| 372 | { | ||
| 373 | uint32_t inter, intra; | ||
| 374 | ✗ | OPUS_RC_CHECKPOINT_SPAWN(rc); | |
| 375 | |||
| 376 | ✗ | exp_quant_coarse(rc, f, last_energy, 1); | |
| 377 | ✗ | intra = OPUS_RC_CHECKPOINT_BITS(rc); | |
| 378 | |||
| 379 | ✗ | OPUS_RC_CHECKPOINT_ROLLBACK(rc); | |
| 380 | |||
| 381 | ✗ | exp_quant_coarse(rc, f, last_energy, 0); | |
| 382 | ✗ | inter = OPUS_RC_CHECKPOINT_BITS(rc); | |
| 383 | |||
| 384 | ✗ | if (inter > intra) { /* Unlikely */ | |
| 385 | ✗ | OPUS_RC_CHECKPOINT_ROLLBACK(rc); | |
| 386 | ✗ | exp_quant_coarse(rc, f, last_energy, 1); | |
| 387 | } | ||
| 388 | ✗ | } | |
| 389 | |||
| 390 | ✗ | static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc) | |
| 391 | { | ||
| 392 | ✗ | for (int i = f->start_band; i < f->end_band; i++) { | |
| 393 | ✗ | if (!f->fine_bits[i]) | |
| 394 | ✗ | continue; | |
| 395 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 396 | ✗ | CeltBlock *block = &f->block[ch]; | |
| 397 | ✗ | int quant, lim = (1 << f->fine_bits[i]); | |
| 398 | ✗ | float offset, diff = 0.5f - block->error_energy[i]; | |
| 399 | ✗ | quant = av_clip(floor(diff*lim), 0, lim - 1); | |
| 400 | ✗ | ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]); | |
| 401 | ✗ | offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f); | |
| 402 | ✗ | block->error_energy[i] -= offset; | |
| 403 | } | ||
| 404 | } | ||
| 405 | ✗ | } | |
| 406 | |||
| 407 | ✗ | static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f) | |
| 408 | { | ||
| 409 | ✗ | for (int priority = 0; priority < 2; priority++) { | |
| 410 | ✗ | for (int i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) { | |
| 411 | ✗ | if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS) | |
| 412 | ✗ | continue; | |
| 413 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 414 | ✗ | CeltBlock *block = &f->block[ch]; | |
| 415 | ✗ | const float err = block->error_energy[i]; | |
| 416 | ✗ | const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f; | |
| 417 | ✗ | const int sign = FFABS(err + offset) < FFABS(err - offset); | |
| 418 | ✗ | ff_opus_rc_put_raw(rc, sign, 1); | |
| 419 | ✗ | block->error_energy[i] -= offset*(1 - 2*sign); | |
| 420 | } | ||
| 421 | } | ||
| 422 | } | ||
| 423 | ✗ | } | |
| 424 | |||
| 425 | ✗ | static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc, | |
| 426 | CeltFrame *f, int index) | ||
| 427 | { | ||
| 428 | ✗ | ff_opus_rc_enc_init(rc); | |
| 429 | |||
| 430 | ✗ | ff_opus_psy_celt_frame_init(&s->psyctx, f, index); | |
| 431 | |||
| 432 | ✗ | celt_frame_setup_input(s, f); | |
| 433 | |||
| 434 | ✗ | if (f->silence) { | |
| 435 | ✗ | if (f->framebits >= 16) | |
| 436 | ✗ | ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit signalling) */ | |
| 437 | ✗ | for (int ch = 0; ch < s->channels; ch++) | |
| 438 | ✗ | memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS); | |
| 439 | ✗ | return; | |
| 440 | } | ||
| 441 | |||
| 442 | /* Filters */ | ||
| 443 | ✗ | celt_apply_preemph_filter(s, f); | |
| 444 | ✗ | if (f->pfilter) { | |
| 445 | ✗ | ff_opus_rc_enc_log(rc, 0, 15); | |
| 446 | ✗ | celt_enc_quant_pfilter(rc, f); | |
| 447 | } | ||
| 448 | |||
| 449 | /* Transform */ | ||
| 450 | ✗ | celt_frame_mdct(s, f); | |
| 451 | |||
| 452 | /* Need to handle transient/non-transient switches at any point during analysis */ | ||
| 453 | ✗ | while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index)) | |
| 454 | ✗ | celt_frame_mdct(s, f); | |
| 455 | |||
| 456 | ✗ | ff_opus_rc_enc_init(rc); | |
| 457 | |||
| 458 | /* Silence */ | ||
| 459 | ✗ | ff_opus_rc_enc_log(rc, 0, 15); | |
| 460 | |||
| 461 | /* Pitch filter */ | ||
| 462 | ✗ | if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits) | |
| 463 | ✗ | celt_enc_quant_pfilter(rc, f); | |
| 464 | |||
| 465 | /* Transient flag */ | ||
| 466 | ✗ | if (f->size && opus_rc_tell(rc) + 3 <= f->framebits) | |
| 467 | ✗ | ff_opus_rc_enc_log(rc, f->transient, 3); | |
| 468 | |||
| 469 | /* Main encoding */ | ||
| 470 | ✗ | celt_quant_coarse (f, rc, s->last_quantized_energy); | |
| 471 | ✗ | celt_enc_tf (f, rc); | |
| 472 | ✗ | ff_celt_bitalloc (f, rc, 1); | |
| 473 | ✗ | celt_quant_fine (f, rc); | |
| 474 | ✗ | ff_celt_quant_bands(f, rc); | |
| 475 | |||
| 476 | /* Anticollapse bit */ | ||
| 477 | ✗ | if (f->anticollapse_needed) | |
| 478 | ✗ | ff_opus_rc_put_raw(rc, f->anticollapse, 1); | |
| 479 | |||
| 480 | /* Final per-band energy adjustments from leftover bits */ | ||
| 481 | ✗ | celt_quant_final(s, rc, f); | |
| 482 | |||
| 483 | ✗ | for (int ch = 0; ch < f->channels; ch++) { | |
| 484 | ✗ | CeltBlock *block = &f->block[ch]; | |
| 485 | ✗ | for (int i = 0; i < CELT_MAX_BANDS; i++) | |
| 486 | ✗ | s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i]; | |
| 487 | } | ||
| 488 | } | ||
| 489 | |||
/*
 * Write an Opus frame length using the 1- or 2-byte length coding.
 *
 * Lengths below 252 take a single byte. Larger ones are stored as a first
 * byte in 252..255 (carrying v modulo 4) and a second byte such that
 * v == first + 4*second.
 *
 * @param dst  output buffer; dst[1] is only written when 2 is returned
 * @param v    length in bytes, expected to be within 0..1275
 * @return number of bytes written (1 or 2)
 */
static inline int write_opuslacing(uint8_t *dst, int v)
{
    if (v < 252) {
        dst[0] = v;
        return 1;
    }

    dst[0] = 252 + (v & 3);
    dst[1] = (v - dst[0]) >> 2;
    return 2;
}
| 496 | |||
| 497 | ✗ | static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt) | |
| 498 | { | ||
| 499 | int offset, fsize_needed; | ||
| 500 | |||
| 501 | /* Write toc */ | ||
| 502 | ✗ | opus_gen_toc(s, avpkt->data, &offset, &fsize_needed); | |
| 503 | |||
| 504 | /* Frame sizes if needed */ | ||
| 505 | ✗ | if (fsize_needed) { | |
| 506 | ✗ | for (int i = 0; i < s->packet.frames - 1; i++) { | |
| 507 | ✗ | offset += write_opuslacing(avpkt->data + offset, | |
| 508 | ✗ | s->frame[i].framebits >> 3); | |
| 509 | } | ||
| 510 | } | ||
| 511 | |||
| 512 | /* Packets */ | ||
| 513 | ✗ | for (int i = 0; i < s->packet.frames; i++) { | |
| 514 | ✗ | ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset, | |
| 515 | ✗ | s->frame[i].framebits >> 3); | |
| 516 | ✗ | offset += s->frame[i].framebits >> 3; | |
| 517 | } | ||
| 518 | |||
| 519 | ✗ | avpkt->size = offset; | |
| 520 | ✗ | } | |
| 521 | |||
| 522 | /* Used as overlap for the first frame and padding for the last encoded packet */ | ||
| 523 | ✗ | static AVFrame *spawn_empty_frame(OpusEncContext *s) | |
| 524 | { | ||
| 525 | ✗ | AVFrame *f = av_frame_alloc(); | |
| 526 | int ret; | ||
| 527 | ✗ | if (!f) | |
| 528 | ✗ | return NULL; | |
| 529 | ✗ | f->format = s->avctx->sample_fmt; | |
| 530 | ✗ | f->nb_samples = s->avctx->frame_size; | |
| 531 | ✗ | ret = av_channel_layout_copy(&f->ch_layout, &s->avctx->ch_layout); | |
| 532 | ✗ | if (ret < 0) { | |
| 533 | ✗ | av_frame_free(&f); | |
| 534 | ✗ | return NULL; | |
| 535 | } | ||
| 536 | ✗ | if (av_frame_get_buffer(f, 4)) { | |
| 537 | ✗ | av_frame_free(&f); | |
| 538 | ✗ | return NULL; | |
| 539 | } | ||
| 540 | ✗ | for (int i = 0; i < s->channels; i++) { | |
| 541 | ✗ | size_t bps = av_get_bytes_per_sample(f->format); | |
| 542 | ✗ | memset(f->extended_data[i], 0, bps*f->nb_samples); | |
| 543 | } | ||
| 544 | ✗ | return f; | |
| 545 | } | ||
| 546 | |||
| 547 | ✗ | static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, | |
| 548 | const AVFrame *frame, int *got_packet_ptr) | ||
| 549 | { | ||
| 550 | ✗ | OpusEncContext *s = avctx->priv_data; | |
| 551 | ✗ | int ret, frame_size, alloc_size = 0; | |
| 552 | |||
| 553 | ✗ | if (frame) { /* Add new frame to queue */ | |
| 554 | ✗ | if ((ret = ff_af_queue_add(&s->afq, frame)) < 0) | |
| 555 | ✗ | return ret; | |
| 556 | ✗ | ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame)); | |
| 557 | } else { | ||
| 558 | ✗ | ff_opus_psy_signal_eof(&s->psyctx); | |
| 559 | ✗ | if (!s->afq.remaining_samples || !avctx->frame_num) | |
| 560 | ✗ | return 0; /* We've been flushed and there's nothing left to encode */ | |
| 561 | } | ||
| 562 | |||
| 563 | /* Run the psychoacoustic system */ | ||
| 564 | ✗ | if (ff_opus_psy_process(&s->psyctx, &s->packet)) | |
| 565 | ✗ | return 0; | |
| 566 | |||
| 567 | ✗ | frame_size = OPUS_BLOCK_SIZE(s->packet.framesize); | |
| 568 | |||
| 569 | ✗ | if (!frame) { | |
| 570 | /* This can go negative, that's not a problem, we only pad if positive */ | ||
| 571 | ✗ | int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1; | |
| 572 | /* Pad with empty 2.5 ms frames to whatever framesize was decided, | ||
| 573 | * this should only happen at the very last flush frame. The frames | ||
| 574 | * allocated here will be freed (because they have no other references) | ||
| 575 | * after they get used by celt_frame_setup_input() */ | ||
| 576 | ✗ | for (int i = 0; i < pad_empty; i++) { | |
| 577 | ✗ | AVFrame *empty = spawn_empty_frame(s); | |
| 578 | ✗ | if (!empty) | |
| 579 | ✗ | return AVERROR(ENOMEM); | |
| 580 | ✗ | ff_bufqueue_add(avctx, &s->bufqueue, empty); | |
| 581 | } | ||
| 582 | } | ||
| 583 | |||
| 584 | ✗ | for (int i = 0; i < s->packet.frames; i++) { | |
| 585 | ✗ | celt_encode_frame(s, &s->rc[i], &s->frame[i], i); | |
| 586 | ✗ | alloc_size += s->frame[i].framebits >> 3; | |
| 587 | } | ||
| 588 | |||
| 589 | /* Worst case toc + the frame lengths if needed */ | ||
| 590 | ✗ | alloc_size += 2 + s->packet.frames*2; | |
| 591 | |||
| 592 | ✗ | if ((ret = ff_alloc_packet(avctx, avpkt, alloc_size)) < 0) | |
| 593 | ✗ | return ret; | |
| 594 | |||
| 595 | /* Assemble packet */ | ||
| 596 | ✗ | opus_packet_assembler(s, avpkt); | |
| 597 | |||
| 598 | /* Update the psychoacoustic system */ | ||
| 599 | ✗ | ff_opus_psy_postencode_update(&s->psyctx, s->frame); | |
| 600 | |||
| 601 | /* Remove samples from queue and skip if needed */ | ||
| 602 | ✗ | ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration); | |
| 603 | ✗ | if (s->packet.frames*frame_size > avpkt->duration) { | |
| 604 | ✗ | uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10); | |
| 605 | ✗ | if (!side) | |
| 606 | ✗ | return AVERROR(ENOMEM); | |
| 607 | ✗ | AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120); | |
| 608 | } | ||
| 609 | |||
| 610 | ✗ | *got_packet_ptr = 1; | |
| 611 | |||
| 612 | ✗ | return 0; | |
| 613 | } | ||
| 614 | |||
| 615 | ✗ | static av_cold int opus_encode_end(AVCodecContext *avctx) | |
| 616 | { | ||
| 617 | ✗ | OpusEncContext *s = avctx->priv_data; | |
| 618 | |||
| 619 | ✗ | for (int i = 0; i < CELT_BLOCK_NB; i++) | |
| 620 | ✗ | av_tx_uninit(&s->tx[i]); | |
| 621 | |||
| 622 | ✗ | ff_celt_pvq_uninit(&s->pvq); | |
| 623 | ✗ | av_freep(&s->dsp); | |
| 624 | ✗ | av_freep(&s->frame); | |
| 625 | ✗ | av_freep(&s->rc); | |
| 626 | ✗ | ff_af_queue_close(&s->afq); | |
| 627 | ✗ | ff_opus_psy_end(&s->psyctx); | |
| 628 | ✗ | ff_bufqueue_discard_all(&s->bufqueue); | |
| 629 | |||
| 630 | ✗ | return 0; | |
| 631 | } | ||
| 632 | |||
| 633 | ✗ | static av_cold int opus_encode_init(AVCodecContext *avctx) | |
| 634 | { | ||
| 635 | int ret, max_frames; | ||
| 636 | ✗ | OpusEncContext *s = avctx->priv_data; | |
| 637 | |||
| 638 | ✗ | s->avctx = avctx; | |
| 639 | ✗ | s->channels = avctx->ch_layout.nb_channels; | |
| 640 | |||
| 641 | /* Opus allows us to change the framesize on each packet (and each packet may | ||
| 642 | * have multiple frames in it) but we can't change the codec's frame size on | ||
| 643 | * runtime, so fix it to the lowest possible number of samples and use a queue | ||
| 644 | * to accumulate AVFrames until we have enough to encode whatever the encoder | ||
| 645 | * decides is the best */ | ||
| 646 | ✗ | avctx->frame_size = 120; | |
| 647 | /* Initial padding will change if SILK is ever supported */ | ||
| 648 | ✗ | avctx->initial_padding = 120; | |
| 649 | |||
| 650 | ✗ | if (!avctx->bit_rate) { | |
| 651 | ✗ | int coupled = ff_opus_default_coupled_streams[s->channels - 1]; | |
| 652 | ✗ | avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000); | |
| 653 | ✗ | } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) { | |
| 654 | ✗ | int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels); | |
| 655 | ✗ | av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n", | |
| 656 | ✗ | avctx->bit_rate/1000, clipped_rate/1000); | |
| 657 | ✗ | avctx->bit_rate = clipped_rate; | |
| 658 | } | ||
| 659 | |||
| 660 | /* Extradata */ | ||
| 661 | ✗ | avctx->extradata_size = 19; | |
| 662 | ✗ | avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE); | |
| 663 | ✗ | if (!avctx->extradata) | |
| 664 | ✗ | return AVERROR(ENOMEM); | |
| 665 | ✗ | opus_write_extradata(avctx); | |
| 666 | |||
| 667 | ✗ | ff_af_queue_init(avctx, &s->afq); | |
| 668 | |||
| 669 | ✗ | if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0) | |
| 670 | ✗ | return ret; | |
| 671 | |||
| 672 | ✗ | if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT))) | |
| 673 | ✗ | return AVERROR(ENOMEM); | |
| 674 | |||
| 675 | /* I have no idea why a base scaling factor of 68 works, could be the twiddles */ | ||
| 676 | ✗ | for (int i = 0; i < CELT_BLOCK_NB; i++) { | |
| 677 | ✗ | const float scale = 68 << (CELT_BLOCK_NB - 1 - i); | |
| 678 | ✗ | if ((ret = av_tx_init(&s->tx[i], &s->tx_fn[i], AV_TX_FLOAT_MDCT, 0, 15 << (i + 3), &scale, 0))) | |
| 679 | ✗ | return AVERROR(ENOMEM); | |
| 680 | } | ||
| 681 | |||
| 682 | /* Zero out previous energy (matters for inter first frame) */ | ||
| 683 | ✗ | for (int ch = 0; ch < s->channels; ch++) | |
| 684 | ✗ | memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS); | |
| 685 | |||
| 686 | /* Allocate an empty frame to use as overlap for the first frame of audio */ | ||
| 687 | ✗ | ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s)); | |
| 688 | ✗ | if (!ff_bufqueue_peek(&s->bufqueue, 0)) | |
| 689 | ✗ | return AVERROR(ENOMEM); | |
| 690 | |||
| 691 | ✗ | if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options))) | |
| 692 | ✗ | return ret; | |
| 693 | |||
| 694 | /* Frame structs and range coder buffers */ | ||
| 695 | ✗ | max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f); | |
| 696 | ✗ | s->frame = av_malloc(max_frames*sizeof(CeltFrame)); | |
| 697 | ✗ | if (!s->frame) | |
| 698 | ✗ | return AVERROR(ENOMEM); | |
| 699 | ✗ | s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder)); | |
| 700 | ✗ | if (!s->rc) | |
| 701 | ✗ | return AVERROR(ENOMEM); | |
| 702 | |||
| 703 | ✗ | for (int i = 0; i < max_frames; i++) { | |
| 704 | ✗ | s->frame[i].dsp = s->dsp; | |
| 705 | ✗ | s->frame[i].avctx = s->avctx; | |
| 706 | ✗ | s->frame[i].seed = 0; | |
| 707 | ✗ | s->frame[i].pvq = s->pvq; | |
| 708 | ✗ | s->frame[i].apply_phase_inv = s->options.apply_phase_inv; | |
| 709 | ✗ | s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f; | |
| 710 | } | ||
| 711 | |||
| 712 | ✗ | return 0; | |
| 713 | } | ||
| 714 | |||
| 715 | #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM | ||
| 716 | static const AVOption opusenc_options[] = { | ||
| 717 | { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, .unit = "max_delay_ms" }, | ||
| 718 | { "apply_phase_inv", "Apply intensity stereo phase inversion", offsetof(OpusEncContext, options.apply_phase_inv), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, OPUSENC_FLAGS, .unit = "apply_phase_inv" }, | ||
| 719 | { NULL }, | ||
| 720 | }; | ||
| 721 | |||
| 722 | static const AVClass opusenc_class = { | ||
| 723 | .class_name = "Opus encoder", | ||
| 724 | .item_name = av_default_item_name, | ||
| 725 | .option = opusenc_options, | ||
| 726 | .version = LIBAVUTIL_VERSION_INT, | ||
| 727 | }; | ||
| 728 | |||
| 729 | static const FFCodecDefault opusenc_defaults[] = { | ||
| 730 | { "b", "0" }, | ||
| 731 | { "compression_level", "10" }, | ||
| 732 | { NULL }, | ||
| 733 | }; | ||
| 734 | |||
| 735 | const FFCodec ff_opus_encoder = { | ||
| 736 | .p.name = "opus", | ||
| 737 | CODEC_LONG_NAME("Opus"), | ||
| 738 | .p.type = AVMEDIA_TYPE_AUDIO, | ||
| 739 | .p.id = AV_CODEC_ID_OPUS, | ||
| 740 | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY | | ||
| 741 | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_EXPERIMENTAL, | ||
| 742 | .defaults = opusenc_defaults, | ||
| 743 | .p.priv_class = &opusenc_class, | ||
| 744 | .priv_data_size = sizeof(OpusEncContext), | ||
| 745 | .init = opus_encode_init, | ||
| 746 | FF_CODEC_ENCODE_CB(opus_encode_frame), | ||
| 747 | .close = opus_encode_end, | ||
| 748 | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, | ||
| 749 | CODEC_SAMPLERATES(48000), | ||
| 750 | CODEC_CH_LAYOUTS(AV_CHANNEL_LAYOUT_MONO, AV_CHANNEL_LAYOUT_STEREO), | ||
| 751 | CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP), | ||
| 752 | }; | ||
| 753 |