| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Opus encoder | ||
| 3 | * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <float.h> | ||
| 23 | |||
| 24 | #include "libavutil/mem.h" | ||
| 25 | #include "enc_psy.h" | ||
| 26 | #include "celt.h" | ||
| 27 | #include "pvq.h" | ||
| 28 | #include "tab.h" | ||
| 29 | #include "libavfilter/window_func.h" | ||
| 30 | |||
| 31 | ✗ | static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, | |
| 32 | float *bits, float lambda) | ||
| 33 | { | ||
| 34 | ✗ | int i, b = 0; | |
| 35 | ✗ | uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 }; | |
| 36 | ✗ | const int band_size = ff_celt_freq_range[band] << f->size; | |
| 37 | float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176]; | ||
| 38 | ✗ | float dist, cost, err_x = 0.0f, err_y = 0.0f; | |
| 39 | ✗ | float *X = buf; | |
| 40 | ✗ | float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size); | |
| 41 | ✗ | float *Y = (f->channels == 2) ? &buf[176] : NULL; | |
| 42 | ✗ | float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size); | |
| 43 | ✗ | OPUS_RC_CHECKPOINT_SPAWN(rc); | |
| 44 | |||
| 45 | ✗ | memcpy(X, X_orig, band_size*sizeof(float)); | |
| 46 | ✗ | if (Y) | |
| 47 | ✗ | memcpy(Y, Y_orig, band_size*sizeof(float)); | |
| 48 | |||
| 49 | ✗ | f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1; | |
| 50 | ✗ | if (band <= f->coded_bands - 1) { | |
| 51 | ✗ | int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band); | |
| 52 | ✗ | b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14); | |
| 53 | } | ||
| 54 | |||
| 55 | ✗ | if (f->dual_stereo) { | |
| 56 | ✗ | pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL, | |
| 57 | ✗ | f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]); | |
| 58 | |||
| 59 | ✗ | pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL, | |
| 60 | ✗ | f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]); | |
| 61 | } else { | ||
| 62 | ✗ | pvq->quant_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size, | |
| 63 | ✗ | norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]); | |
| 64 | } | ||
| 65 | |||
| 66 | ✗ | for (i = 0; i < band_size; i++) { | |
| 67 | ✗ | err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]); | |
| 68 | ✗ | if (Y) | |
| 69 | ✗ | err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]); | |
| 70 | } | ||
| 71 | |||
| 72 | ✗ | dist = sqrtf(err_x) + sqrtf(err_y); | |
| 73 | ✗ | cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f; | |
| 74 | ✗ | *bits += cost; | |
| 75 | |||
| 76 | ✗ | OPUS_RC_CHECKPOINT_ROLLBACK(rc); | |
| 77 | |||
| 78 | ✗ | return lambda*dist*cost; | |
| 79 | } | ||
| 80 | |||
| 81 | /* Populate metrics without taking into consideration neighbouring steps */ | ||
| 82 | ✗ | static void step_collect_psy_metrics(OpusPsyContext *s, int index) | |
| 83 | { | ||
| 84 | ✗ | int silence = 0, ch, i, j; | |
| 85 | ✗ | OpusPsyStep *st = s->steps[index]; | |
| 86 | |||
| 87 | ✗ | st->index = index; | |
| 88 | |||
| 89 | ✗ | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { | |
| 90 | ✗ | const int lap_size = (1 << s->bsize_analysis); | |
| 91 | ✗ | for (i = 1; i <= FFMIN(lap_size, index); i++) { | |
| 92 | ✗ | const int offset = i*120; | |
| 93 | ✗ | AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i); | |
| 94 | ✗ | memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); | |
| 95 | } | ||
| 96 | ✗ | for (i = 0; i < lap_size; i++) { | |
| 97 | ✗ | const int offset = i*120 + lap_size; | |
| 98 | ✗ | AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i); | |
| 99 | ✗ | memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); | |
| 100 | } | ||
| 101 | |||
| 102 | ✗ | s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis], | |
| 103 | ✗ | (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1)); | |
| 104 | |||
| 105 | ✗ | s->mdct_fn[s->bsize_analysis](s->mdct[s->bsize_analysis], st->coeffs[ch], | |
| 106 | ✗ | s->scratch, sizeof(float)); | |
| 107 | |||
| 108 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) | |
| 109 | ✗ | st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis]; | |
| 110 | } | ||
| 111 | |||
| 112 | ✗ | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { | |
| 113 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 114 | ✗ | float avg_c_s, energy = 0.0f, dist_dev = 0.0f; | |
| 115 | ✗ | const int range = ff_celt_freq_range[i] << s->bsize_analysis; | |
| 116 | ✗ | const float *coeffs = st->bands[ch][i]; | |
| 117 | ✗ | for (j = 0; j < range; j++) | |
| 118 | ✗ | energy += coeffs[j]*coeffs[j]; | |
| 119 | |||
| 120 | ✗ | st->energy[ch][i] += sqrtf(energy); | |
| 121 | ✗ | silence |= !!st->energy[ch][i]; | |
| 122 | ✗ | avg_c_s = energy / range; | |
| 123 | |||
| 124 | ✗ | for (j = 0; j < range; j++) { | |
| 125 | ✗ | const float c_s = coeffs[j]*coeffs[j]; | |
| 126 | ✗ | dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s); | |
| 127 | } | ||
| 128 | |||
| 129 | ✗ | st->tone[ch][i] += sqrtf(dist_dev); | |
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | ✗ | st->silence = !silence; | |
| 134 | |||
| 135 | ✗ | if (s->avctx->ch_layout.nb_channels > 1) { | |
| 136 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 137 | ✗ | float incompat = 0.0f; | |
| 138 | ✗ | const float *coeffs1 = st->bands[0][i]; | |
| 139 | ✗ | const float *coeffs2 = st->bands[1][i]; | |
| 140 | ✗ | const int range = ff_celt_freq_range[i] << s->bsize_analysis; | |
| 141 | ✗ | for (j = 0; j < range; j++) | |
| 142 | ✗ | incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]); | |
| 143 | ✗ | st->stereo[i] = sqrtf(incompat); | |
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | ✗ | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { | |
| 148 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 149 | ✗ | OpusBandExcitation *ex = &s->ex[ch][i]; | |
| 150 | ✗ | float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]); | |
| 151 | ✗ | bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e); | |
| 152 | ✗ | bp_e *= bp_e; | |
| 153 | ✗ | if (bp_e > ex->excitation) { | |
| 154 | ✗ | st->change_amp[ch][i] = bp_e - ex->excitation; | |
| 155 | ✗ | st->total_change += st->change_amp[ch][i]; | |
| 156 | ✗ | ex->excitation = ex->excitation_init = bp_e; | |
| 157 | ✗ | ex->excitation_dist = 0.0f; | |
| 158 | } | ||
| 159 | ✗ | if (ex->excitation > 0.0f) { | |
| 160 | ✗ | ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09); | |
| 161 | ✗ | ex->excitation = FFMAX(ex->excitation, 0.0f); | |
| 162 | ✗ | ex->excitation_dist += 1.0f; | |
| 163 | } | ||
| 164 | } | ||
| 165 | } | ||
| 166 | ✗ | } | |
| 167 | |||
| 168 | ✗ | static void search_for_change_points(OpusPsyContext *s, float tgt_change, | |
| 169 | int offset_s, int offset_e, int resolution, | ||
| 170 | int level) | ||
| 171 | { | ||
| 172 | int i; | ||
| 173 | ✗ | float c_change = 0.0f; | |
| 174 | ✗ | if ((offset_e - offset_s) <= resolution) | |
| 175 | ✗ | return; | |
| 176 | ✗ | for (i = offset_s; i < offset_e; i++) { | |
| 177 | ✗ | c_change += s->steps[i]->total_change; | |
| 178 | ✗ | if (c_change > tgt_change) | |
| 179 | ✗ | break; | |
| 180 | } | ||
| 181 | ✗ | if (i == offset_e) | |
| 182 | ✗ | return; | |
| 183 | ✗ | search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1); | |
| 184 | ✗ | s->inflection_points[s->inflection_points_count++] = i; | |
| 185 | ✗ | search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1); | |
| 186 | } | ||
| 187 | |||
| 188 | ✗ | static int flush_silent_frames(OpusPsyContext *s) | |
| 189 | { | ||
| 190 | int fsize, silent_frames; | ||
| 191 | |||
| 192 | ✗ | for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++) | |
| 193 | ✗ | if (!s->steps[silent_frames]->silence) | |
| 194 | ✗ | break; | |
| 195 | ✗ | if (--silent_frames < 0) | |
| 196 | ✗ | return 0; | |
| 197 | |||
| 198 | ✗ | for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) { | |
| 199 | ✗ | if ((1 << fsize) > silent_frames) | |
| 200 | ✗ | continue; | |
| 201 | ✗ | s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize); | |
| 202 | ✗ | s->p.framesize = fsize; | |
| 203 | ✗ | return 1; | |
| 204 | } | ||
| 205 | |||
| 206 | ✗ | return 0; | |
| 207 | } | ||
| 208 | |||
| 209 | /* Main function which decides frame size and frames per current packet */ | ||
| 210 | ✗ | static void psy_output_groups(OpusPsyContext *s) | |
| 211 | { | ||
| 212 | ✗ | int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000; | |
| 213 | ✗ | int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); | |
| 214 | |||
| 215 | /* These don't change for now */ | ||
| 216 | ✗ | s->p.mode = OPUS_MODE_CELT; | |
| 217 | ✗ | s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND; | |
| 218 | |||
| 219 | /* Flush silent frames ASAP */ | ||
| 220 | ✗ | if (s->steps[0]->silence && flush_silent_frames(s)) | |
| 221 | ✗ | return; | |
| 222 | |||
| 223 | ✗ | s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960); | |
| 224 | ✗ | s->p.frames = 1; | |
| 225 | } | ||
| 226 | |||
| 227 | ✗ | int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p) | |
| 228 | { | ||
| 229 | int i; | ||
| 230 | ✗ | float total_energy_change = 0.0f; | |
| 231 | |||
| 232 | ✗ | if (s->buffered_steps < s->max_steps && !s->eof) { | |
| 233 | ✗ | const int awin = (1 << s->bsize_analysis); | |
| 234 | ✗ | if (++s->steps_to_process >= awin) { | |
| 235 | ✗ | step_collect_psy_metrics(s, s->buffered_steps - awin + 1); | |
| 236 | ✗ | s->steps_to_process = 0; | |
| 237 | } | ||
| 238 | ✗ | if ((++s->buffered_steps) < s->max_steps) | |
| 239 | ✗ | return 1; | |
| 240 | } | ||
| 241 | |||
| 242 | ✗ | for (i = 0; i < s->buffered_steps; i++) | |
| 243 | ✗ | total_energy_change += s->steps[i]->total_change; | |
| 244 | |||
| 245 | ✗ | search_for_change_points(s, total_energy_change / 2.0f, 0, | |
| 246 | s->buffered_steps, 1, 0); | ||
| 247 | |||
| 248 | ✗ | psy_output_groups(s); | |
| 249 | |||
| 250 | ✗ | p->frames = s->p.frames; | |
| 251 | ✗ | p->framesize = s->p.framesize; | |
| 252 | ✗ | p->mode = s->p.mode; | |
| 253 | ✗ | p->bandwidth = s->p.bandwidth; | |
| 254 | |||
| 255 | ✗ | return 0; | |
| 256 | } | ||
| 257 | |||
| 258 | ✗ | void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index) | |
| 259 | { | ||
| 260 | ✗ | int i, neighbouring_points = 0, start_offset = 0; | |
| 261 | ✗ | int radius = (1 << s->p.framesize), step_offset = radius*index; | |
| 262 | ✗ | int silence = 1; | |
| 263 | |||
| 264 | ✗ | f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0; | |
| 265 | ✗ | f->end_band = ff_celt_band_end[s->p.bandwidth]; | |
| 266 | ✗ | f->channels = s->avctx->ch_layout.nb_channels; | |
| 267 | ✗ | f->size = s->p.framesize; | |
| 268 | |||
| 269 | ✗ | for (i = 0; i < (1 << f->size); i++) | |
| 270 | ✗ | silence &= s->steps[index*(1 << f->size) + i]->silence; | |
| 271 | |||
| 272 | ✗ | f->silence = silence; | |
| 273 | ✗ | if (f->silence) { | |
| 274 | ✗ | f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */ | |
| 275 | ✗ | return; | |
| 276 | } | ||
| 277 | |||
| 278 | ✗ | for (i = 0; i < s->inflection_points_count; i++) { | |
| 279 | ✗ | if (s->inflection_points[i] >= step_offset) { | |
| 280 | ✗ | start_offset = i; | |
| 281 | ✗ | break; | |
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | ✗ | for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) { | |
| 286 | ✗ | if (s->inflection_points[i] < (step_offset + radius)) { | |
| 287 | ✗ | neighbouring_points++; | |
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 291 | /* Transient flagging */ | ||
| 292 | ✗ | f->transient = neighbouring_points > 0; | |
| 293 | ✗ | f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; | |
| 294 | |||
| 295 | /* Some sane defaults */ | ||
| 296 | ✗ | f->pfilter = 0; | |
| 297 | ✗ | f->pf_gain = 0.5f; | |
| 298 | ✗ | f->pf_octave = 2; | |
| 299 | ✗ | f->pf_period = 1; | |
| 300 | ✗ | f->pf_tapset = 2; | |
| 301 | |||
| 302 | /* More sane defaults */ | ||
| 303 | ✗ | f->tf_select = 0; | |
| 304 | ✗ | f->anticollapse = 1; | |
| 305 | ✗ | f->alloc_trim = 5; | |
| 306 | ✗ | f->skip_band_floor = f->end_band; | |
| 307 | ✗ | f->intensity_stereo = f->end_band; | |
| 308 | ✗ | f->dual_stereo = 0; | |
| 309 | ✗ | f->spread = CELT_SPREAD_NORMAL; | |
| 310 | ✗ | memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS); | |
| 311 | ✗ | memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS); | |
| 312 | } | ||
| 313 | |||
| 314 | ✗ | static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, | |
| 315 | CeltFrame *f_out) | ||
| 316 | { | ||
| 317 | int i, f, ch; | ||
| 318 | ✗ | int frame_size = OPUS_BLOCK_SIZE(s->p.framesize); | |
| 319 | ✗ | float rate, frame_bits = 0; | |
| 320 | |||
| 321 | /* Used for the global ROTATE flag */ | ||
| 322 | ✗ | float tonal = 0.0f; | |
| 323 | |||
| 324 | /* Pseudo-weights */ | ||
| 325 | ✗ | float band_score[CELT_MAX_BANDS] = { 0 }; | |
| 326 | ✗ | float max_score = 1.0f; | |
| 327 | |||
| 328 | /* Pass one - one loop around each band, computing unquant stuff */ | ||
| 329 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 330 | ✗ | float weight = 0.0f; | |
| 331 | ✗ | float tonal_contrib = 0.0f; | |
| 332 | ✗ | for (f = 0; f < (1 << s->p.framesize); f++) { | |
| 333 | ✗ | weight = start[f]->stereo[i]; | |
| 334 | ✗ | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { | |
| 335 | ✗ | weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i]; | |
| 336 | ✗ | tonal_contrib += start[f]->tone[ch][i]; | |
| 337 | } | ||
| 338 | } | ||
| 339 | ✗ | tonal += tonal_contrib; | |
| 340 | ✗ | band_score[i] = weight; | |
| 341 | } | ||
| 342 | |||
| 343 | ✗ | tonal /= (float)CELT_MAX_BANDS; | |
| 344 | |||
| 345 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 346 | ✗ | if (band_score[i] > max_score) | |
| 347 | ✗ | max_score = band_score[i]; | |
| 348 | } | ||
| 349 | |||
| 350 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 351 | ✗ | f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f); | |
| 352 | ✗ | frame_bits += band_score[i]*8.0f; | |
| 353 | } | ||
| 354 | |||
| 355 | ✗ | tonal /= 1333136.0f; | |
| 356 | ✗ | f_out->spread = av_clip_uintp2(lrintf(tonal), 2); | |
| 357 | |||
| 358 | ✗ | rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16; | |
| 359 | ✗ | rate *= s->lambda; | |
| 360 | ✗ | rate /= s->avctx->sample_rate/frame_size; | |
| 361 | |||
| 362 | ✗ | f_out->framebits = lrintf(rate); | |
| 363 | ✗ | f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_FRAME_SIZE * 8); | |
| 364 | ✗ | f_out->framebits = FFALIGN(f_out->framebits, 8); | |
| 365 | ✗ | } | |
| 366 | |||
| 367 | ✗ | static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist) | |
| 368 | { | ||
| 369 | ✗ | int i, tdist = 0.0f; | |
| 370 | OpusRangeCoder dump; | ||
| 371 | |||
| 372 | ✗ | ff_opus_rc_enc_init(&dump); | |
| 373 | ✗ | ff_celt_bitalloc(f, &dump, 1); | |
| 374 | |||
| 375 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 376 | ✗ | float bits = 0.0f; | |
| 377 | ✗ | float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda); | |
| 378 | ✗ | tdist += dist; | |
| 379 | } | ||
| 380 | |||
| 381 | ✗ | *total_dist = tdist; | |
| 382 | |||
| 383 | ✗ | return 0; | |
| 384 | } | ||
| 385 | |||
| 386 | ✗ | static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f) | |
| 387 | { | ||
| 388 | float td1, td2; | ||
| 389 | ✗ | f->dual_stereo = 0; | |
| 390 | |||
| 391 | ✗ | if (s->avctx->ch_layout.nb_channels < 2) | |
| 392 | ✗ | return; | |
| 393 | |||
| 394 | ✗ | bands_dist(s, f, &td1); | |
| 395 | ✗ | f->dual_stereo = 1; | |
| 396 | ✗ | bands_dist(s, f, &td2); | |
| 397 | |||
| 398 | ✗ | f->dual_stereo = td2 < td1; | |
| 399 | ✗ | s->dual_stereo_used += td2 < td1; | |
| 400 | } | ||
| 401 | |||
| 402 | ✗ | static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f) | |
| 403 | { | ||
| 404 | ✗ | int i, best_band = CELT_MAX_BANDS - 1; | |
| 405 | ✗ | float dist, best_dist = FLT_MAX; | |
| 406 | /* TODO: fix, make some heuristic up here using the lambda value */ | ||
| 407 | ✗ | float end_band = 0; | |
| 408 | |||
| 409 | ✗ | if (s->avctx->ch_layout.nb_channels < 2) | |
| 410 | ✗ | return; | |
| 411 | |||
| 412 | ✗ | for (i = f->end_band; i >= end_band; i--) { | |
| 413 | ✗ | f->intensity_stereo = i; | |
| 414 | ✗ | bands_dist(s, f, &dist); | |
| 415 | ✗ | if (best_dist > dist) { | |
| 416 | ✗ | best_dist = dist; | |
| 417 | ✗ | best_band = i; | |
| 418 | } | ||
| 419 | } | ||
| 420 | |||
| 421 | ✗ | f->intensity_stereo = best_band; | |
| 422 | ✗ | s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f; | |
| 423 | } | ||
| 424 | |||
| 425 | ✗ | static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f) | |
| 426 | { | ||
| 427 | ✗ | int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } }; | |
| 428 | ✗ | float score[2] = { 0 }; | |
| 429 | |||
| 430 | ✗ | for (cway = 0; cway < 2; cway++) { | |
| 431 | int mag[2]; | ||
| 432 | ✗ | int base = f->transient ? 120 : 960; | |
| 433 | |||
| 434 | ✗ | for (i = 0; i < 2; i++) { | |
| 435 | ✗ | int c = ff_celt_tf_select[f->size][f->transient][cway][i]; | |
| 436 | ✗ | mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c); | |
| 437 | } | ||
| 438 | |||
| 439 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 440 | ✗ | float iscore0 = 0.0f; | |
| 441 | ✗ | float iscore1 = 0.0f; | |
| 442 | ✗ | for (j = 0; j < (1 << f->size); j++) { | |
| 443 | ✗ | for (k = 0; k < s->avctx->ch_layout.nb_channels; k++) { | |
| 444 | ✗ | iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0]; | |
| 445 | ✗ | iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1]; | |
| 446 | } | ||
| 447 | } | ||
| 448 | ✗ | config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f); | |
| 449 | ✗ | score[cway] += config[cway][i] ? iscore1 : iscore0; | |
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | ✗ | f->tf_select = score[0] < score[1]; | |
| 454 | ✗ | memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS); | |
| 455 | |||
| 456 | ✗ | return 0; | |
| 457 | } | ||
| 458 | |||
| 459 | ✗ | int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index) | |
| 460 | { | ||
| 461 | ✗ | int start_transient_flag = f->transient; | |
| 462 | ✗ | OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)]; | |
| 463 | |||
| 464 | ✗ | if (f->silence) | |
| 465 | ✗ | return 0; | |
| 466 | |||
| 467 | ✗ | celt_gauge_psy_weight(s, start, f); | |
| 468 | ✗ | celt_search_for_intensity(s, f); | |
| 469 | ✗ | celt_search_for_dual_stereo(s, f); | |
| 470 | ✗ | celt_search_for_tf(s, start, f); | |
| 471 | |||
| 472 | ✗ | if (f->transient != start_transient_flag) { | |
| 473 | ✗ | f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1; | |
| 474 | ✗ | return 1; | |
| 475 | } | ||
| 476 | |||
| 477 | ✗ | return 0; | |
| 478 | } | ||
| 479 | |||
| 480 | ✗ | void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f) | |
| 481 | { | ||
| 482 | ✗ | int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize); | |
| 483 | ✗ | int steps_out = s->p.frames*(frame_size/120); | |
| 484 | void *tmp[FF_BUFQUEUE_SIZE]; | ||
| 485 | float ideal_fbits; | ||
| 486 | |||
| 487 | ✗ | for (i = 0; i < steps_out; i++) | |
| 488 | ✗ | memset(s->steps[i], 0, sizeof(OpusPsyStep)); | |
| 489 | |||
| 490 | ✗ | for (i = 0; i < s->max_steps; i++) | |
| 491 | ✗ | tmp[i] = s->steps[i]; | |
| 492 | |||
| 493 | ✗ | for (i = 0; i < s->max_steps; i++) { | |
| 494 | ✗ | const int i_new = i - steps_out; | |
| 495 | ✗ | s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i]; | |
| 496 | } | ||
| 497 | |||
| 498 | ✗ | for (i = steps_out; i < s->buffered_steps; i++) | |
| 499 | ✗ | s->steps[i]->index -= steps_out; | |
| 500 | |||
| 501 | ✗ | ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size); | |
| 502 | |||
| 503 | ✗ | for (i = 0; i < s->p.frames; i++) { | |
| 504 | ✗ | s->avg_is_band += f[i].intensity_stereo; | |
| 505 | ✗ | s->lambda *= ideal_fbits / f[i].framebits; | |
| 506 | } | ||
| 507 | |||
| 508 | ✗ | s->avg_is_band /= (s->p.frames + 1); | |
| 509 | |||
| 510 | ✗ | s->steps_to_process = 0; | |
| 511 | ✗ | s->buffered_steps -= steps_out; | |
| 512 | ✗ | s->total_packets_out += s->p.frames; | |
| 513 | ✗ | s->inflection_points_count = 0; | |
| 514 | ✗ | } | |
| 515 | |||
| 516 | ✗ | av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, | |
| 517 | struct FFBufQueue *bufqueue, OpusEncOptions *options) | ||
| 518 | { | ||
| 519 | int i, ch, ret; | ||
| 520 | |||
| 521 | ✗ | s->lambda = 1.0f; | |
| 522 | ✗ | s->options = options; | |
| 523 | ✗ | s->avctx = avctx; | |
| 524 | ✗ | s->bufqueue = bufqueue; | |
| 525 | ✗ | s->max_steps = ceilf(s->options->max_delay_ms/2.5f); | |
| 526 | ✗ | s->bsize_analysis = CELT_BLOCK_960; | |
| 527 | ✗ | s->avg_is_band = CELT_MAX_BANDS - 1; | |
| 528 | ✗ | s->inflection_points_count = 0; | |
| 529 | |||
| 530 | ✗ | s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps); | |
| 531 | ✗ | if (!s->inflection_points) { | |
| 532 | ✗ | ret = AVERROR(ENOMEM); | |
| 533 | ✗ | goto fail; | |
| 534 | } | ||
| 535 | |||
| 536 | ✗ | s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); | |
| 537 | ✗ | if (!s->dsp) { | |
| 538 | ✗ | ret = AVERROR(ENOMEM); | |
| 539 | ✗ | goto fail; | |
| 540 | } | ||
| 541 | |||
| 542 | ✗ | for (ch = 0; ch < s->avctx->ch_layout.nb_channels; ch++) { | |
| 543 | ✗ | for (i = 0; i < CELT_MAX_BANDS; i++) { | |
| 544 | ✗ | bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1); | |
| 545 | ✗ | bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0); | |
| 546 | } | ||
| 547 | } | ||
| 548 | |||
| 549 | ✗ | for (i = 0; i < s->max_steps; i++) { | |
| 550 | ✗ | s->steps[i] = av_mallocz(sizeof(OpusPsyStep)); | |
| 551 | ✗ | if (!s->steps[i]) { | |
| 552 | ✗ | ret = AVERROR(ENOMEM); | |
| 553 | ✗ | goto fail; | |
| 554 | } | ||
| 555 | } | ||
| 556 | |||
| 557 | ✗ | for (i = 0; i < CELT_BLOCK_NB; i++) { | |
| 558 | float tmp; | ||
| 559 | ✗ | const int len = OPUS_BLOCK_SIZE(i); | |
| 560 | ✗ | const float scale = 68 << (CELT_BLOCK_NB - 1 - i); | |
| 561 | ✗ | s->window[i] = av_malloc(2*len*sizeof(float)); | |
| 562 | ✗ | if (!s->window[i]) { | |
| 563 | ✗ | ret = AVERROR(ENOMEM); | |
| 564 | ✗ | goto fail; | |
| 565 | } | ||
| 566 | ✗ | generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp); | |
| 567 | ✗ | ret = av_tx_init(&s->mdct[i], &s->mdct_fn[i], AV_TX_FLOAT_MDCT, | |
| 568 | ✗ | 0, 15 << (i + 3), &scale, 0); | |
| 569 | ✗ | if (ret < 0) | |
| 570 | ✗ | goto fail; | |
| 571 | } | ||
| 572 | |||
| 573 | ✗ | return 0; | |
| 574 | |||
| 575 | ✗ | fail: | |
| 576 | ✗ | av_freep(&s->inflection_points); | |
| 577 | ✗ | av_freep(&s->dsp); | |
| 578 | |||
| 579 | ✗ | for (i = 0; i < CELT_BLOCK_NB; i++) { | |
| 580 | ✗ | av_tx_uninit(&s->mdct[i]); | |
| 581 | ✗ | av_freep(&s->window[i]); | |
| 582 | } | ||
| 583 | |||
| 584 | ✗ | for (i = 0; i < s->max_steps; i++) | |
| 585 | ✗ | av_freep(&s->steps[i]); | |
| 586 | |||
| 587 | ✗ | return ret; | |
| 588 | } | ||
| 589 | |||
| 590 | ✗ | void ff_opus_psy_signal_eof(OpusPsyContext *s) | |
| 591 | { | ||
| 592 | ✗ | s->eof = 1; | |
| 593 | ✗ | } | |
| 594 | |||
| 595 | ✗ | av_cold int ff_opus_psy_end(OpusPsyContext *s) | |
| 596 | { | ||
| 597 | int i; | ||
| 598 | |||
| 599 | ✗ | av_freep(&s->inflection_points); | |
| 600 | ✗ | av_freep(&s->dsp); | |
| 601 | |||
| 602 | ✗ | for (i = 0; i < CELT_BLOCK_NB; i++) { | |
| 603 | ✗ | av_tx_uninit(&s->mdct[i]); | |
| 604 | ✗ | av_freep(&s->window[i]); | |
| 605 | } | ||
| 606 | |||
| 607 | ✗ | for (i = 0; i < s->max_steps; i++) | |
| 608 | ✗ | av_freep(&s->steps[i]); | |
| 609 | |||
| 610 | ✗ | av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band); | |
| 611 | ✗ | av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f); | |
| 612 | |||
| 613 | ✗ | return 0; | |
| 614 | } | ||
| 615 |