1 |
|
|
/* |
2 |
|
|
* Opus encoder |
3 |
|
|
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> |
4 |
|
|
* |
5 |
|
|
* This file is part of FFmpeg. |
6 |
|
|
* |
7 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
8 |
|
|
* modify it under the terms of the GNU Lesser General Public |
9 |
|
|
* License as published by the Free Software Foundation; either |
10 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 |
|
|
* Lesser General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU Lesser General Public |
18 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
#include "opusenc_psy.h" |
23 |
|
|
#include "opus_pvq.h" |
24 |
|
|
#include "opustab.h" |
25 |
|
|
#include "mdct15.h" |
26 |
|
|
#include "libavutil/qsort.h" |
27 |
|
|
|
28 |
|
|
/**
 * Trial-quantize a single band and return a cost figure for it.
 *
 * The band's coefficients are copied into a local buffer, run through
 * pvq->quant_band() and compared with the untouched originals. The range
 * coder is checkpointed on entry and rolled back before returning.
 * Note that f->remaining2 is overwritten as a side effect.
 *
 * @param pvq    quantizer providing the quant_band() callback
 * @param f      frame whose coefficients and allocation are used
 * @param rc     range coder used for the trial run
 * @param band   band index
 * @param bits   accumulator; the checkpoint bit count divided by 8 is added
 * @param lambda scaling factor applied to the result
 * @return lambda * distortion * cost
 */
static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band,
                           float *bits, float lambda)
{
    const int coeff_offset    = ff_celt_freq_bands[band] << f->size;
    const int band_size       = ff_celt_freq_range[band] << f->size;
    const uint32_t block_mask = (1 << f->blocks) - 1;
    float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
    float *X_orig = f->block[0].coeffs + coeff_offset;
    float *Y_orig = f->block[1].coeffs + coeff_offset;
    float *X = buf;
    float *Y = NULL;
    float err_x = 0.0f, err_y = 0.0f;
    float dist, cost;
    int n, budget = 0;
    OPUS_RC_CHECKPOINT_SPAWN(rc);

    /* The second half of buf holds the second channel, when there is one */
    if (f->channels == 2)
        Y = &buf[176];

    /* Work on copies so the frame's own coefficients stay intact */
    memcpy(X, X_orig, band_size*sizeof(float));
    if (Y)
        memcpy(Y, Y_orig, band_size*sizeof(float));

    f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
    if (band < f->coded_bands) {
        const int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
        budget = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
    }

    if (!f->dual_stereo) {
        pvq->quant_band(pvq, f, rc, band, X, Y, band_size, budget, f->blocks, NULL, f->size,
                        norm1, 0, 1.0f, lowband_scratch, block_mask);
    } else {
        /* Each channel is coded on its own with half of the budget */
        const int half_budget = budget / 2;

        pvq->quant_band(pvq, f, rc, band, X, NULL, band_size, half_budget, f->blocks, NULL,
                        f->size, norm1, 0, 1.0f, lowband_scratch, block_mask);

        pvq->quant_band(pvq, f, rc, band, Y, NULL, band_size, half_budget, f->blocks, NULL,
                        f->size, norm2, 0, 1.0f, lowband_scratch, block_mask);
    }

    /* Squared error between the processed copies and the originals */
    for (n = 0; n < band_size; n++) {
        const float dx = X[n] - X_orig[n];
        err_x += dx*dx;
    }
    if (Y) {
        for (n = 0; n < band_size; n++) {
            const float dy = Y[n] - Y_orig[n];
            err_y += dy*dy;
        }
    }

    dist = sqrtf(err_x) + sqrtf(err_y);
    cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
    *bits += cost;

    /* Undo everything the trial run did to the range coder */
    OPUS_RC_CHECKPOINT_ROLLBACK(rc);

    return lambda*dist*cost;
}
77 |
|
|
|
78 |
|
|
/* Populate metrics without taking into consideration neighbouring steps */ |
79 |
|
|
static void step_collect_psy_metrics(OpusPsyContext *s, int index) |
80 |
|
|
{ |
81 |
|
|
int silence = 0, ch, i, j; |
82 |
|
|
OpusPsyStep *st = s->steps[index]; |
83 |
|
|
|
84 |
|
|
st->index = index; |
85 |
|
|
|
86 |
|
|
for (ch = 0; ch < s->avctx->channels; ch++) { |
87 |
|
|
const int lap_size = (1 << s->bsize_analysis); |
88 |
|
|
for (i = 1; i <= FFMIN(lap_size, index); i++) { |
89 |
|
|
const int offset = i*120; |
90 |
|
|
AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i); |
91 |
|
|
memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); |
92 |
|
|
} |
93 |
|
|
for (i = 0; i < lap_size; i++) { |
94 |
|
|
const int offset = i*120 + lap_size; |
95 |
|
|
AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i); |
96 |
|
|
memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float)); |
97 |
|
|
} |
98 |
|
|
|
99 |
|
|
s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis], |
100 |
|
|
(OPUS_BLOCK_SIZE(s->bsize_analysis) << 1)); |
101 |
|
|
|
102 |
|
|
s->mdct[s->bsize_analysis]->mdct(s->mdct[s->bsize_analysis], st->coeffs[ch], s->scratch, 1); |
103 |
|
|
|
104 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) |
105 |
|
|
st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis]; |
106 |
|
|
} |
107 |
|
|
|
108 |
|
|
for (ch = 0; ch < s->avctx->channels; ch++) { |
109 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
110 |
|
|
float avg_c_s, energy = 0.0f, dist_dev = 0.0f; |
111 |
|
|
const int range = ff_celt_freq_range[i] << s->bsize_analysis; |
112 |
|
|
const float *coeffs = st->bands[ch][i]; |
113 |
|
|
for (j = 0; j < range; j++) |
114 |
|
|
energy += coeffs[j]*coeffs[j]; |
115 |
|
|
|
116 |
|
|
st->energy[ch][i] += sqrtf(energy); |
117 |
|
|
silence |= !!st->energy[ch][i]; |
118 |
|
|
avg_c_s = energy / range; |
119 |
|
|
|
120 |
|
|
for (j = 0; j < range; j++) { |
121 |
|
|
const float c_s = coeffs[j]*coeffs[j]; |
122 |
|
|
dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s); |
123 |
|
|
} |
124 |
|
|
|
125 |
|
|
st->tone[ch][i] += sqrtf(dist_dev); |
126 |
|
|
} |
127 |
|
|
} |
128 |
|
|
|
129 |
|
|
st->silence = !silence; |
130 |
|
|
|
131 |
|
|
if (s->avctx->channels > 1) { |
132 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
133 |
|
|
float incompat = 0.0f; |
134 |
|
|
const float *coeffs1 = st->bands[0][i]; |
135 |
|
|
const float *coeffs2 = st->bands[1][i]; |
136 |
|
|
const int range = ff_celt_freq_range[i] << s->bsize_analysis; |
137 |
|
|
for (j = 0; j < range; j++) |
138 |
|
|
incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]); |
139 |
|
|
st->stereo[i] = sqrtf(incompat); |
140 |
|
|
} |
141 |
|
|
} |
142 |
|
|
|
143 |
|
|
for (ch = 0; ch < s->avctx->channels; ch++) { |
144 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
145 |
|
|
OpusBandExcitation *ex = &s->ex[ch][i]; |
146 |
|
|
float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]); |
147 |
|
|
bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e); |
148 |
|
|
bp_e *= bp_e; |
149 |
|
|
if (bp_e > ex->excitation) { |
150 |
|
|
st->change_amp[ch][i] = bp_e - ex->excitation; |
151 |
|
|
st->total_change += st->change_amp[ch][i]; |
152 |
|
|
ex->excitation = ex->excitation_init = bp_e; |
153 |
|
|
ex->excitation_dist = 0.0f; |
154 |
|
|
} |
155 |
|
|
if (ex->excitation > 0.0f) { |
156 |
|
|
ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09); |
157 |
|
|
ex->excitation = FFMAX(ex->excitation, 0.0f); |
158 |
|
|
ex->excitation_dist += 1.0f; |
159 |
|
|
} |
160 |
|
|
} |
161 |
|
|
} |
162 |
|
|
} |
163 |
|
|
|
164 |
|
|
/**
 * Recursively locate steps where the accumulated total_change exceeds a
 * target and record them in s->inflection_points.
 *
 * The range [offset_s, offset_e) is scanned from the start; the first step
 * at which the running sum exceeds tgt_change becomes an inflection point.
 * Both sides of that step are then searched again with half the target.
 * The left side is visited first, then the point is stored, then the right
 * side.
 *
 * @param s          context holding the steps and the output array
 * @param tgt_change threshold for the running sum of total_change
 * @param offset_s   first step of the range (inclusive)
 * @param offset_e   end of the range (exclusive)
 * @param resolution ranges no longer than this are not searched
 * @param level      recursion depth, only passed along
 */
static void search_for_change_points(OpusPsyContext *s, float tgt_change,
                                     int offset_s, int offset_e, int resolution,
                                     int level)
{
    int pivot = offset_s;
    float accumulated = 0.0f;

    if ((offset_e - offset_s) <= resolution)
        return;

    while (pivot < offset_e) {
        accumulated += s->steps[pivot]->total_change;
        if (accumulated > tgt_change)
            break;
        pivot++;
    }

    /* The threshold was never crossed inside this range */
    if (pivot == offset_e)
        return;

    search_for_change_points(s, tgt_change / 2.0f, offset_s, pivot, resolution, level + 1);
    s->inflection_points[s->inflection_points_count] = pivot;
    s->inflection_points_count++;
    search_for_change_points(s, tgt_change / 2.0f, pivot + 1, offset_e, resolution, level + 1);
}
183 |
|
|
|
184 |
|
|
/**
 * Try to pick a packet layout that outputs the leading run of silent steps.
 *
 * On success s->p.frames and s->p.framesize are set.
 *
 * @return 1 if a layout was chosen, 0 otherwise
 */
static int flush_silent_frames(OpusPsyContext *s)
{
    int nb_silent = 0;
    int fsize;

    /* Length of the run of silent steps at the head of the buffer */
    while (nb_silent < s->buffered_steps && s->steps[nb_silent]->silence)
        nb_silent++;

    /* The count is reduced by one before being used below */
    nb_silent--;
    if (nb_silent < 0)
        return 0;

    /* Largest frame size first; CELT_BLOCK_120 itself is never tried */
    for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) {
        if ((1 << fsize) <= nb_silent) {
            s->p.frames    = FFMIN(nb_silent / (1 << fsize), 48 >> fsize);
            s->p.framesize = fsize;
            return 1;
        }
    }

    return 0;
}
204 |
|
|
|
205 |
|
|
/* Main function which decides frame size and frames per current packet */ |
206 |
|
|
static void psy_output_groups(OpusPsyContext *s) |
207 |
|
|
{ |
208 |
|
|
int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000; |
209 |
|
|
int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960); |
210 |
|
|
|
211 |
|
|
/* These don't change for now */ |
212 |
|
|
s->p.mode = OPUS_MODE_CELT; |
213 |
|
|
s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND; |
214 |
|
|
|
215 |
|
|
/* Flush silent frames ASAP */ |
216 |
|
|
if (s->steps[0]->silence && flush_silent_frames(s)) |
217 |
|
|
return; |
218 |
|
|
|
219 |
|
|
s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960); |
220 |
|
|
s->p.frames = 1; |
221 |
|
|
} |
222 |
|
|
|
223 |
|
|
/**
 * Account for one more buffered step and, once enough are available (or
 * EOF was signalled), decide the layout of the next packet.
 *
 * @param s psychoacoustic context
 * @param p receives frames, framesize, mode and bandwidth when 0 is returned
 * @return 1 while the step buffer is still filling up (p is left untouched),
 *         0 once p has been filled in
 */
int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
{
    float total_energy_change = 0.0f;
    int step;

    if (!s->eof && s->buffered_steps < s->max_steps) {
        const int awin = (1 << s->bsize_analysis);

        /* Metrics are only collected once every awin calls */
        s->steps_to_process++;
        if (s->steps_to_process >= awin) {
            step_collect_psy_metrics(s, s->buffered_steps - awin + 1);
            s->steps_to_process = 0;
        }

        s->buffered_steps++;
        if (s->buffered_steps < s->max_steps)
            return 1;
    }

    for (step = 0; step < s->buffered_steps; step++)
        total_energy_change += s->steps[step]->total_change;

    search_for_change_points(s, total_energy_change / 2.0f, 0,
                             s->buffered_steps, 1, 0);

    psy_output_groups(s);

    /* Hand the decision over to the caller */
    p->frames    = s->p.frames;
    p->framesize = s->p.framesize;
    p->mode      = s->p.mode;
    p->bandwidth = s->p.bandwidth;

    return 0;
}
253 |
|
|
|
254 |
|
|
/**
 * Set up a CELT frame from the current packet decision.
 *
 * Band limits, channel count and size are taken from the context. If all
 * steps covered by the frame are silent, the frame is flagged silent, its
 * framebits are zeroed and nothing else is set. Otherwise the transient
 * flag is derived from the recorded inflection points and the remaining
 * fields get default values.
 *
 * @param s     psychoacoustic context
 * @param f     frame to initialize
 * @param index position of the frame inside the packet
 */
void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
{
    const int radius      = (1 << s->p.framesize);
    const int step_offset = radius*index;
    int all_silent = 1, first_point = 0, nearby = 0;
    int n, limit;

    f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0;
    f->end_band   = ff_celt_band_end[s->p.bandwidth];
    f->channels   = s->avctx->channels;
    f->size       = s->p.framesize;

    for (n = 0; n < (1 << f->size); n++)
        all_silent &= s->steps[index*(1 << f->size) + n]->silence;

    f->silence = all_silent;
    if (f->silence) {
        f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */
        return;
    }

    /* First recorded inflection point at or after this frame's first step */
    n = 0;
    while (n < s->inflection_points_count && s->inflection_points[n] < step_offset)
        n++;
    if (n < s->inflection_points_count)
        first_point = n;

    /* Count the points that fall before the end of this frame */
    limit = FFMIN(radius, s->inflection_points_count - first_point);
    for (n = first_point; n < limit; n++) {
        if (s->inflection_points[n] < (step_offset + radius))
            nearby++;
    }

    /* Transient flagging */
    f->transient = nearby > 0;
    if (f->transient)
        f->blocks = OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP;
    else
        f->blocks = 1;

    /* Some sane defaults */
    f->pfilter   = 0;
    f->pf_gain   = 0.5f;
    f->pf_octave = 2;
    f->pf_period = 1;
    f->pf_tapset = 2;

    /* More sane defaults */
    f->tf_select        = 0;
    f->anticollapse     = 1;
    f->alloc_trim       = 5;
    f->skip_band_floor  = f->end_band;
    f->intensity_stereo = f->end_band;
    f->dual_stereo      = 0;
    f->spread           = CELT_SPREAD_NORMAL;
    memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
    memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
}
309 |
|
|
|
310 |
|
|
static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, |
311 |
|
|
CeltFrame *f_out) |
312 |
|
|
{ |
313 |
|
|
int i, f, ch; |
314 |
|
|
int frame_size = OPUS_BLOCK_SIZE(s->p.framesize); |
315 |
|
|
float rate, frame_bits = 0; |
316 |
|
|
|
317 |
|
|
/* Used for the global ROTATE flag */ |
318 |
|
|
float tonal = 0.0f; |
319 |
|
|
|
320 |
|
|
/* Pseudo-weights */ |
321 |
|
|
float band_score[CELT_MAX_BANDS] = { 0 }; |
322 |
|
|
float max_score = 1.0f; |
323 |
|
|
|
324 |
|
|
/* Pass one - one loop around each band, computing unquant stuff */ |
325 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
326 |
|
|
float weight = 0.0f; |
327 |
|
|
float tonal_contrib = 0.0f; |
328 |
|
|
for (f = 0; f < (1 << s->p.framesize); f++) { |
329 |
|
|
weight = start[f]->stereo[i]; |
330 |
|
|
for (ch = 0; ch < s->avctx->channels; ch++) { |
331 |
|
|
weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i]; |
332 |
|
|
tonal_contrib += start[f]->tone[ch][i]; |
333 |
|
|
} |
334 |
|
|
} |
335 |
|
|
tonal += tonal_contrib; |
336 |
|
|
band_score[i] = weight; |
337 |
|
|
} |
338 |
|
|
|
339 |
|
|
tonal /= (float)CELT_MAX_BANDS; |
340 |
|
|
|
341 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
342 |
|
|
if (band_score[i] > max_score) |
343 |
|
|
max_score = band_score[i]; |
344 |
|
|
} |
345 |
|
|
|
346 |
|
|
for (i = 0; i < CELT_MAX_BANDS; i++) { |
347 |
|
|
f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f); |
348 |
|
|
frame_bits += band_score[i]*8.0f; |
349 |
|
|
} |
350 |
|
|
|
351 |
|
|
tonal /= 1333136.0f; |
352 |
|
|
f_out->spread = av_clip_uintp2(lrintf(tonal), 2); |
353 |
|
|
|
354 |
|
|
rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16; |
355 |
|
|
rate *= s->lambda; |
356 |
|
|
rate /= s->avctx->sample_rate/frame_size; |
357 |
|
|
|
358 |
|
|
f_out->framebits = lrintf(rate); |
359 |
|
|
f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_PACKET_SIZE*8); |
360 |
|
|
f_out->framebits = FFALIGN(f_out->framebits, 8); |
361 |
|
|
} |
362 |
|
|
|
363 |
|
|
/**
 * Sum the trial cost of every band for the frame's current settings.
 *
 * A scratch range coder is initialized, bit allocation is run on it and
 * pvq_band_cost() is evaluated for each band.
 *
 * @param s          psychoacoustic context (supplies lambda)
 * @param f          frame to evaluate
 * @param total_dist receives the sum of the per-band costs
 * @return always 0
 */
static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
{
    int i;
    /* Must be a float: an integer accumulator would truncate the running
     * sum after every band and distort the comparison between candidates */
    float tdist = 0.0f;
    OpusRangeCoder dump;

    ff_opus_rc_enc_init(&dump);
    ff_celt_bitalloc(f, &dump, 1);

    for (i = 0; i < CELT_MAX_BANDS; i++) {
        float bits = 0.0f;
        float dist = pvq_band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
        tdist += dist;
    }

    *total_dist = tdist;

    return 0;
}
381 |
|
|
|
382 |
|
|
/**
 * Decide whether the frame should use dual stereo.
 *
 * bands_dist() is evaluated with dual stereo off and then on; dual stereo
 * is enabled only if it gives the strictly smaller total. With fewer than
 * two channels the flag is simply cleared.
 */
static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
{
    float dist_joint, dist_dual;
    int use_dual;

    f->dual_stereo = 0;

    if (s->avctx->channels < 2)
        return;

    /* Cost with dual stereo off */
    bands_dist(s, f, &dist_joint);

    /* Cost with dual stereo on */
    f->dual_stereo = 1;
    bands_dist(s, f, &dist_dual);

    use_dual = dist_dual < dist_joint;

    f->dual_stereo       = use_dual;
    s->dual_stereo_used += use_dual;
}
397 |
|
|
|
398 |
|
|
/**
 * Pick the intensity stereo start band by exhaustive search.
 *
 * Every candidate from f->end_band down to 0 is tried and the one with the
 * smallest bands_dist() total is kept; on ties the higher band wins since
 * it is tried first. The running average s->avg_is_band is updated.
 * Nothing is done with fewer than two channels.
 */
static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
{
    int band, best_band = CELT_MAX_BANDS - 1;
    float dist, best_dist = FLT_MAX;
    /* TODO: fix, make some heuristic up here using the lambda value */
    float end_band = 0;

    if (s->avctx->channels < 2)
        return;

    band = f->end_band;
    while (band >= end_band) {
        f->intensity_stereo = band;
        bands_dist(s, f, &dist);
        if (dist < best_dist) {
            best_band = band;
            best_dist = dist;
        }
        band--;
    }

    f->intensity_stereo = best_band;
    s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f;
}
420 |
|
|
|
421 |
|
|
/**
 * Choose tf_select and the per-band tf_change flags for a frame.
 *
 * For both tf_select candidates, each band is scored under the two
 * magnitudes derived from ff_celt_tf_select; the per-band flag is set when
 * the first score is closer to 1.0 than the second. The candidate with the
 * larger summed score is kept and its flags are copied into f->tf_change.
 *
 * @param s     psychoacoustic context
 * @param start first of the (1 << f->size) steps covered by the frame
 * @param f     frame being configured
 * @return always 0
 */
static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
{
    int config[2][CELT_MAX_BANDS] = { { 0 } };
    float score[2] = { 0 };
    const int base     = f->transient ? 120 : 960;
    const int nb_steps = (1 << f->size);
    int way, opt, band, step, ch;

    for (way = 0; way < 2; way++) {
        int mag[2];

        /* Negative table entries shift base down, others shift it up */
        for (opt = 0; opt < 2; opt++) {
            const int c = ff_celt_tf_select[f->size][f->transient][way][opt];

            if (c < 0)
                mag[opt] = base >> FFABS(c);
            else
                mag[opt] = base << FFABS(c);
        }

        for (band = 0; band < CELT_MAX_BANDS; band++) {
            float iscore0 = 0.0f;
            float iscore1 = 0.0f;

            for (step = 0; step < nb_steps; step++) {
                const OpusPsyStep *cur = start[step];

                for (ch = 0; ch < s->avctx->channels; ch++) {
                    iscore0 += cur->tone[ch][band]*cur->change_amp[ch][band]/mag[0];
                    iscore1 += cur->tone[ch][band]*cur->change_amp[ch][band]/mag[1];
                }
            }

            config[way][band] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f);
            if (config[way][band])
                score[way] += iscore1;
            else
                score[way] += iscore0;
        }
    }

    f->tf_select = score[0] < score[1];
    memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS);

    return 0;
}
454 |
|
|
|
455 |
|
|
/**
 * Run the per-frame searches (bit budget, intensity stereo, dual stereo and
 * TF selection) on a frame. Silent frames are left untouched.
 *
 * @param s     psychoacoustic context
 * @param f     frame to process
 * @param index position of the frame inside the packet
 * @return 1 if the transient flag differs from its value on entry (blocks
 *         is recomputed and s->redo_analysis is set), 0 otherwise
 */
int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
{
    const int transient_on_entry = f->transient;
    OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)];

    if (f->silence)
        return 0;

    celt_gauge_psy_weight(s, start, f);
    celt_search_for_intensity(s, f);
    celt_search_for_dual_stereo(s, f);
    celt_search_for_tf(s, start, f);

    if (f->transient == transient_on_entry) {
        s->redo_analysis = 0;
        return 0;
    }

    /* The transient decision changed, so the block count has to follow */
    if (f->transient)
        f->blocks = OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP;
    else
        f->blocks = 1;
    s->redo_analysis = 1;

    return 1;
}
478 |
|
|
|
479 |
|
|
/**
 * Update the context after a packet has been encoded.
 *
 * The steps that were output are cleared and rotated to the back of the
 * step array, the index of the remaining steps is adjusted, lambda and the
 * average intensity stereo band are updated from the encoded frames, and
 * the per-packet counters are reset.
 *
 * @param s  psychoacoustic context
 * @param f  array of the s->p.frames frames that were just encoded
 * @param rc range coder (currently unused)
 */
void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc)
{
    int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
    int steps_out = s->p.frames*(frame_size/120);
    void *tmp[FF_BUFQUEUE_SIZE];
    float ideal_fbits;

    /* Wipe the metrics of the steps that have been consumed */
    for (i = 0; i < steps_out; i++)
        memset(s->steps[i], 0, sizeof(OpusPsyStep));

    for (i = 0; i < s->max_steps; i++)
        tmp[i] = s->steps[i];

    /* Rotate left by steps_out: consumed steps wrap around to the end */
    for (i = 0; i < s->max_steps; i++) {
        const int i_new = i - steps_out;
        s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i];
    }

    for (i = steps_out; i < s->buffered_steps; i++)
        s->steps[i]->index -= steps_out;

    ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size);

    for (i = 0; i < s->p.frames; i++) {
        s->avg_is_band += f[i].intensity_stereo;
        /* Silent frames have framebits set to 0 by the frame init; dividing
         * by that would turn lambda into inf/NaN for good, so frames
         * without a positive bit budget do not take part in the update */
        if (f[i].framebits > 0)
            s->lambda *= ideal_fbits / f[i].framebits;
    }

    s->avg_is_band /= (s->p.frames + 1);

    s->cs_num = 0;
    s->steps_to_process = 0;
    s->buffered_steps -= steps_out;
    s->total_packets_out += s->p.frames;
    s->inflection_points_count = 0;
}
515 |
|
|
|
516 |
|
|
/**
 * Initialize the psychoacoustic context.
 *
 * Stores the supplied pointers, derives max_steps from the maximum delay
 * (one step per 2.5 ms, rounded up), and allocates the inflection point
 * array, the float DSP context, the per-band filters, the steps and one
 * window plus MDCT per block size. On any failure everything allocated
 * here is released again.
 *
 * @param s        context to initialize
 * @param avctx    codec context (channel count, flags)
 * @param bufqueue frame queue stored for later use
 * @param options  encoder options (max_delay_ms is read)
 * @return 0 on success, a negative error code on failure
 */
av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx,
                             struct FFBufQueue *bufqueue, OpusEncOptions *options)
{
    int ret = 0;
    int n, ch;

    s->redo_analysis           = 0;
    s->lambda                  = 1.0f;
    s->options                 = options;
    s->avctx                   = avctx;
    s->bufqueue                = bufqueue;
    s->max_steps               = ceilf(s->options->max_delay_ms/2.5f);
    s->bsize_analysis          = CELT_BLOCK_960;
    s->avg_is_band             = CELT_MAX_BANDS - 1;
    s->inflection_points_count = 0;

    s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps);
    if (!s->inflection_points) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
    if (!s->dsp) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    /* One pair of filters per channel and band */
    for (ch = 0; ch < s->avctx->channels; ch++) {
        for (n = 0; n < CELT_MAX_BANDS; n++) {
            bessel_init(&s->bfilter_hi[ch][n], 1.0f, 19.0f, 100.0f, 1);
            bessel_init(&s->bfilter_lo[ch][n], 1.0f, 20.0f, 100.0f, 0);
        }
    }

    for (n = 0; n < s->max_steps; n++) {
        s->steps[n] = av_mallocz(sizeof(OpusPsyStep));
        if (!s->steps[n]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
    }

    /* A sine window of twice the block length and an MDCT per block size */
    for (n = 0; n < CELT_BLOCK_NB; n++) {
        float tmp;
        const int len = OPUS_BLOCK_SIZE(n);

        s->window[n] = av_malloc(2*len*sizeof(float));
        if (!s->window[n]) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }
        generate_window_func(s->window[n], 2*len, WFUNC_SINE, &tmp);

        ret = ff_mdct15_init(&s->mdct[n], 0, n + 3, 68 << (CELT_BLOCK_NB - 1 - n));
        if (ret)
            goto fail;
    }

    return 0;

fail:
    av_freep(&s->inflection_points);
    av_freep(&s->dsp);

    for (n = 0; n < CELT_BLOCK_NB; n++) {
        ff_mdct15_uninit(&s->mdct[n]);
        av_freep(&s->window[n]);
    }

    for (n = 0; n < s->max_steps; n++)
        av_freep(&s->steps[n]);

    return ret;
}
587 |
|
|
|
588 |
|
|
/**
 * Mark the end of input by setting the context's eof flag.
 * ff_opus_psy_process() stops waiting for more steps once it is set.
 */
void ff_opus_psy_signal_eof(OpusPsyContext *s)
{
    s->eof = 1;
}
592 |
|
|
|
593 |
|
|
/**
 * Release everything allocated by ff_opus_psy_init() and log statistics
 * (average intensity stereo band and dual stereo usage).
 *
 * @param s context to tear down
 * @return always 0
 */
av_cold int ff_opus_psy_end(OpusPsyContext *s)
{
    int i;
    float dual_stereo_pct = 0.0f;

    av_freep(&s->inflection_points);
    av_freep(&s->dsp);

    for (i = 0; i < CELT_BLOCK_NB; i++) {
        ff_mdct15_uninit(&s->mdct[i]);
        av_freep(&s->window[i]);
    }

    for (i = 0; i < s->max_steps; i++)
        av_freep(&s->steps[i]);

    /* With no packets output the ratio would be 0/0 and print as NaN */
    if (s->total_packets_out)
        dual_stereo_pct = ((float)s->dual_stereo_used/s->total_packets_out)*100.0f;

    av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band);
    av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", dual_stereo_pct);

    return 0;
}