1 |
|
|
/* |
2 |
|
|
* AAC encoder main-type prediction |
3 |
|
|
* Copyright (C) 2015 Rostislav Pehlivanov |
4 |
|
|
* |
5 |
|
|
* This file is part of FFmpeg. |
6 |
|
|
* |
7 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
8 |
|
|
* modify it under the terms of the GNU Lesser General Public |
9 |
|
|
* License as published by the Free Software Foundation; either |
10 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
11 |
|
|
* |
12 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
13 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 |
|
|
* Lesser General Public License for more details. |
16 |
|
|
* |
17 |
|
|
* You should have received a copy of the GNU Lesser General Public |
18 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
19 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 |
|
|
*/ |
21 |
|
|
|
22 |
|
|
/** |
23 |
|
|
* @file |
24 |
|
|
* AAC encoder main-type prediction |
25 |
|
|
* @author Rostislav Pehlivanov ( atomnuker gmail com ) |
26 |
|
|
*/ |
27 |
|
|
|
28 |
|
|
#include "aactab.h" |
29 |
|
|
#include "aacenc_pred.h" |
30 |
|
|
#include "aacenc_utils.h" |
31 |
|
|
#include "aacenc_is.h" /* <- Needed for common window distortions */ |
32 |
|
|
#include "aacenc_quantization.h" |
33 |
|
|
|
34 |
|
|
/*
 * Undo a prediction decision for one scalefactor band: if prediction is
 * flagged as used for band sfb of sce, clear the flag and copy the band's
 * entry from band_alt back into band_type.
 *
 * Wrapped in do { } while (0) so the expansion is exactly one statement and
 * can be used safely as the body of an unbraced if/else or loop; both
 * arguments are parenthesized so expressions may be passed.
 */
#define RESTORE_PRED(sce, sfb) \
    do {\
        if ((sce)->ics.prediction_used[(sfb)]) {\
            (sce)->ics.prediction_used[(sfb)] = 0;\
            (sce)->band_type[(sfb)] = (sce)->band_alt[(sfb)];\
        }\
    } while (0)
39 |
|
|
|
40 |
|
531072 |
static inline float flt16_round(float pf) |
41 |
|
|
{ |
42 |
|
|
union av_intfloat32 tmp; |
43 |
|
531072 |
tmp.f = pf; |
44 |
|
531072 |
tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U; |
45 |
|
531072 |
return tmp.f; |
46 |
|
|
} |
47 |
|
|
|
48 |
|
1062136 |
static inline float flt16_even(float pf) |
49 |
|
|
{ |
50 |
|
|
union av_intfloat32 tmp; |
51 |
|
1062136 |
tmp.f = pf; |
52 |
|
1062136 |
tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U; |
53 |
|
1062136 |
return tmp.f; |
54 |
|
|
} |
55 |
|
|
|
56 |
|
3186432 |
static inline float flt16_trunc(float pf) |
57 |
|
|
{ |
58 |
|
|
union av_intfloat32 pun; |
59 |
|
3186432 |
pun.f = pf; |
60 |
|
3186432 |
pun.i &= 0xFFFF0000U; |
61 |
|
3186432 |
return pun.f; |
62 |
|
|
} |
63 |
|
|
|
64 |
|
531072 |
/**
 * Run one step of the predictor for a single coefficient.
 *
 * @param ps    predictor state; every field is read and then updated
 * @param coef  current coefficient; overwritten with the prediction error
 *              (coefficient minus the stored estimate) when set is non-zero
 * @param rcoef receives the new estimate, which is also stored in ps->x_est
 * @param set   non-zero to replace *coef by the prediction error
 */
static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
{
    const float a     = 0.953125; // 61.0 / 64
    const float alpha = 0.90625;  // 29.0 / 32

    /* Snapshot of the state as it was before this step. */
    const float k1   = ps->k1;
    const float r0   = ps->r0;
    const float r1   = ps->r1;
    const float cor0 = ps->cor0;
    const float cor1 = ps->cor1;
    const float var0 = ps->var0;
    const float var1 = ps->var1;

    /* Prediction errors, both derived from the previous state. */
    const float e0 = *coef - ps->x_est;
    const float e1 = e0 - k1 * r0;

    float k2;

    if (set)
        *coef = e0;

    /* All updates below use only the snapshot taken above. */
    ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
    ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
    ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
    ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
    ps->r1   = flt16_trunc(a * (r0 - k1 * e0));
    ps->r0   = flt16_trunc(a * e0);

    /* Prediction for next frame; a coefficient is zero unless its variance exceeds 1 */
    if (ps->var0 > 1)
        ps->k1 = ps->cor0 * flt16_even(a / ps->var0);
    else
        ps->k1 = 0;

    if (ps->var1 > 1)
        k2 = ps->cor1 * flt16_even(a / ps->var1);
    else
        k2 = 0;

    ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
    *rcoef    = ps->x_est;
}
91 |
|
|
|
92 |
|
37349 |
/**
 * Put one predictor back into its initial state: both variances are set
 * to 1, every other field to 0.
 */
static inline void reset_predict_state(PredictorState *ps)
{
    ps->var0  = 1.0f;
    ps->var1  = 1.0f;

    ps->cor0  = 0.0f;
    ps->cor1  = 0.0f;
    ps->r0    = 0.0f;
    ps->r1    = 0.0f;
    ps->k1    = 0.0f;
    ps->x_est = 0.0f;
}
103 |
|
|
|
104 |
|
31 |
/**
 * Reset the first MAX_PREDICTORS entries of the predictor array ps.
 */
static inline void reset_all_predictors(PredictorState *ps)
{
    PredictorState *const end = ps + MAX_PREDICTORS;

    while (ps < end)
        reset_predict_state(ps++);
}
110 |
|
|
|
111 |
|
735 |
/**
 * Reset the predictors that belong to one reset group.
 *
 * Predictors are interleaved over 30 groups: group group_num owns the
 * predictors at indices group_num - 1, group_num - 1 + 30, ... up to
 * MAX_PREDICTORS.
 *
 * @param sce       channel whose predictor_state is modified
 * @param group_num reset group, 1..30; any other value is ignored so that
 *                  a zero or negative group can never index in front of
 *                  the predictor array
 */
static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
{
    enum { PRED_RESET_GROUPS = 30 };
    int i;
    PredictorState *ps = sce->predictor_state;

    if (group_num < 1 || group_num > PRED_RESET_GROUPS)
        return;

    for (i = group_num - 1; i < MAX_PREDICTORS; i += PRED_RESET_GROUPS)
        reset_predict_state(&ps[i]);
}
118 |
|
|
|
119 |
|
832 |
/**
 * Advance the predictors of one channel by a frame.
 *
 * For an EIGHT_SHORT_SEQUENCE frame all predictors are reset and nothing
 * else happens. Otherwise predict() is run on every coefficient of the
 * bands below pmax (the smaller of max_sfb and the per-samplerate limit
 * from ff_aac_pred_sfb_max); a coefficient is replaced by its prediction
 * error only when the channel has predictor_present set and the band has
 * prediction_used set. Finally the selected reset group, if any, is reset.
 */
void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
{
    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    int sfb, k;

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        reset_all_predictors(sce->predictor_state);
        return;
    }

    for (sfb = 0; sfb < pmax; sfb++) {
        /* Both values are constant across the coefficients of one band. */
        const int band_end = sce->ics.swb_offset[sfb + 1];
        const int use_pred = sce->ics.predictor_present && sce->ics.prediction_used[sfb];

        for (k = sce->ics.swb_offset[sfb]; k < band_end; k++)
            predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
                    use_pred);
    }

    if (sce->ics.predictor_reset_group)
        reset_predictor_group(sce, sce->ics.predictor_reset_group);
}
138 |
|
|
|
139 |
|
|
/* If inc = 0 you can check if this returns 0 to see if you can reset freely */ |
140 |
|
803 |
static inline int update_counters(IndividualChannelStream *ics, int inc) |
141 |
|
|
{ |
142 |
|
|
int i; |
143 |
✓✓ |
14273 |
for (i = 1; i < 31; i++) { |
144 |
|
13853 |
ics->predictor_reset_count[i] += inc; |
145 |
✓✓ |
13853 |
if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN) |
146 |
|
383 |
return i; /* Reset this immediately */ |
147 |
|
|
} |
148 |
|
420 |
return 0; |
149 |
|
|
} |
150 |
|
|
|
151 |
|
416 |
/**
 * Reconcile the per-band prediction decisions of a channel pair.
 *
 * Nothing is done unless the pair has common_window set and neither channel
 * is an EIGHT_SHORT_SEQUENCE. For every band, prediction is kept only if
 * the band lies in [PRED_SFB_START, pmax), both channels selected it, and
 * the better of the two ff_aac_is_encoding_err() evaluations reports pass;
 * in every other case RESTORE_PRED() undoes the decision on both channels.
 * predictor_present of both channels is set to whether any band was kept.
 *
 * pmax is the smallest of both channels' max_sfb and the per-samplerate
 * limit from ff_aac_pred_sfb_max.
 */
void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
{
    int start, w, w2, g, i, count = 0;
    SingleChannelElement *sce0 = &cpe->ch[0];
    SingleChannelElement *sce1 = &cpe->ch[1];
    const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    const int pmax  = FFMIN(pmax0, pmax1);

    if (!cpe->common_window ||
        sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
        sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
        return;

    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
        start = 0;
        for (g = 0; g < sce0->ics.num_swb; g++) {
            int sfb = w*16+g;
            int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
            float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
            struct AACISError ph_err1, ph_err2, *erf;

            /*
             * The search, apply and bitstream-writing code in this file all
             * iterate sfb < pmax, so a band at index pmax must be treated as
             * out of range here too (hence >=, not >).
             */
            if (sfb < PRED_SFB_START || sfb >= pmax || sum != 2) {
                RESTORE_PRED(sce0, sfb);
                RESTORE_PRED(sce1, sfb);
                start += sce0->ics.swb_sizes[g];
                continue;
            }

            /* Energies of each channel and of their sum over the band */
            for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
                for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
                    float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
                    float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
                    ener0  += coef0*coef0;
                    ener1  += coef1*coef1;
                    ener01 += (coef0 + coef1)*(coef0 + coef1);
                }
            }

            /*
             * Evaluate both variants (last argument -1 and +1) and keep the
             * one with the smaller error.
             * NOTE(review): presumably the two phase candidates - confirm
             * against ff_aac_is_encoding_err().
             */
            ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                             ener0, ener1, ener01, 1, -1);
            ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
                                             ener0, ener1, ener01, 1, +1);
            erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
            if (erf->pass) {
                sce0->ics.prediction_used[sfb] = 1;
                sce1->ics.prediction_used[sfb] = 1;
                count++;
            } else {
                RESTORE_PRED(sce0, sfb);
                RESTORE_PRED(sce1, sfb);
            }
            start += sce0->ics.swb_sizes[g];
        }
    }

    sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
}
206 |
|
|
|
207 |
|
803 |
/**
 * Choose the predictor reset group for the current frame and store it in
 * sce->ics.predictor_reset_group (0 means no reset).
 *
 * The counters are first advanced by one through update_counters(); if
 * that reports a group over PRED_RESET_FRAME_MIN, that group is taken.
 * Otherwise the group with the largest counter is taken (the lowest index
 * wins ties), but only if its counter exceeds PRED_RESET_MIN.
 */
static void update_pred_resets(SingleChannelElement *sce)
{
    /* max_group_id_c starts at 0 so it is never read uninitialized */
    int i, max_group_id_c = 0, max_frame = 0;
    IndividualChannelStream *ics = &sce->ics;

    /* Update the counters and immediately update any frame behind schedule */
    if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
        return;

    /* Count-based: find the group with the largest counter */
    for (i = 1; i < 31; i++) {
        if (ics->predictor_reset_count[i] > max_frame) {
            max_group_id_c = i;
            max_frame      = ics->predictor_reset_count[i];
        }
    }

    ics->predictor_reset_group = max_frame > PRED_RESET_MIN ? max_group_id_c : 0;
}
232 |
|
|
|
233 |
|
832 |
/**
 * Decide, band by band, whether one channel should use prediction.
 *
 * EIGHT_SHORT_SEQUENCE frames never use prediction. On first use the
 * predictors are reset, prcoeffs is seeded from coeffs and the reset
 * counters are staggered (counter i starts at i). For every band in
 * [PRED_SFB_START, pmax) the plain coefficients, the residual
 * (coeffs - prcoeffs) and the reconstructed prediction are run through
 * quantize_and_encode_band_cost(); prediction is chosen for the band when
 * its distortion and its codebook are both no larger than the plain ones.
 * If the accumulated cost with prediction ends up larger than without, every
 * decision is undone. predictor_present is set to whether any band is left
 * using prediction.
 *
 * Side effects: updates band_type, band_alt, prcoeffs, prediction_used,
 * predictor_present and the reset bookkeeping of sce, and uses the first
 * five 128-float slices of s->scoefs as scratch.
 */
void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
{
    int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
    const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
    /* Scratch slices of 128 floats each */
    float *orig34  = &s->scoefs[128*0]; /* pow34 of the plain coefficients  */
    float *pred34  = &s->scoefs[128*1]; /* pow34 of the reconstruction      */
    float *resid   = &s->scoefs[128*2]; /* coeffs - prcoeffs                */
    float *resid34 = &s->scoefs[128*3]; /* pow34 of the residual            */
    float *qerr    = &s->scoefs[128*4]; /* output of the residual quantizer */

    if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        sce->ics.predictor_present = 0;
        return;
    }

    if (!sce->ics.predictor_initialized) {
        reset_all_predictors(sce->predictor_state);
        sce->ics.predictor_initialized = 1;
        memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
        for (i = 1; i < 31; i++)
            sce->ics.predictor_reset_count[i] = i;
    }

    update_pred_resets(sce);
    memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));

    for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
        int cost1, cost2, cb_p, pred_wins;
        float dist1, dist2, dist_spec_err = 0.0f;
        /* A zeroed band pins every codebook value to 0 */
        const int cb_n       = sce->zeroes[sfb] ? 0 : sce->band_type[sfb];
        const int cb_min     = sce->zeroes[sfb] ? 0 : 1;
        const int cb_max     = sce->zeroes[sfb] ? 0 : RESERVED_BT;
        const int start_coef = sce->ics.swb_offset[sfb];
        const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
        const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];

        /* Bands that are left alone */
        if (start_coef + num_coeffs > MAX_PREDICTORS)
            continue;
        if (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2)
            continue;
        if (sce->band_type[sfb] == NOISE_BT)
            continue;

        /* Normal coefficients */
        s->abs_pow34(orig34, &sce->coeffs[start_coef], num_coeffs);
        dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
                                              orig34, num_coeffs, sce->sf_idx[sfb],
                                              cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
        cost_coeffs += cost1;

        /* Encoded coefficients - needed for #bits, band type and quant. error */
        for (i = 0; i < num_coeffs; i++)
            resid[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
        s->abs_pow34(resid34, resid, num_coeffs);
        cb_p = cb_n;
        if (cb_n < RESERVED_BT)
            cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, resid34), sce->sf_idx[sfb]), cb_min, cb_max);
        quantize_and_encode_band_cost(s, NULL, resid, qerr, resid34, num_coeffs,
                                      sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
                                      &cost2, NULL, 0);

        /* Reconstructed coefficients - needed for distortion measurements */
        for (i = 0; i < num_coeffs; i++) {
            /* The addition is performed even when the correction is zero */
            const float corr = qerr[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - qerr[i]) : 0.0f;
            sce->prcoeffs[start_coef + i] += corr;
        }
        s->abs_pow34(pred34, &sce->prcoeffs[start_coef], num_coeffs);
        cb_p = cb_n;
        if (cb_n < RESERVED_BT)
            cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, pred34), sce->sf_idx[sfb]), cb_min, cb_max);
        dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
                                              pred34, num_coeffs, sce->sf_idx[sfb],
                                              cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
        for (i = 0; i < num_coeffs; i++)
            dist_spec_err += (orig34[i] - pred34[i])*(orig34[i] - pred34[i]);
        dist_spec_err *= s->lambda / band->threshold;
        dist2 += dist_spec_err;

        /* Prediction must be no worse in distortion and in codebook */
        pred_wins = dist2 <= dist1 && cb_p <= cb_n;
        if (pred_wins) {
            cost_pred += cost2;
            sce->ics.prediction_used[sfb] = 1;
            sce->band_alt[sfb]  = cb_n;
            sce->band_type[sfb] = cb_p;
            count++;
        } else {
            cost_pred += cost1;
            sce->band_alt[sfb] = cb_p;
        }
    }

    /* Prediction costs more overall than plain coding: undo every decision */
    if (count && cost_coeffs < cost_pred) {
        count = 0;
        for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
            RESTORE_PRED(sce, sfb);
        }
        memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
    }

    sce->ics.predictor_present = !!count;
}
328 |
|
|
|
329 |
|
|
/** |
330 |
|
|
* Encoder predictors data. |
331 |
|
|
*/ |
332 |
|
6714 |
void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce) |
333 |
|
|
{ |
334 |
|
|
int sfb; |
335 |
|
6714 |
IndividualChannelStream *ics = &sce->ics; |
336 |
|
6714 |
const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]); |
337 |
|
|
|
338 |
✓✓ |
6714 |
if (s->profile != FF_PROFILE_AAC_MAIN || |
339 |
✓✓ |
442 |
!ics->predictor_present) |
340 |
|
6561 |
return; |
341 |
|
|
|
342 |
|
153 |
put_bits(&s->pb, 1, !!ics->predictor_reset_group); |
343 |
✓✓ |
153 |
if (ics->predictor_reset_group) |
344 |
|
147 |
put_bits(&s->pb, 5, ics->predictor_reset_group); |
345 |
✓✓ |
6273 |
for (sfb = 0; sfb < pmax; sfb++) |
346 |
|
6120 |
put_bits(&s->pb, 1, ics->prediction_used[sfb]); |
347 |
|
|
} |