FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/sonic.c
Date: 2021-09-24 20:55:06
Exec Total Coverage
Lines: 0 389 0.0%
Branches: 0 266 0.0%

Line Branch Exec Source
1 /*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21 #include "avcodec.h"
22 #include "encode.h"
23 #include "get_bits.h"
24 #include "golomb.h"
25 #include "internal.h"
26 #include "rangecoder.h"
27
28
29 /**
30 * @file
31 * Simple free lossless/lossy audio codec
32 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
33 * Written and designed by Alex Beregszaszi
34 *
35 * TODO:
36 * - CABAC put/get_symbol
37 * - independent quantizer for channels
38 * - >2 channels support
39 * - more decorrelation types
40 * - more tap_quant tests
41 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
42 */
43
/* Upper bound on the number of interleaved channels handled by this codec. */
#define MAX_CHANNELS 2

/* Stereo decorrelation modes, as stored in the 2-bit header field. */
#define MID_SIDE 0
#define LEFT_SIDE 1
#define RIGHT_SIDE 2

/**
 * Private state shared by the Sonic encoders and the Sonic decoder.
 */
typedef struct SonicContext {
    /* Bitstream version pair written to / parsed from the extradata. */
    int version;
    int minor_version;

    /* Non-zero for the lossless variant. */
    int lossless;
    /* One of MID_SIDE / LEFT_SIDE / RIGHT_SIDE; any other value leaves
     * the channels untouched in the frame coders. */
    int decorrelation;

    /* Predictor order. */
    int num_taps;
    /* Number of input samples folded into each coded sample. */
    int downsampling;
    /* Scale factor applied by the lossy rate control. */
    double quantization;

    int channels;
    int samplerate;
    /* Coded samples per channel and frame. */
    int block_align;
    /* Interleaved samples per frame (channels * block_align * downsampling). */
    int frame_size;

    /* Per-tap quantisation step, num_taps entries. */
    int *tap_quant;
    /* Interleaved working samples, frame_size entries. */
    int *int_samples;
    /* Per-channel coded residuals; all channels share one allocation
     * that starts at coded_samples[0]. */
    int *coded_samples[MAX_CHANNELS];

    // for encoding
    /* Last tail_size interleaved samples of the previous frame. */
    int *tail;
    int tail_size;
    /* Analysis window: tail, current frame, zero padding. */
    int *window;
    int window_size;

    // for decoding (predictor_k is also filled by the encoders)
    int *predictor_k;
    /* Per-channel predictor history; one allocation starting at
     * predictor_state[0]. */
    int *predictor_state[MAX_CHANNELS];
} SonicContext;
74
/* Fixed-point precision (fractional bits) of the lattice coefficients. */
#define LATTICE_SHIFT 10
#define LATTICE_FACTOR (1 << LATTICE_SHIFT)

/* Extra fractional bits carried by samples in lossy mode. */
#define SAMPLE_SHIFT 4
#define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)

/* Tuning constants of the lossy encoder's rate control. */
#define BASE_QUANT 0.6
#define RATE_VARIATION 3.0
82
/**
 * Right-shift a by b bits, rounding to nearest (halves round up).
 *
 * The rounding bias is added in unsigned arithmetic so that values close to
 * INT_MAX wrap around instead of triggering signed-overflow undefined
 * behaviour.
 *
 * @param a value to scale down
 * @param b shift amount, must be at least 1 and less than the width of int
 * @return (a + 2^(b-1)) >> b
 */
static inline int shift(int a, int b)
{
    return (int)(a + (1U << (b - 1))) >> b;
}
87
/**
 * Right-shift a by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 * @return (a >> b) for a >= 0, (a >> b) + 1 for a < 0
 */
static inline int shift_down(int a, int b)
{
    int scaled = a >> b;

    if (a < 0)
        scaled++;

    return scaled;
}
92
93 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
94 int i;
95
96 #define put_rac(C,S,B) \
97 do{\
98 if(rc_stat){\
99 rc_stat[*(S)][B]++;\
100 rc_stat2[(S)-state][B]++;\
101 }\
102 put_rac(C,S,B);\
103 }while(0)
104
105 if(v){
106 const int a= FFABS(v);
107 const int e= av_log2(a);
108 put_rac(c, state+0, 0);
109 if(e<=9){
110 for(i=0; i<e; i++){
111 put_rac(c, state+1+i, 1); //1..10
112 }
113 put_rac(c, state+1+i, 0);
114
115 for(i=e-1; i>=0; i--){
116 put_rac(c, state+22+i, (a>>i)&1); //22..31
117 }
118
119 if(is_signed)
120 put_rac(c, state+11 + e, v < 0); //11..21
121 }else{
122 for(i=0; i<e; i++){
123 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
124 }
125 put_rac(c, state+1+9, 0);
126
127 for(i=e-1; i>=0; i--){
128 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
129 }
130
131 if(is_signed)
132 put_rac(c, state+11 + 10, v < 0); //11..21
133 }
134 }else{
135 put_rac(c, state+0, 1);
136 }
137 #undef put_rac
138 }
139
140 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
141 if(get_rac(c, state+0))
142 return 0;
143 else{
144 int i, e;
145 unsigned a;
146 e= 0;
147 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
148 e++;
149 if (e > 31)
150 return AVERROR_INVALIDDATA;
151 }
152
153 a= 1;
154 for(i=e-1; i>=0; i--){
155 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
156 }
157
158 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
159 return (a^e)-e;
160 }
161 }
162
163 #if 1
164 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
165 {
166 int i;
167
168 for (i = 0; i < entries; i++)
169 put_symbol(c, state, buf[i], 1, NULL, NULL);
170
171 return 1;
172 }
173
174 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
175 {
176 int i;
177
178 for (i = 0; i < entries; i++)
179 buf[i] = get_symbol(c, state, 1);
180
181 return 1;
182 }
183 #elif 1
184 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
185 {
186 int i;
187
188 for (i = 0; i < entries; i++)
189 set_se_golomb(pb, buf[i]);
190
191 return 1;
192 }
193
194 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
195 {
196 int i;
197
198 for (i = 0; i < entries; i++)
199 buf[i] = get_se_golomb(gb);
200
201 return 1;
202 }
203
204 #else
205
/* Divisor controlling how quickly the run-length step adapts. */
#define ADAPT_LEVEL 8

/**
 * Count the bits needed to represent x.
 *
 * @param x value to measure
 * @return position of the highest set bit plus one, 0 for x == 0
 */
static int bits_to_store(uint64_t x)
{
    int width;

    for (width = 0; x; x >>= 1)
        width++;

    return width;
}
219
220 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
221 {
222 int i, bits;
223
224 if (!max)
225 return;
226
227 bits = bits_to_store(max);
228
229 for (i = 0; i < bits-1; i++)
230 put_bits(pb, 1, value & (1 << i));
231
232 if ( (value | (1 << (bits-1))) <= max)
233 put_bits(pb, 1, value & (1 << (bits-1)));
234 }
235
236 static unsigned int read_uint_max(GetBitContext *gb, int max)
237 {
238 int i, bits, value = 0;
239
240 if (!max)
241 return 0;
242
243 bits = bits_to_store(max);
244
245 for (i = 0; i < bits-1; i++)
246 if (get_bits1(gb))
247 value += 1 << i;
248
249 if ( (value | (1<<(bits-1))) <= max)
250 if (get_bits1(gb))
251 value += 1 << (bits-1);
252
253 return value;
254 }
255
256 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
257 {
258 int i, j, x = 0, low_bits = 0, max = 0;
259 int step = 256, pos = 0, dominant = 0, any = 0;
260 int *copy, *bits;
261
262 copy = av_calloc(entries, sizeof(*copy));
263 if (!copy)
264 return AVERROR(ENOMEM);
265
266 if (base_2_part)
267 {
268 int energy = 0;
269
270 for (i = 0; i < entries; i++)
271 energy += abs(buf[i]);
272
273 low_bits = bits_to_store(energy / (entries * 2));
274 if (low_bits > 15)
275 low_bits = 15;
276
277 put_bits(pb, 4, low_bits);
278 }
279
280 for (i = 0; i < entries; i++)
281 {
282 put_bits(pb, low_bits, abs(buf[i]));
283 copy[i] = abs(buf[i]) >> low_bits;
284 if (copy[i] > max)
285 max = abs(copy[i]);
286 }
287
288 bits = av_calloc(entries*max, sizeof(*bits));
289 if (!bits)
290 {
291 av_free(copy);
292 return AVERROR(ENOMEM);
293 }
294
295 for (i = 0; i <= max; i++)
296 {
297 for (j = 0; j < entries; j++)
298 if (copy[j] >= i)
299 bits[x++] = copy[j] > i;
300 }
301
302 // store bitstream
303 while (pos < x)
304 {
305 int steplet = step >> 8;
306
307 if (pos + steplet > x)
308 steplet = x - pos;
309
310 for (i = 0; i < steplet; i++)
311 if (bits[i+pos] != dominant)
312 any = 1;
313
314 put_bits(pb, 1, any);
315
316 if (!any)
317 {
318 pos += steplet;
319 step += step / ADAPT_LEVEL;
320 }
321 else
322 {
323 int interloper = 0;
324
325 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
326 interloper++;
327
328 // note change
329 write_uint_max(pb, interloper, (step >> 8) - 1);
330
331 pos += interloper + 1;
332 step -= step / ADAPT_LEVEL;
333 }
334
335 if (step < 256)
336 {
337 step = 65536 / step;
338 dominant = !dominant;
339 }
340 }
341
342 // store signs
343 for (i = 0; i < entries; i++)
344 if (buf[i])
345 put_bits(pb, 1, buf[i] < 0);
346
347 av_free(bits);
348 av_free(copy);
349
350 return 0;
351 }
352
353 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
354 {
355 int i, low_bits = 0, x = 0;
356 int n_zeros = 0, step = 256, dominant = 0;
357 int pos = 0, level = 0;
358 int *bits = av_calloc(entries, sizeof(*bits));
359
360 if (!bits)
361 return AVERROR(ENOMEM);
362
363 if (base_2_part)
364 {
365 low_bits = get_bits(gb, 4);
366
367 if (low_bits)
368 for (i = 0; i < entries; i++)
369 buf[i] = get_bits(gb, low_bits);
370 }
371
372 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
373
374 while (n_zeros < entries)
375 {
376 int steplet = step >> 8;
377
378 if (!get_bits1(gb))
379 {
380 for (i = 0; i < steplet; i++)
381 bits[x++] = dominant;
382
383 if (!dominant)
384 n_zeros += steplet;
385
386 step += step / ADAPT_LEVEL;
387 }
388 else
389 {
390 int actual_run = read_uint_max(gb, steplet-1);
391
392 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
393
394 for (i = 0; i < actual_run; i++)
395 bits[x++] = dominant;
396
397 bits[x++] = !dominant;
398
399 if (!dominant)
400 n_zeros += actual_run;
401 else
402 n_zeros++;
403
404 step -= step / ADAPT_LEVEL;
405 }
406
407 if (step < 256)
408 {
409 step = 65536 / step;
410 dominant = !dominant;
411 }
412 }
413
414 // reconstruct unsigned values
415 n_zeros = 0;
416 for (i = 0; n_zeros < entries; i++)
417 {
418 while(1)
419 {
420 if (pos >= entries)
421 {
422 pos = 0;
423 level += 1 << low_bits;
424 }
425
426 if (buf[pos] >= level)
427 break;
428
429 pos++;
430 }
431
432 if (bits[i])
433 buf[pos] += 1 << low_bits;
434 else
435 n_zeros++;
436
437 pos++;
438 }
439 av_free(bits);
440
441 // read signs
442 for (i = 0; i < entries; i++)
443 if (buf[i] && get_bits1(gb))
444 buf[i] = -buf[i];
445
446 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
447
448 return 0;
449 }
450 #endif
451
452 static void predictor_init_state(int *k, int *state, int order)
453 {
454 int i;
455
456 for (i = order-2; i >= 0; i--)
457 {
458 int j, p, x = state[i];
459
460 for (j = 0, p = i+1; p < order; j++,p++)
461 {
462 int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
463 state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
464 x = tmp;
465 }
466 }
467 }
468
469 static int predictor_calc_error(int *k, int *state, int order, int error)
470 {
471 int i, x = error - shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
472
473 #if 1
474 int *k_ptr = &(k[order-2]),
475 *state_ptr = &(state[order-2]);
476 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
477 {
478 int k_value = *k_ptr, state_value = *state_ptr;
479 x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
480 state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
481 }
482 #else
483 for (i = order-2; i >= 0; i--)
484 {
485 x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
486 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
487 }
488 #endif
489
490 // don't drift too far, to avoid overflows
491 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
492 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
493
494 state[0] = x;
495
496 return x;
497 }
498
499 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
500 // Heavily modified Levinson-Durbin algorithm which
501 // copes better with quantization, and calculates the
502 // actual whitened result as it goes.
503
504 static void modified_levinson_durbin(int *window, int window_entries,
505 int *out, int out_entries, int channels, int *tap_quant)
506 {
507 int i;
508 int *state = window + window_entries;
509
510 memcpy(state, window, window_entries * sizeof(*state));
511
512 for (i = 0; i < out_entries; i++)
513 {
514 int step = (i+1)*channels, k, j;
515 double xx = 0.0, xy = 0.0;
516 #if 1
517 int *x_ptr = &(window[step]);
518 int *state_ptr = &(state[0]);
519 j = window_entries - step;
520 for (;j>0;j--,x_ptr++,state_ptr++)
521 {
522 double x_value = *x_ptr;
523 double state_value = *state_ptr;
524 xx += state_value*state_value;
525 xy += x_value*state_value;
526 }
527 #else
528 for (j = 0; j <= (window_entries - step); j++);
529 {
530 double stepval = window[step+j];
531 double stateval = window[j];
532 // xx += (double)window[j]*(double)window[j];
533 // xy += (double)window[step+j]*(double)window[j];
534 xx += stateval*stateval;
535 xy += stepval*stateval;
536 }
537 #endif
538 if (xx == 0.0)
539 k = 0;
540 else
541 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
542
543 if (k > (LATTICE_FACTOR/tap_quant[i]))
544 k = LATTICE_FACTOR/tap_quant[i];
545 if (-k > (LATTICE_FACTOR/tap_quant[i]))
546 k = -(LATTICE_FACTOR/tap_quant[i]);
547
548 out[i] = k;
549 k *= tap_quant[i];
550
551 #if 1
552 x_ptr = &(window[step]);
553 state_ptr = &(state[0]);
554 j = window_entries - step;
555 for (;j>0;j--,x_ptr++,state_ptr++)
556 {
557 int x_value = *x_ptr;
558 int state_value = *state_ptr;
559 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
560 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
561 }
562 #else
563 for (j=0; j <= (window_entries - step); j++)
564 {
565 int stepval = window[step+j];
566 int stateval=state[j];
567 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
568 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
569 }
570 #endif
571 }
572 }
573
/**
 * Map a sample rate to the 4-bit index stored in the stream header.
 *
 * @param samplerate sample rate in Hz
 * @return index 0..8 for a supported rate, AVERROR(EINVAL) otherwise
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int idx;

    for (idx = 0; idx < FF_ARRAY_ELEMS(rates); idx++) {
        if (rates[idx] == samplerate)
            return idx;
    }

    return AVERROR(EINVAL);
}
590
591 static av_cold int sonic_encode_init(AVCodecContext *avctx)
592 {
593 SonicContext *s = avctx->priv_data;
594 int *coded_samples;
595 PutBitContext pb;
596 int i;
597
598 s->version = 2;
599
600 if (avctx->channels > MAX_CHANNELS)
601 {
602 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
603 return AVERROR(EINVAL); /* only stereo or mono for now */
604 }
605
606 if (avctx->channels == 2)
607 s->decorrelation = MID_SIDE;
608 else
609 s->decorrelation = 3;
610
611 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
612 {
613 s->lossless = 1;
614 s->num_taps = 32;
615 s->downsampling = 1;
616 s->quantization = 0.0;
617 }
618 else
619 {
620 s->num_taps = 128;
621 s->downsampling = 2;
622 s->quantization = 1.0;
623 }
624
625 // max tap 2048
626 if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
627 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
628 return AVERROR_INVALIDDATA;
629 }
630
631 // generate taps
632 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
633 if (!s->tap_quant)
634 return AVERROR(ENOMEM);
635
636 for (i = 0; i < s->num_taps; i++)
637 s->tap_quant[i] = ff_sqrt(i+1);
638
639 s->channels = avctx->channels;
640 s->samplerate = avctx->sample_rate;
641
642 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
643 s->frame_size = s->channels*s->block_align*s->downsampling;
644
645 s->tail_size = s->num_taps*s->channels;
646 s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
647 if (!s->tail)
648 return AVERROR(ENOMEM);
649
650 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
651 if (!s->predictor_k)
652 return AVERROR(ENOMEM);
653
654 coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
655 if (!coded_samples)
656 return AVERROR(ENOMEM);
657 for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
658 s->coded_samples[i] = coded_samples;
659
660 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
661
662 s->window_size = ((2*s->tail_size)+s->frame_size);
663 s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
664 if (!s->window || !s->int_samples)
665 return AVERROR(ENOMEM);
666
667 avctx->extradata = av_mallocz(16);
668 if (!avctx->extradata)
669 return AVERROR(ENOMEM);
670 init_put_bits(&pb, avctx->extradata, 16*8);
671
672 put_bits(&pb, 2, s->version); // version
673 if (s->version >= 1)
674 {
675 if (s->version >= 2) {
676 put_bits(&pb, 8, s->version);
677 put_bits(&pb, 8, s->minor_version);
678 }
679 put_bits(&pb, 2, s->channels);
680 put_bits(&pb, 4, code_samplerate(s->samplerate));
681 }
682 put_bits(&pb, 1, s->lossless);
683 if (!s->lossless)
684 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
685 put_bits(&pb, 2, s->decorrelation);
686 put_bits(&pb, 2, s->downsampling);
687 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
688 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
689
690 flush_put_bits(&pb);
691 avctx->extradata_size = put_bytes_output(&pb);
692
693 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
694 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
695
696 avctx->frame_size = s->block_align*s->downsampling;
697
698 return 0;
699 }
700
701 static av_cold int sonic_encode_close(AVCodecContext *avctx)
702 {
703 SonicContext *s = avctx->priv_data;
704
705 av_freep(&s->coded_samples[0]);
706 av_freep(&s->predictor_k);
707 av_freep(&s->tail);
708 av_freep(&s->tap_quant);
709 av_freep(&s->window);
710 av_freep(&s->int_samples);
711
712 return 0;
713 }
714
715 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
716 const AVFrame *frame, int *got_packet_ptr)
717 {
718 SonicContext *s = avctx->priv_data;
719 RangeCoder c;
720 int i, j, ch, quant = 0, x = 0;
721 int ret;
722 const short *samples = (const int16_t*)frame->data[0];
723 uint8_t state[32];
724
725 if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
726 return ret;
727
728 ff_init_range_encoder(&c, avpkt->data, avpkt->size);
729 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
730 memset(state, 128, sizeof(state));
731
732 // short -> internal
733 for (i = 0; i < s->frame_size; i++)
734 s->int_samples[i] = samples[i];
735
736 if (!s->lossless)
737 for (i = 0; i < s->frame_size; i++)
738 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
739
740 switch(s->decorrelation)
741 {
742 case MID_SIDE:
743 for (i = 0; i < s->frame_size; i += s->channels)
744 {
745 s->int_samples[i] += s->int_samples[i+1];
746 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
747 }
748 break;
749 case LEFT_SIDE:
750 for (i = 0; i < s->frame_size; i += s->channels)
751 s->int_samples[i+1] -= s->int_samples[i];
752 break;
753 case RIGHT_SIDE:
754 for (i = 0; i < s->frame_size; i += s->channels)
755 s->int_samples[i] -= s->int_samples[i+1];
756 break;
757 }
758
759 memset(s->window, 0, s->window_size * sizeof(*s->window));
760
761 for (i = 0; i < s->tail_size; i++)
762 s->window[x++] = s->tail[i];
763
764 for (i = 0; i < s->frame_size; i++)
765 s->window[x++] = s->int_samples[i];
766
767 for (i = 0; i < s->tail_size; i++)
768 s->window[x++] = 0;
769
770 for (i = 0; i < s->tail_size; i++)
771 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
772
773 // generate taps
774 modified_levinson_durbin(s->window, s->window_size,
775 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
776
777 if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
778 return ret;
779
780 for (ch = 0; ch < s->channels; ch++)
781 {
782 x = s->tail_size+ch;
783 for (i = 0; i < s->block_align; i++)
784 {
785 int sum = 0;
786 for (j = 0; j < s->downsampling; j++, x += s->channels)
787 sum += s->window[x];
788 s->coded_samples[ch][i] = sum;
789 }
790 }
791
792 // simple rate control code
793 if (!s->lossless)
794 {
795 double energy1 = 0.0, energy2 = 0.0;
796 for (ch = 0; ch < s->channels; ch++)
797 {
798 for (i = 0; i < s->block_align; i++)
799 {
800 double sample = s->coded_samples[ch][i];
801 energy2 += sample*sample;
802 energy1 += fabs(sample);
803 }
804 }
805
806 energy2 = sqrt(energy2/(s->channels*s->block_align));
807 energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
808
809 // increase bitrate when samples are like a gaussian distribution
810 // reduce bitrate when samples are like a two-tailed exponential distribution
811
812 if (energy2 > energy1)
813 energy2 += (energy2-energy1)*RATE_VARIATION;
814
815 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
816 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
817
818 quant = av_clip(quant, 1, 65534);
819
820 put_symbol(&c, state, quant, 0, NULL, NULL);
821
822 quant *= SAMPLE_FACTOR;
823 }
824
825 // write out coded samples
826 for (ch = 0; ch < s->channels; ch++)
827 {
828 if (!s->lossless)
829 for (i = 0; i < s->block_align; i++)
830 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
831
832 if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
833 return ret;
834 }
835
836 avpkt->size = ff_rac_terminate(&c, 0);
837 *got_packet_ptr = 1;
838 return 0;
839
840 }
841 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
842
843 #if CONFIG_SONIC_DECODER
/* Sample rates in Hz, indexed by the 4-bit code read from the header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
846
847 static av_cold int sonic_decode_init(AVCodecContext *avctx)
848 {
849 SonicContext *s = avctx->priv_data;
850 int *tmp;
851 GetBitContext gb;
852 int i;
853 int ret;
854
855 s->channels = avctx->channels;
856 s->samplerate = avctx->sample_rate;
857
858 if (!avctx->extradata)
859 {
860 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
861 return AVERROR_INVALIDDATA;
862 }
863
864 ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
865 if (ret < 0)
866 return ret;
867
868 s->version = get_bits(&gb, 2);
869 if (s->version >= 2) {
870 s->version = get_bits(&gb, 8);
871 s->minor_version = get_bits(&gb, 8);
872 }
873 if (s->version != 2)
874 {
875 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
876 return AVERROR_INVALIDDATA;
877 }
878
879 if (s->version >= 1)
880 {
881 int sample_rate_index;
882 s->channels = get_bits(&gb, 2);
883 sample_rate_index = get_bits(&gb, 4);
884 if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
885 av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
886 return AVERROR_INVALIDDATA;
887 }
888 s->samplerate = samplerate_table[sample_rate_index];
889 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
890 s->channels, s->samplerate);
891 }
892
893 if (s->channels > MAX_CHANNELS || s->channels < 1)
894 {
895 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
896 return AVERROR_INVALIDDATA;
897 }
898 avctx->channels = s->channels;
899
900 s->lossless = get_bits1(&gb);
901 if (!s->lossless)
902 skip_bits(&gb, 3); // XXX FIXME
903 s->decorrelation = get_bits(&gb, 2);
904 if (s->decorrelation != 3 && s->channels != 2) {
905 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
906 return AVERROR_INVALIDDATA;
907 }
908
909 s->downsampling = get_bits(&gb, 2);
910 if (!s->downsampling) {
911 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
912 return AVERROR_INVALIDDATA;
913 }
914
915 s->num_taps = (get_bits(&gb, 5)+1)<<5;
916 if (get_bits1(&gb)) // XXX FIXME
917 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
918
919 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
920 s->frame_size = s->channels*s->block_align*s->downsampling;
921 // avctx->frame_size = s->block_align;
922
923 if (s->num_taps * s->channels > s->frame_size) {
924 av_log(avctx, AV_LOG_ERROR,
925 "number of taps times channels (%d * %d) larger than frame size %d\n",
926 s->num_taps, s->channels, s->frame_size);
927 return AVERROR_INVALIDDATA;
928 }
929
930 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
931 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
932
933 // generate taps
934 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
935 if (!s->tap_quant)
936 return AVERROR(ENOMEM);
937
938 for (i = 0; i < s->num_taps; i++)
939 s->tap_quant[i] = ff_sqrt(i+1);
940
941 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
942
943 tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
944 if (!tmp)
945 return AVERROR(ENOMEM);
946 for (i = 0; i < s->channels; i++, tmp += s->num_taps)
947 s->predictor_state[i] = tmp;
948
949 tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
950 if (!tmp)
951 return AVERROR(ENOMEM);
952 for (i = 0; i < s->channels; i++, tmp += s->block_align)
953 s->coded_samples[i] = tmp;
954
955 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
956 if (!s->int_samples)
957 return AVERROR(ENOMEM);
958
959 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
960 return 0;
961 }
962
963 static av_cold int sonic_decode_close(AVCodecContext *avctx)
964 {
965 SonicContext *s = avctx->priv_data;
966
967 av_freep(&s->int_samples);
968 av_freep(&s->tap_quant);
969 av_freep(&s->predictor_k);
970 av_freep(&s->predictor_state[0]);
971 av_freep(&s->coded_samples[0]);
972
973 return 0;
974 }
975
976 static int sonic_decode_frame(AVCodecContext *avctx,
977 void *data, int *got_frame_ptr,
978 AVPacket *avpkt)
979 {
980 const uint8_t *buf = avpkt->data;
981 int buf_size = avpkt->size;
982 SonicContext *s = avctx->priv_data;
983 RangeCoder c;
984 uint8_t state[32];
985 int i, quant, ch, j, ret;
986 int16_t *samples;
987 AVFrame *frame = data;
988
989 if (buf_size == 0) return 0;
990
991 frame->nb_samples = s->frame_size / avctx->channels;
992 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
993 return ret;
994 samples = (int16_t *)frame->data[0];
995
996 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
997
998 memset(state, 128, sizeof(state));
999 ff_init_range_decoder(&c, buf, buf_size);
1000 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1001
1002 intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1003
1004 // dequantize
1005 for (i = 0; i < s->num_taps; i++)
1006 s->predictor_k[i] *= s->tap_quant[i];
1007
1008 if (s->lossless)
1009 quant = 1;
1010 else
1011 quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
1012
1013 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1014
1015 for (ch = 0; ch < s->channels; ch++)
1016 {
1017 int x = ch;
1018
1019 if (c.overread > MAX_OVERREAD)
1020 return AVERROR_INVALIDDATA;
1021
1022 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1023
1024 intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1025
1026 for (i = 0; i < s->block_align; i++)
1027 {
1028 for (j = 0; j < s->downsampling - 1; j++)
1029 {
1030 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1031 x += s->channels;
1032 }
1033
1034 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1035 x += s->channels;
1036 }
1037
1038 for (i = 0; i < s->num_taps; i++)
1039 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1040 }
1041
1042 switch(s->decorrelation)
1043 {
1044 case MID_SIDE:
1045 for (i = 0; i < s->frame_size; i += s->channels)
1046 {
1047 s->int_samples[i+1] += shift(s->int_samples[i], 1);
1048 s->int_samples[i] -= s->int_samples[i+1];
1049 }
1050 break;
1051 case LEFT_SIDE:
1052 for (i = 0; i < s->frame_size; i += s->channels)
1053 s->int_samples[i+1] += s->int_samples[i];
1054 break;
1055 case RIGHT_SIDE:
1056 for (i = 0; i < s->frame_size; i += s->channels)
1057 s->int_samples[i] += s->int_samples[i+1];
1058 break;
1059 }
1060
1061 if (!s->lossless)
1062 for (i = 0; i < s->frame_size; i++)
1063 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1064
1065 // internal -> short
1066 for (i = 0; i < s->frame_size; i++)
1067 samples[i] = av_clip_int16(s->int_samples[i]);
1068
1069 *got_frame_ptr = 1;
1070
1071 return buf_size;
1072 }
1073
1074 const AVCodec ff_sonic_decoder = {
1075 .name = "sonic",
1076 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1077 .type = AVMEDIA_TYPE_AUDIO,
1078 .id = AV_CODEC_ID_SONIC,
1079 .priv_data_size = sizeof(SonicContext),
1080 .init = sonic_decode_init,
1081 .close = sonic_decode_close,
1082 .decode = sonic_decode_frame,
1083 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
1084 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1085 };
1086 #endif /* CONFIG_SONIC_DECODER */
1087
1088 #if CONFIG_SONIC_ENCODER
1089 const AVCodec ff_sonic_encoder = {
1090 .name = "sonic",
1091 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1092 .type = AVMEDIA_TYPE_AUDIO,
1093 .id = AV_CODEC_ID_SONIC,
1094 .priv_data_size = sizeof(SonicContext),
1095 .init = sonic_encode_init,
1096 .encode2 = sonic_encode_frame,
1097 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1098 .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1099 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1100 .close = sonic_encode_close,
1101 };
1102 #endif
1103
1104 #if CONFIG_SONIC_LS_ENCODER
1105 const AVCodec ff_sonic_ls_encoder = {
1106 .name = "sonicls",
1107 .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1108 .type = AVMEDIA_TYPE_AUDIO,
1109 .id = AV_CODEC_ID_SONIC_LS,
1110 .priv_data_size = sizeof(SonicContext),
1111 .init = sonic_encode_init,
1112 .encode2 = sonic_encode_frame,
1113 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1114 .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1115 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1116 .close = sonic_encode_close,
1117 };
1118 #endif
1119