FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/sonic.c
Date: 2022-01-16 20:33:26
Exec Total Coverage
Lines: 0 389 0.0%
Branches: 0 266 0.0%

Line Branch Exec Source
1 /*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21 #include "avcodec.h"
22 #include "encode.h"
23 #include "get_bits.h"
24 #include "golomb.h"
25 #include "internal.h"
26 #include "put_golomb.h"
27 #include "rangecoder.h"
28
29
30 /**
31 * @file
32 * Simple free lossless/lossy audio codec
33 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
34 * Written and designed by Alex Beregszaszi
35 *
36 * TODO:
37 * - CABAC put/get_symbol
38 * - independent quantizer for channels
39 * - >2 channels support
40 * - more decorrelation types
41 * - more tap_quant tests
42 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
43 */
44
/* Largest channel count the codec handles (mono or stereo). */
#define MAX_CHANNELS 2

/* Inter-channel decorrelation modes as stored in the 2-bit header field. */
enum {
    MID_SIDE   = 0,
    LEFT_SIDE  = 1,
    RIGHT_SIDE = 2
};
50
51 typedef struct SonicContext {
52 int version;
53 int minor_version;
54 int lossless, decorrelation;
55
56 int num_taps, downsampling;
57 double quantization;
58
59 int channels, samplerate, block_align, frame_size;
60
61 int *tap_quant;
62 int *int_samples;
63 int *coded_samples[MAX_CHANNELS];
64
65 // for encoding
66 int *tail;
67 int tail_size;
68 int *window;
69 int window_size;
70
71 // for decoding
72 int *predictor_k;
73 int *predictor_state[MAX_CHANNELS];
74 } SonicContext;
75
/* Fixed-point precision of the lattice coefficients and of lossy samples. */
enum {
    LATTICE_SHIFT  = 10,
    SAMPLE_SHIFT   = 4,
    LATTICE_FACTOR = 1 << LATTICE_SHIFT,
    SAMPLE_FACTOR  = 1 << SAMPLE_SHIFT
};

/* Tuning constants of the encoder's rate control. */
#define BASE_QUANT      0.6
#define RATE_VARIATION  3.0
83
/**
 * Scale a down by 2^b, rounding to nearest (halves round upwards).
 *
 * The rounding bias is added in unsigned arithmetic so that inputs close
 * to INT_MAX wrap around instead of invoking signed-overflow undefined
 * behaviour.
 *
 * @param a value to scale
 * @param b shift amount; must be at least 1
 * @return (a + 2^(b-1)) >> b
 */
static inline int shift(int a,int b)
{
    return (int)(a + (1U << (b - 1))) >> b;
}
88
/**
 * Shift a right by b bits and add one to the result when a is negative.
 *
 * @param a value to scale
 * @param b shift amount
 * @return (a >> b) for non-negative a, (a >> b) + 1 otherwise
 */
static inline int shift_down(int a,int b)
{
    int scaled = a >> b;

    if (a < 0)
        scaled++;

    return scaled;
}
93
94 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
95 int i;
96
97 #define put_rac(C,S,B) \
98 do{\
99 if(rc_stat){\
100 rc_stat[*(S)][B]++;\
101 rc_stat2[(S)-state][B]++;\
102 }\
103 put_rac(C,S,B);\
104 }while(0)
105
106 if(v){
107 const int a= FFABS(v);
108 const int e= av_log2(a);
109 put_rac(c, state+0, 0);
110 if(e<=9){
111 for(i=0; i<e; i++){
112 put_rac(c, state+1+i, 1); //1..10
113 }
114 put_rac(c, state+1+i, 0);
115
116 for(i=e-1; i>=0; i--){
117 put_rac(c, state+22+i, (a>>i)&1); //22..31
118 }
119
120 if(is_signed)
121 put_rac(c, state+11 + e, v < 0); //11..21
122 }else{
123 for(i=0; i<e; i++){
124 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
125 }
126 put_rac(c, state+1+9, 0);
127
128 for(i=e-1; i>=0; i--){
129 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
130 }
131
132 if(is_signed)
133 put_rac(c, state+11 + 10, v < 0); //11..21
134 }
135 }else{
136 put_rac(c, state+0, 1);
137 }
138 #undef put_rac
139 }
140
141 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
142 if(get_rac(c, state+0))
143 return 0;
144 else{
145 int i, e;
146 unsigned a;
147 e= 0;
148 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
149 e++;
150 if (e > 31)
151 return AVERROR_INVALIDDATA;
152 }
153
154 a= 1;
155 for(i=e-1; i>=0; i--){
156 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
157 }
158
159 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
160 return (a^e)-e;
161 }
162 }
163
164 #if 1
165 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
166 {
167 int i;
168
169 for (i = 0; i < entries; i++)
170 put_symbol(c, state, buf[i], 1, NULL, NULL);
171
172 return 1;
173 }
174
175 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
176 {
177 int i;
178
179 for (i = 0; i < entries; i++)
180 buf[i] = get_symbol(c, state, 1);
181
182 return 1;
183 }
184 #elif 1
185 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
186 {
187 int i;
188
189 for (i = 0; i < entries; i++)
190 set_se_golomb(pb, buf[i]);
191
192 return 1;
193 }
194
195 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
196 {
197 int i;
198
199 for (i = 0; i < entries; i++)
200 buf[i] = get_se_golomb(gb);
201
202 return 1;
203 }
204
205 #else
206
/* Divisor controlling how fast the run-length step adapts. */
#define ADAPT_LEVEL 8

/**
 * Count the bits needed to represent x.
 *
 * @param x value to measure
 * @return position of the highest set bit plus one, 0 for x == 0
 */
static int bits_to_store(uint64_t x)
{
    int res;

    for (res = 0; x; x >>= 1)
        res++;

    return res;
}
220
221 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
222 {
223 int i, bits;
224
225 if (!max)
226 return;
227
228 bits = bits_to_store(max);
229
230 for (i = 0; i < bits-1; i++)
231 put_bits(pb, 1, value & (1 << i));
232
233 if ( (value | (1 << (bits-1))) <= max)
234 put_bits(pb, 1, value & (1 << (bits-1)));
235 }
236
237 static unsigned int read_uint_max(GetBitContext *gb, int max)
238 {
239 int i, bits, value = 0;
240
241 if (!max)
242 return 0;
243
244 bits = bits_to_store(max);
245
246 for (i = 0; i < bits-1; i++)
247 if (get_bits1(gb))
248 value += 1 << i;
249
250 if ( (value | (1<<(bits-1))) <= max)
251 if (get_bits1(gb))
252 value += 1 << (bits-1);
253
254 return value;
255 }
256
257 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
258 {
259 int i, j, x = 0, low_bits = 0, max = 0;
260 int step = 256, pos = 0, dominant = 0, any = 0;
261 int *copy, *bits;
262
263 copy = av_calloc(entries, sizeof(*copy));
264 if (!copy)
265 return AVERROR(ENOMEM);
266
267 if (base_2_part)
268 {
269 int energy = 0;
270
271 for (i = 0; i < entries; i++)
272 energy += abs(buf[i]);
273
274 low_bits = bits_to_store(energy / (entries * 2));
275 if (low_bits > 15)
276 low_bits = 15;
277
278 put_bits(pb, 4, low_bits);
279 }
280
281 for (i = 0; i < entries; i++)
282 {
283 put_bits(pb, low_bits, abs(buf[i]));
284 copy[i] = abs(buf[i]) >> low_bits;
285 if (copy[i] > max)
286 max = abs(copy[i]);
287 }
288
289 bits = av_calloc(entries*max, sizeof(*bits));
290 if (!bits)
291 {
292 av_free(copy);
293 return AVERROR(ENOMEM);
294 }
295
296 for (i = 0; i <= max; i++)
297 {
298 for (j = 0; j < entries; j++)
299 if (copy[j] >= i)
300 bits[x++] = copy[j] > i;
301 }
302
303 // store bitstream
304 while (pos < x)
305 {
306 int steplet = step >> 8;
307
308 if (pos + steplet > x)
309 steplet = x - pos;
310
311 for (i = 0; i < steplet; i++)
312 if (bits[i+pos] != dominant)
313 any = 1;
314
315 put_bits(pb, 1, any);
316
317 if (!any)
318 {
319 pos += steplet;
320 step += step / ADAPT_LEVEL;
321 }
322 else
323 {
324 int interloper = 0;
325
326 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
327 interloper++;
328
329 // note change
330 write_uint_max(pb, interloper, (step >> 8) - 1);
331
332 pos += interloper + 1;
333 step -= step / ADAPT_LEVEL;
334 }
335
336 if (step < 256)
337 {
338 step = 65536 / step;
339 dominant = !dominant;
340 }
341 }
342
343 // store signs
344 for (i = 0; i < entries; i++)
345 if (buf[i])
346 put_bits(pb, 1, buf[i] < 0);
347
348 av_free(bits);
349 av_free(copy);
350
351 return 0;
352 }
353
354 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
355 {
356 int i, low_bits = 0, x = 0;
357 int n_zeros = 0, step = 256, dominant = 0;
358 int pos = 0, level = 0;
359 int *bits = av_calloc(entries, sizeof(*bits));
360
361 if (!bits)
362 return AVERROR(ENOMEM);
363
364 if (base_2_part)
365 {
366 low_bits = get_bits(gb, 4);
367
368 if (low_bits)
369 for (i = 0; i < entries; i++)
370 buf[i] = get_bits(gb, low_bits);
371 }
372
373 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
374
375 while (n_zeros < entries)
376 {
377 int steplet = step >> 8;
378
379 if (!get_bits1(gb))
380 {
381 for (i = 0; i < steplet; i++)
382 bits[x++] = dominant;
383
384 if (!dominant)
385 n_zeros += steplet;
386
387 step += step / ADAPT_LEVEL;
388 }
389 else
390 {
391 int actual_run = read_uint_max(gb, steplet-1);
392
393 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
394
395 for (i = 0; i < actual_run; i++)
396 bits[x++] = dominant;
397
398 bits[x++] = !dominant;
399
400 if (!dominant)
401 n_zeros += actual_run;
402 else
403 n_zeros++;
404
405 step -= step / ADAPT_LEVEL;
406 }
407
408 if (step < 256)
409 {
410 step = 65536 / step;
411 dominant = !dominant;
412 }
413 }
414
415 // reconstruct unsigned values
416 n_zeros = 0;
417 for (i = 0; n_zeros < entries; i++)
418 {
419 while(1)
420 {
421 if (pos >= entries)
422 {
423 pos = 0;
424 level += 1 << low_bits;
425 }
426
427 if (buf[pos] >= level)
428 break;
429
430 pos++;
431 }
432
433 if (bits[i])
434 buf[pos] += 1 << low_bits;
435 else
436 n_zeros++;
437
438 pos++;
439 }
440 av_free(bits);
441
442 // read signs
443 for (i = 0; i < entries; i++)
444 if (buf[i] && get_bits1(gb))
445 buf[i] = -buf[i];
446
447 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
448
449 return 0;
450 }
451 #endif
452
453 static void predictor_init_state(int *k, int *state, int order)
454 {
455 int i;
456
457 for (i = order-2; i >= 0; i--)
458 {
459 int j, p, x = state[i];
460
461 for (j = 0, p = i+1; p < order; j++,p++)
462 {
463 int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
464 state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
465 x = tmp;
466 }
467 }
468 }
469
470 static int predictor_calc_error(int *k, int *state, int order, int error)
471 {
472 int i, x = error - shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
473
474 #if 1
475 int *k_ptr = &(k[order-2]),
476 *state_ptr = &(state[order-2]);
477 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
478 {
479 int k_value = *k_ptr, state_value = *state_ptr;
480 x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
481 state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
482 }
483 #else
484 for (i = order-2; i >= 0; i--)
485 {
486 x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
487 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
488 }
489 #endif
490
491 // don't drift too far, to avoid overflows
492 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
493 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
494
495 state[0] = x;
496
497 return x;
498 }
499
500 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
501 // Heavily modified Levinson-Durbin algorithm which
502 // copes better with quantization, and calculates the
503 // actual whitened result as it goes.
504
505 static void modified_levinson_durbin(int *window, int window_entries,
506 int *out, int out_entries, int channels, int *tap_quant)
507 {
508 int i;
509 int *state = window + window_entries;
510
511 memcpy(state, window, window_entries * sizeof(*state));
512
513 for (i = 0; i < out_entries; i++)
514 {
515 int step = (i+1)*channels, k, j;
516 double xx = 0.0, xy = 0.0;
517 #if 1
518 int *x_ptr = &(window[step]);
519 int *state_ptr = &(state[0]);
520 j = window_entries - step;
521 for (;j>0;j--,x_ptr++,state_ptr++)
522 {
523 double x_value = *x_ptr;
524 double state_value = *state_ptr;
525 xx += state_value*state_value;
526 xy += x_value*state_value;
527 }
528 #else
529 for (j = 0; j <= (window_entries - step); j++);
530 {
531 double stepval = window[step+j];
532 double stateval = window[j];
533 // xx += (double)window[j]*(double)window[j];
534 // xy += (double)window[step+j]*(double)window[j];
535 xx += stateval*stateval;
536 xy += stepval*stateval;
537 }
538 #endif
539 if (xx == 0.0)
540 k = 0;
541 else
542 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
543
544 if (k > (LATTICE_FACTOR/tap_quant[i]))
545 k = LATTICE_FACTOR/tap_quant[i];
546 if (-k > (LATTICE_FACTOR/tap_quant[i]))
547 k = -(LATTICE_FACTOR/tap_quant[i]);
548
549 out[i] = k;
550 k *= tap_quant[i];
551
552 #if 1
553 x_ptr = &(window[step]);
554 state_ptr = &(state[0]);
555 j = window_entries - step;
556 for (;j>0;j--,x_ptr++,state_ptr++)
557 {
558 int x_value = *x_ptr;
559 int state_value = *state_ptr;
560 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
561 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
562 }
563 #else
564 for (j=0; j <= (window_entries - step); j++)
565 {
566 int stepval = window[step+j];
567 int stateval=state[j];
568 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
569 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
570 }
571 #endif
572 }
573 }
574
/**
 * Map a sample rate to the 4-bit code stored in the stream header.
 *
 * @param samplerate sample rate in Hz
 * @return the code (0..8), or AVERROR(EINVAL) for a rate without a code
 */
static inline int code_samplerate(int samplerate)
{
    // position in this list is the header code
    static const int coded_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int code;

    for (code = 0; code < FF_ARRAY_ELEMS(coded_rates); code++)
        if (coded_rates[code] == samplerate)
            return code;

    return AVERROR(EINVAL);
}
591
592 static av_cold int sonic_encode_init(AVCodecContext *avctx)
593 {
594 SonicContext *s = avctx->priv_data;
595 int *coded_samples;
596 PutBitContext pb;
597 int i;
598
599 s->version = 2;
600
601 if (avctx->channels > MAX_CHANNELS)
602 {
603 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
604 return AVERROR(EINVAL); /* only stereo or mono for now */
605 }
606
607 if (avctx->channels == 2)
608 s->decorrelation = MID_SIDE;
609 else
610 s->decorrelation = 3;
611
612 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
613 {
614 s->lossless = 1;
615 s->num_taps = 32;
616 s->downsampling = 1;
617 s->quantization = 0.0;
618 }
619 else
620 {
621 s->num_taps = 128;
622 s->downsampling = 2;
623 s->quantization = 1.0;
624 }
625
626 // max tap 2048
627 if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
628 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
629 return AVERROR_INVALIDDATA;
630 }
631
632 // generate taps
633 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
634 if (!s->tap_quant)
635 return AVERROR(ENOMEM);
636
637 for (i = 0; i < s->num_taps; i++)
638 s->tap_quant[i] = ff_sqrt(i+1);
639
640 s->channels = avctx->channels;
641 s->samplerate = avctx->sample_rate;
642
643 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
644 s->frame_size = s->channels*s->block_align*s->downsampling;
645
646 s->tail_size = s->num_taps*s->channels;
647 s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
648 if (!s->tail)
649 return AVERROR(ENOMEM);
650
651 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
652 if (!s->predictor_k)
653 return AVERROR(ENOMEM);
654
655 coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
656 if (!coded_samples)
657 return AVERROR(ENOMEM);
658 for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
659 s->coded_samples[i] = coded_samples;
660
661 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
662
663 s->window_size = ((2*s->tail_size)+s->frame_size);
664 s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
665 if (!s->window || !s->int_samples)
666 return AVERROR(ENOMEM);
667
668 avctx->extradata = av_mallocz(16);
669 if (!avctx->extradata)
670 return AVERROR(ENOMEM);
671 init_put_bits(&pb, avctx->extradata, 16*8);
672
673 put_bits(&pb, 2, s->version); // version
674 if (s->version >= 1)
675 {
676 if (s->version >= 2) {
677 put_bits(&pb, 8, s->version);
678 put_bits(&pb, 8, s->minor_version);
679 }
680 put_bits(&pb, 2, s->channels);
681 put_bits(&pb, 4, code_samplerate(s->samplerate));
682 }
683 put_bits(&pb, 1, s->lossless);
684 if (!s->lossless)
685 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
686 put_bits(&pb, 2, s->decorrelation);
687 put_bits(&pb, 2, s->downsampling);
688 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
689 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
690
691 flush_put_bits(&pb);
692 avctx->extradata_size = put_bytes_output(&pb);
693
694 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
695 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
696
697 avctx->frame_size = s->block_align*s->downsampling;
698
699 return 0;
700 }
701
702 static av_cold int sonic_encode_close(AVCodecContext *avctx)
703 {
704 SonicContext *s = avctx->priv_data;
705
706 av_freep(&s->coded_samples[0]);
707 av_freep(&s->predictor_k);
708 av_freep(&s->tail);
709 av_freep(&s->tap_quant);
710 av_freep(&s->window);
711 av_freep(&s->int_samples);
712
713 return 0;
714 }
715
716 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
717 const AVFrame *frame, int *got_packet_ptr)
718 {
719 SonicContext *s = avctx->priv_data;
720 RangeCoder c;
721 int i, j, ch, quant = 0, x = 0;
722 int ret;
723 const short *samples = (const int16_t*)frame->data[0];
724 uint8_t state[32];
725
726 if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
727 return ret;
728
729 ff_init_range_encoder(&c, avpkt->data, avpkt->size);
730 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
731 memset(state, 128, sizeof(state));
732
733 // short -> internal
734 for (i = 0; i < s->frame_size; i++)
735 s->int_samples[i] = samples[i];
736
737 if (!s->lossless)
738 for (i = 0; i < s->frame_size; i++)
739 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
740
741 switch(s->decorrelation)
742 {
743 case MID_SIDE:
744 for (i = 0; i < s->frame_size; i += s->channels)
745 {
746 s->int_samples[i] += s->int_samples[i+1];
747 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
748 }
749 break;
750 case LEFT_SIDE:
751 for (i = 0; i < s->frame_size; i += s->channels)
752 s->int_samples[i+1] -= s->int_samples[i];
753 break;
754 case RIGHT_SIDE:
755 for (i = 0; i < s->frame_size; i += s->channels)
756 s->int_samples[i] -= s->int_samples[i+1];
757 break;
758 }
759
760 memset(s->window, 0, s->window_size * sizeof(*s->window));
761
762 for (i = 0; i < s->tail_size; i++)
763 s->window[x++] = s->tail[i];
764
765 for (i = 0; i < s->frame_size; i++)
766 s->window[x++] = s->int_samples[i];
767
768 for (i = 0; i < s->tail_size; i++)
769 s->window[x++] = 0;
770
771 for (i = 0; i < s->tail_size; i++)
772 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
773
774 // generate taps
775 modified_levinson_durbin(s->window, s->window_size,
776 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
777
778 if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
779 return ret;
780
781 for (ch = 0; ch < s->channels; ch++)
782 {
783 x = s->tail_size+ch;
784 for (i = 0; i < s->block_align; i++)
785 {
786 int sum = 0;
787 for (j = 0; j < s->downsampling; j++, x += s->channels)
788 sum += s->window[x];
789 s->coded_samples[ch][i] = sum;
790 }
791 }
792
793 // simple rate control code
794 if (!s->lossless)
795 {
796 double energy1 = 0.0, energy2 = 0.0;
797 for (ch = 0; ch < s->channels; ch++)
798 {
799 for (i = 0; i < s->block_align; i++)
800 {
801 double sample = s->coded_samples[ch][i];
802 energy2 += sample*sample;
803 energy1 += fabs(sample);
804 }
805 }
806
807 energy2 = sqrt(energy2/(s->channels*s->block_align));
808 energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
809
810 // increase bitrate when samples are like a gaussian distribution
811 // reduce bitrate when samples are like a two-tailed exponential distribution
812
813 if (energy2 > energy1)
814 energy2 += (energy2-energy1)*RATE_VARIATION;
815
816 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
817 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
818
819 quant = av_clip(quant, 1, 65534);
820
821 put_symbol(&c, state, quant, 0, NULL, NULL);
822
823 quant *= SAMPLE_FACTOR;
824 }
825
826 // write out coded samples
827 for (ch = 0; ch < s->channels; ch++)
828 {
829 if (!s->lossless)
830 for (i = 0; i < s->block_align; i++)
831 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
832
833 if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
834 return ret;
835 }
836
837 avpkt->size = ff_rac_terminate(&c, 0);
838 *got_packet_ptr = 1;
839 return 0;
840
841 }
842 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
843
844 #if CONFIG_SONIC_DECODER
/* Sample rates in Hz, indexed by the 4-bit code read from the header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
847
848 static av_cold int sonic_decode_init(AVCodecContext *avctx)
849 {
850 SonicContext *s = avctx->priv_data;
851 int *tmp;
852 GetBitContext gb;
853 int i;
854 int ret;
855
856 s->channels = avctx->channels;
857 s->samplerate = avctx->sample_rate;
858
859 if (!avctx->extradata)
860 {
861 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
862 return AVERROR_INVALIDDATA;
863 }
864
865 ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
866 if (ret < 0)
867 return ret;
868
869 s->version = get_bits(&gb, 2);
870 if (s->version >= 2) {
871 s->version = get_bits(&gb, 8);
872 s->minor_version = get_bits(&gb, 8);
873 }
874 if (s->version != 2)
875 {
876 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
877 return AVERROR_INVALIDDATA;
878 }
879
880 if (s->version >= 1)
881 {
882 int sample_rate_index;
883 s->channels = get_bits(&gb, 2);
884 sample_rate_index = get_bits(&gb, 4);
885 if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
886 av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
887 return AVERROR_INVALIDDATA;
888 }
889 s->samplerate = samplerate_table[sample_rate_index];
890 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
891 s->channels, s->samplerate);
892 }
893
894 if (s->channels > MAX_CHANNELS || s->channels < 1)
895 {
896 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
897 return AVERROR_INVALIDDATA;
898 }
899 avctx->channels = s->channels;
900
901 s->lossless = get_bits1(&gb);
902 if (!s->lossless)
903 skip_bits(&gb, 3); // XXX FIXME
904 s->decorrelation = get_bits(&gb, 2);
905 if (s->decorrelation != 3 && s->channels != 2) {
906 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
907 return AVERROR_INVALIDDATA;
908 }
909
910 s->downsampling = get_bits(&gb, 2);
911 if (!s->downsampling) {
912 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
913 return AVERROR_INVALIDDATA;
914 }
915
916 s->num_taps = (get_bits(&gb, 5)+1)<<5;
917 if (get_bits1(&gb)) // XXX FIXME
918 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
919
920 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
921 s->frame_size = s->channels*s->block_align*s->downsampling;
922 // avctx->frame_size = s->block_align;
923
924 if (s->num_taps * s->channels > s->frame_size) {
925 av_log(avctx, AV_LOG_ERROR,
926 "number of taps times channels (%d * %d) larger than frame size %d\n",
927 s->num_taps, s->channels, s->frame_size);
928 return AVERROR_INVALIDDATA;
929 }
930
931 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
932 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
933
934 // generate taps
935 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
936 if (!s->tap_quant)
937 return AVERROR(ENOMEM);
938
939 for (i = 0; i < s->num_taps; i++)
940 s->tap_quant[i] = ff_sqrt(i+1);
941
942 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
943
944 tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
945 if (!tmp)
946 return AVERROR(ENOMEM);
947 for (i = 0; i < s->channels; i++, tmp += s->num_taps)
948 s->predictor_state[i] = tmp;
949
950 tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
951 if (!tmp)
952 return AVERROR(ENOMEM);
953 for (i = 0; i < s->channels; i++, tmp += s->block_align)
954 s->coded_samples[i] = tmp;
955
956 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
957 if (!s->int_samples)
958 return AVERROR(ENOMEM);
959
960 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
961 return 0;
962 }
963
964 static av_cold int sonic_decode_close(AVCodecContext *avctx)
965 {
966 SonicContext *s = avctx->priv_data;
967
968 av_freep(&s->int_samples);
969 av_freep(&s->tap_quant);
970 av_freep(&s->predictor_k);
971 av_freep(&s->predictor_state[0]);
972 av_freep(&s->coded_samples[0]);
973
974 return 0;
975 }
976
977 static int sonic_decode_frame(AVCodecContext *avctx,
978 void *data, int *got_frame_ptr,
979 AVPacket *avpkt)
980 {
981 const uint8_t *buf = avpkt->data;
982 int buf_size = avpkt->size;
983 SonicContext *s = avctx->priv_data;
984 RangeCoder c;
985 uint8_t state[32];
986 int i, quant, ch, j, ret;
987 int16_t *samples;
988 AVFrame *frame = data;
989
990 if (buf_size == 0) return 0;
991
992 frame->nb_samples = s->frame_size / avctx->channels;
993 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
994 return ret;
995 samples = (int16_t *)frame->data[0];
996
997 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
998
999 memset(state, 128, sizeof(state));
1000 ff_init_range_decoder(&c, buf, buf_size);
1001 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1002
1003 intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1004
1005 // dequantize
1006 for (i = 0; i < s->num_taps; i++)
1007 s->predictor_k[i] *= s->tap_quant[i];
1008
1009 if (s->lossless)
1010 quant = 1;
1011 else
1012 quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
1013
1014 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1015
1016 for (ch = 0; ch < s->channels; ch++)
1017 {
1018 int x = ch;
1019
1020 if (c.overread > MAX_OVERREAD)
1021 return AVERROR_INVALIDDATA;
1022
1023 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1024
1025 intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1026
1027 for (i = 0; i < s->block_align; i++)
1028 {
1029 for (j = 0; j < s->downsampling - 1; j++)
1030 {
1031 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1032 x += s->channels;
1033 }
1034
1035 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1036 x += s->channels;
1037 }
1038
1039 for (i = 0; i < s->num_taps; i++)
1040 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1041 }
1042
1043 switch(s->decorrelation)
1044 {
1045 case MID_SIDE:
1046 for (i = 0; i < s->frame_size; i += s->channels)
1047 {
1048 s->int_samples[i+1] += shift(s->int_samples[i], 1);
1049 s->int_samples[i] -= s->int_samples[i+1];
1050 }
1051 break;
1052 case LEFT_SIDE:
1053 for (i = 0; i < s->frame_size; i += s->channels)
1054 s->int_samples[i+1] += s->int_samples[i];
1055 break;
1056 case RIGHT_SIDE:
1057 for (i = 0; i < s->frame_size; i += s->channels)
1058 s->int_samples[i] += s->int_samples[i+1];
1059 break;
1060 }
1061
1062 if (!s->lossless)
1063 for (i = 0; i < s->frame_size; i++)
1064 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1065
1066 // internal -> short
1067 for (i = 0; i < s->frame_size; i++)
1068 samples[i] = av_clip_int16(s->int_samples[i]);
1069
1070 *got_frame_ptr = 1;
1071
1072 return buf_size;
1073 }
1074
1075 const AVCodec ff_sonic_decoder = {
1076 .name = "sonic",
1077 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1078 .type = AVMEDIA_TYPE_AUDIO,
1079 .id = AV_CODEC_ID_SONIC,
1080 .priv_data_size = sizeof(SonicContext),
1081 .init = sonic_decode_init,
1082 .close = sonic_decode_close,
1083 .decode = sonic_decode_frame,
1084 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
1085 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1086 };
1087 #endif /* CONFIG_SONIC_DECODER */
1088
1089 #if CONFIG_SONIC_ENCODER
1090 const AVCodec ff_sonic_encoder = {
1091 .name = "sonic",
1092 .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1093 .type = AVMEDIA_TYPE_AUDIO,
1094 .id = AV_CODEC_ID_SONIC,
1095 .priv_data_size = sizeof(SonicContext),
1096 .init = sonic_encode_init,
1097 .encode2 = sonic_encode_frame,
1098 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1099 .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1100 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1101 .close = sonic_encode_close,
1102 };
1103 #endif
1104
1105 #if CONFIG_SONIC_LS_ENCODER
1106 const AVCodec ff_sonic_ls_encoder = {
1107 .name = "sonicls",
1108 .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1109 .type = AVMEDIA_TYPE_AUDIO,
1110 .id = AV_CODEC_ID_SONIC_LS,
1111 .priv_data_size = sizeof(SonicContext),
1112 .init = sonic_encode_init,
1113 .encode2 = sonic_encode_frame,
1114 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1115 .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1116 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1117 .close = sonic_encode_close,
1118 };
1119 #endif
1120