FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/sonic.c
Date: 2024-04-24 18:52:15
Exec Total Coverage
Lines: 0 393 0.0%
Functions: 0 16 0.0%
Branches: 0 266 0.0%

Line Branch Exec Source
1 /*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "config_components.h"
23
24 #include "libavutil/mem.h"
25 #include "avcodec.h"
26 #include "codec_internal.h"
27 #include "decode.h"
28 #include "encode.h"
29 #include "get_bits.h"
30 #include "golomb.h"
31 #include "put_golomb.h"
32 #include "rangecoder.h"
33
34
35 /**
36 * @file
37 * Simple free lossless/lossy audio codec
38 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
39 * Written and designed by Alex Beregszaszi
40 *
41 * TODO:
42 * - CABAC put/get_symbol
43 * - independent quantizer for channels
44 * - >2 channels support
45 * - more decorrelation types
46 * - more tap_quant tests
47 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
48 */
49
/* Largest channel count the codec handles; it also sizes the per-channel
 * pointer arrays in SonicContext. */
50 #define MAX_CHANNELS 2
51
/* Inter-channel decorrelation modes stored in SonicContext.decorrelation and
 * switched on by the frame encoder/decoder. The encoder stores the literal 3
 * for non-stereo input, which matches none of these cases. */
52 #define MID_SIDE 0
53 #define LEFT_SIDE 1
54 #define RIGHT_SIDE 2
55
56 typedef struct SonicContext {
57 int version;
58 int minor_version;
59 int lossless, decorrelation;
60
61 int num_taps, downsampling;
62 double quantization;
63
64 int channels, samplerate, block_align, frame_size;
65
66 int *tap_quant;
67 int *int_samples;
68 int *coded_samples[MAX_CHANNELS];
69
70 // for encoding
71 int *tail;
72 int tail_size;
73 int *window;
74 int window_size;
75
76 // for decoding
77 int *predictor_k;
78 int *predictor_state[MAX_CHANNELS];
79 } SonicContext;
80
/* Fixed-point scale of the predictor coefficients: coefficient * state
 * products are scaled back down with shift_down(..., LATTICE_SHIFT). */
81 #define LATTICE_SHIFT 10
/* In lossy mode samples are scaled up by 2^SAMPLE_SHIFT when encoding and
 * rounded back down by the same amount when decoding. */
82 #define SAMPLE_SHIFT 4
83 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
84 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
85
/* Constants used by the lossy encoder's per-frame quantizer selection. */
86 #define BASE_QUANT 0.6
87 #define RATE_VARIATION 3.0
88
/**
 * Scale a down by 2^b after adding half of the divisor, i.e. a rounding
 * right shift. b must be at least 1 so that the bias 1 << (b-1) is defined.
 */
static inline int shift(int a,int b)
{
    const int bias = 1 << (b - 1);

    return (a + bias) >> b;
}
93
/**
 * Right-shift a by b bits and add one to the result when a is negative.
 */
static inline int shift_down(int a,int b)
{
    int result = a >> b;

    if (a < 0)
        result++;

    return result;
}
98
99 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
100 int i;
101
102 #define put_rac(C,S,B) \
103 do{\
104 if(rc_stat){\
105 rc_stat[*(S)][B]++;\
106 rc_stat2[(S)-state][B]++;\
107 }\
108 put_rac(C,S,B);\
109 }while(0)
110
111 if(v){
112 const int a= FFABS(v);
113 const int e= av_log2(a);
114 put_rac(c, state+0, 0);
115 if(e<=9){
116 for(i=0; i<e; i++){
117 put_rac(c, state+1+i, 1); //1..10
118 }
119 put_rac(c, state+1+i, 0);
120
121 for(i=e-1; i>=0; i--){
122 put_rac(c, state+22+i, (a>>i)&1); //22..31
123 }
124
125 if(is_signed)
126 put_rac(c, state+11 + e, v < 0); //11..21
127 }else{
128 for(i=0; i<e; i++){
129 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
130 }
131 put_rac(c, state+1+9, 0);
132
133 for(i=e-1; i>=0; i--){
134 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
135 }
136
137 if(is_signed)
138 put_rac(c, state+11 + 10, v < 0); //11..21
139 }
140 }else{
141 put_rac(c, state+0, 1);
142 }
143 #undef put_rac
144 }
145
146 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
147 if(get_rac(c, state+0))
148 return 0;
149 else{
150 int i, e;
151 unsigned a;
152 e= 0;
153 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
154 e++;
155 if (e > 31)
156 return AVERROR_INVALIDDATA;
157 }
158
159 a= 1;
160 for(i=e-1; i>=0; i--){
161 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
162 }
163
164 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
165 return (a^e)-e;
166 }
167 }
168
169 #if 1
170 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
171 {
172 int i;
173
174 for (i = 0; i < entries; i++)
175 put_symbol(c, state, buf[i], 1, NULL, NULL);
176
177 return 1;
178 }
179
180 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
181 {
182 int i;
183
184 for (i = 0; i < entries; i++)
185 buf[i] = get_symbol(c, state, 1);
186
187 return 1;
188 }
189 #elif 1
190 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
191 {
192 int i;
193
194 for (i = 0; i < entries; i++)
195 set_se_golomb(pb, buf[i]);
196
197 return 1;
198 }
199
200 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
201 {
202 int i;
203
204 for (i = 0; i < entries; i++)
205 buf[i] = get_se_golomb(gb);
206
207 return 1;
208 }
209
210 #else
211
/* Adaptation rate of the run-length coder below: after every coded run the
 * step is increased or decreased by step / ADAPT_LEVEL. */
212 #define ADAPT_LEVEL 8
213
/**
 * Number of bits needed to represent x, i.e. the position of its highest
 * set bit plus one; 0 for x == 0.
 */
static int bits_to_store(uint64_t x)
{
    int res;

    for (res = 0; x; x >>= 1)
        res++;

    return res;
}
225
226 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
227 {
228 int i, bits;
229
230 if (!max)
231 return;
232
233 bits = bits_to_store(max);
234
235 for (i = 0; i < bits-1; i++)
236 put_bits(pb, 1, value & (1 << i));
237
238 if ( (value | (1 << (bits-1))) <= max)
239 put_bits(pb, 1, value & (1 << (bits-1)));
240 }
241
242 static unsigned int read_uint_max(GetBitContext *gb, int max)
243 {
244 int i, bits, value = 0;
245
246 if (!max)
247 return 0;
248
249 bits = bits_to_store(max);
250
251 for (i = 0; i < bits-1; i++)
252 if (get_bits1(gb))
253 value += 1 << i;
254
255 if ( (value | (1<<(bits-1))) <= max)
256 if (get_bits1(gb))
257 value += 1 << (bits-1);
258
259 return value;
260 }
261
262 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
263 {
264 int i, j, x = 0, low_bits = 0, max = 0;
265 int step = 256, pos = 0, dominant = 0, any = 0;
266 int *copy, *bits;
267
268 copy = av_calloc(entries, sizeof(*copy));
269 if (!copy)
270 return AVERROR(ENOMEM);
271
272 if (base_2_part)
273 {
274 int energy = 0;
275
276 for (i = 0; i < entries; i++)
277 energy += abs(buf[i]);
278
279 low_bits = bits_to_store(energy / (entries * 2));
280 if (low_bits > 15)
281 low_bits = 15;
282
283 put_bits(pb, 4, low_bits);
284 }
285
286 for (i = 0; i < entries; i++)
287 {
288 put_bits(pb, low_bits, abs(buf[i]));
289 copy[i] = abs(buf[i]) >> low_bits;
290 if (copy[i] > max)
291 max = abs(copy[i]);
292 }
293
294 bits = av_calloc(entries*max, sizeof(*bits));
295 if (!bits)
296 {
297 av_free(copy);
298 return AVERROR(ENOMEM);
299 }
300
301 for (i = 0; i <= max; i++)
302 {
303 for (j = 0; j < entries; j++)
304 if (copy[j] >= i)
305 bits[x++] = copy[j] > i;
306 }
307
308 // store bitstream
309 while (pos < x)
310 {
311 int steplet = step >> 8;
312
313 if (pos + steplet > x)
314 steplet = x - pos;
315
316 for (i = 0; i < steplet; i++)
317 if (bits[i+pos] != dominant)
318 any = 1;
319
320 put_bits(pb, 1, any);
321
322 if (!any)
323 {
324 pos += steplet;
325 step += step / ADAPT_LEVEL;
326 }
327 else
328 {
329 int interloper = 0;
330
331 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
332 interloper++;
333
334 // note change
335 write_uint_max(pb, interloper, (step >> 8) - 1);
336
337 pos += interloper + 1;
338 step -= step / ADAPT_LEVEL;
339 }
340
341 if (step < 256)
342 {
343 step = 65536 / step;
344 dominant = !dominant;
345 }
346 }
347
348 // store signs
349 for (i = 0; i < entries; i++)
350 if (buf[i])
351 put_bits(pb, 1, buf[i] < 0);
352
353 av_free(bits);
354 av_free(copy);
355
356 return 0;
357 }
358
/*
 * Bonk-style list reader. It sits in the last branch of the surrounding
 * #if / #elif / #else and is therefore not compiled while the first branch
 * is selected.
 *
 * Reads an optional 4-bit low_bits field and that many raw bits per entry,
 * then a run-length coded flag sequence, rebuilds the magnitudes from the
 * flags and finally applies one sign bit per non-zero entry.
 *
 * Returns 0 on success or AVERROR(ENOMEM) when the scratch buffer cannot be
 * allocated.
 */
359 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
360 {
361 int i, low_bits = 0, x = 0;
362 int n_zeros = 0, step = 256, dominant = 0;
363 int pos = 0, level = 0;
// NOTE(review): bits holds `entries` ints, but the run decoder below appends
// one element per decoded flag without checking x against that size --
// confirm this cannot write past the allocation.
364 int *bits = av_calloc(entries, sizeof(*bits));
365
366 if (!bits)
367 return AVERROR(ENOMEM);
368
// NOTE(review): buf[] is only written here when low_bits is non-zero, yet the
// reconstruction loop further down reads and increments buf[pos] --
// presumably the caller must pass a zeroed buffer otherwise; verify.
369 if (base_2_part)
370 {
371 low_bits = get_bits(gb, 4);
372
373 if (low_bits)
374 for (i = 0; i < entries; i++)
375 buf[i] = get_bits(gb, low_bits);
376 }
377
378 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
379
// Decode runs until as many zero flags as there are entries have been seen.
380 while (n_zeros < entries)
381 {
382 int steplet = step >> 8;
383
// A 0 bit means a full run of steplet flags equal to `dominant`.
384 if (!get_bits1(gb))
385 {
386 for (i = 0; i < steplet; i++)
387 bits[x++] = dominant;
388
389 if (!dominant)
390 n_zeros += steplet;
391
392 step += step / ADAPT_LEVEL;
393 }
394 else
395 {
// A 1 bit means a shorter run of `dominant` flags followed by one
// flag of the opposite value.
396 int actual_run = read_uint_max(gb, steplet-1);
397
398 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
399
400 for (i = 0; i < actual_run; i++)
401 bits[x++] = dominant;
402
403 bits[x++] = !dominant;
404
405 if (!dominant)
406 n_zeros += actual_run;
407 else
408 n_zeros++;
409
410 step -= step / ADAPT_LEVEL;
411 }
412
// When the step drops below 256 it is inverted and the dominant flag
// value is swapped.
413 if (step < 256)
414 {
415 step = 65536 / step;
416 dominant = !dominant;
417 }
418 }
419
420 // reconstruct unsigned values
421 n_zeros = 0;
422 for (i = 0; n_zeros < entries; i++)
423 {
// Advance to the next entry whose value has reached the current level,
// wrapping around and raising the level after each pass over buf[].
424 while(1)
425 {
426 if (pos >= entries)
427 {
428 pos = 0;
429 level += 1 << low_bits;
430 }
431
432 if (buf[pos] >= level)
433 break;
434
435 pos++;
436 }
437
// A set flag raises the entry by one level step; a clear flag is counted
// as a zero and leaves the entry unchanged.
438 if (bits[i])
439 buf[pos] += 1 << low_bits;
440 else
441 n_zeros++;
442
443 pos++;
444 }
445 av_free(bits);
446
447 // read signs
448 for (i = 0; i < entries; i++)
449 if (buf[i] && get_bits1(gb))
450 buf[i] = -buf[i];
451
452 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
453
454 return 0;
455 }
456 #endif
457
458 static void predictor_init_state(int *k, int *state, int order)
459 {
460 int i;
461
462 for (i = order-2; i >= 0; i--)
463 {
464 int j, p, x = state[i];
465
466 for (j = 0, p = i+1; p < order; j++,p++)
467 {
468 int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
469 state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
470 x = tmp;
471 }
472 }
473 }
474
475 static int predictor_calc_error(int *k, int *state, int order, int error)
476 {
477 int i, x = error - (unsigned)shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
478
479 #if 1
480 int *k_ptr = &(k[order-2]),
481 *state_ptr = &(state[order-2]);
482 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
483 {
484 int k_value = *k_ptr, state_value = *state_ptr;
485 x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
486 state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
487 }
488 #else
489 for (i = order-2; i >= 0; i--)
490 {
491 x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
492 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
493 }
494 #endif
495
496 // don't drift too far, to avoid overflows
497 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
498 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
499
500 state[0] = x;
501
502 return x;
503 }
504
505 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
506 // Heavily modified Levinson-Durbin algorithm which
507 // copes better with quantization, and calculates the
508 // actual whitened result as it goes.
509
510 static void modified_levinson_durbin(int *window, int window_entries,
511 int *out, int out_entries, int channels, int *tap_quant)
512 {
513 int i;
514 int *state = window + window_entries;
515
516 memcpy(state, window, window_entries * sizeof(*state));
517
518 for (i = 0; i < out_entries; i++)
519 {
520 int step = (i+1)*channels, k, j;
521 double xx = 0.0, xy = 0.0;
522 #if 1
523 int *x_ptr = &(window[step]);
524 int *state_ptr = &(state[0]);
525 j = window_entries - step;
526 for (;j>0;j--,x_ptr++,state_ptr++)
527 {
528 double x_value = *x_ptr;
529 double state_value = *state_ptr;
530 xx += state_value*state_value;
531 xy += x_value*state_value;
532 }
533 #else
534 for (j = 0; j <= (window_entries - step); j++);
535 {
536 double stepval = window[step+j];
537 double stateval = window[j];
538 // xx += (double)window[j]*(double)window[j];
539 // xy += (double)window[step+j]*(double)window[j];
540 xx += stateval*stateval;
541 xy += stepval*stateval;
542 }
543 #endif
544 if (xx == 0.0)
545 k = 0;
546 else
547 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
548
549 if (k > (LATTICE_FACTOR/tap_quant[i]))
550 k = LATTICE_FACTOR/tap_quant[i];
551 if (-k > (LATTICE_FACTOR/tap_quant[i]))
552 k = -(LATTICE_FACTOR/tap_quant[i]);
553
554 out[i] = k;
555 k *= tap_quant[i];
556
557 #if 1
558 x_ptr = &(window[step]);
559 state_ptr = &(state[0]);
560 j = window_entries - step;
561 for (;j>0;j--,x_ptr++,state_ptr++)
562 {
563 int x_value = *x_ptr;
564 int state_value = *state_ptr;
565 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
566 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
567 }
568 #else
569 for (j=0; j <= (window_entries - step); j++)
570 {
571 int stepval = window[step+j];
572 int stateval=state[j];
573 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
574 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
575 }
576 #endif
577 }
578 }
579
/**
 * Map a sample rate in Hz to the index stored in the stream header.
 *
 * @return index 0..8, or AVERROR(EINVAL) for a rate that has no code
 */
static inline int code_samplerate(int samplerate)
{
    static const int coded_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int idx;

    for (idx = 0; idx < (int)(sizeof(coded_rates) / sizeof(coded_rates[0])); idx++)
        if (coded_rates[idx] == samplerate)
            return idx;

    return AVERROR(EINVAL);
}
596
597 static av_cold int sonic_encode_init(AVCodecContext *avctx)
598 {
599 SonicContext *s = avctx->priv_data;
600 int *coded_samples;
601 PutBitContext pb;
602 int i;
603
604 s->version = 2;
605
606 if (avctx->ch_layout.nb_channels > MAX_CHANNELS)
607 {
608 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
609 return AVERROR(EINVAL); /* only stereo or mono for now */
610 }
611
612 if (avctx->ch_layout.nb_channels == 2)
613 s->decorrelation = MID_SIDE;
614 else
615 s->decorrelation = 3;
616
617 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
618 {
619 s->lossless = 1;
620 s->num_taps = 32;
621 s->downsampling = 1;
622 s->quantization = 0.0;
623 }
624 else
625 {
626 s->num_taps = 128;
627 s->downsampling = 2;
628 s->quantization = 1.0;
629 }
630
631 // max tap 2048
632 if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
633 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
634 return AVERROR_INVALIDDATA;
635 }
636
637 // generate taps
638 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
639 if (!s->tap_quant)
640 return AVERROR(ENOMEM);
641
642 for (i = 0; i < s->num_taps; i++)
643 s->tap_quant[i] = ff_sqrt(i+1);
644
645 s->channels = avctx->ch_layout.nb_channels;
646 s->samplerate = avctx->sample_rate;
647
648 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
649 s->frame_size = s->channels*s->block_align*s->downsampling;
650
651 s->tail_size = s->num_taps*s->channels;
652 s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
653 if (!s->tail)
654 return AVERROR(ENOMEM);
655
656 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
657 if (!s->predictor_k)
658 return AVERROR(ENOMEM);
659
660 coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
661 if (!coded_samples)
662 return AVERROR(ENOMEM);
663 for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
664 s->coded_samples[i] = coded_samples;
665
666 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
667
668 s->window_size = ((2*s->tail_size)+s->frame_size);
669 s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
670 if (!s->window || !s->int_samples)
671 return AVERROR(ENOMEM);
672
673 avctx->extradata = av_mallocz(16);
674 if (!avctx->extradata)
675 return AVERROR(ENOMEM);
676 init_put_bits(&pb, avctx->extradata, 16*8);
677
678 put_bits(&pb, 2, s->version); // version
679 if (s->version >= 1)
680 {
681 if (s->version >= 2) {
682 put_bits(&pb, 8, s->version);
683 put_bits(&pb, 8, s->minor_version);
684 }
685 put_bits(&pb, 2, s->channels);
686 put_bits(&pb, 4, code_samplerate(s->samplerate));
687 }
688 put_bits(&pb, 1, s->lossless);
689 if (!s->lossless)
690 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
691 put_bits(&pb, 2, s->decorrelation);
692 put_bits(&pb, 2, s->downsampling);
693 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
694 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
695
696 flush_put_bits(&pb);
697 avctx->extradata_size = put_bytes_output(&pb);
698
699 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
700 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
701
702 avctx->frame_size = s->block_align*s->downsampling;
703
704 return 0;
705 }
706
707 static av_cold int sonic_encode_close(AVCodecContext *avctx)
708 {
709 SonicContext *s = avctx->priv_data;
710
711 av_freep(&s->coded_samples[0]);
712 av_freep(&s->predictor_k);
713 av_freep(&s->tail);
714 av_freep(&s->tap_quant);
715 av_freep(&s->window);
716 av_freep(&s->int_samples);
717
718 return 0;
719 }
720
721 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
722 const AVFrame *frame, int *got_packet_ptr)
723 {
724 SonicContext *s = avctx->priv_data;
725 RangeCoder c;
726 int i, j, ch, quant = 0, x = 0;
727 int ret;
728 const short *samples = (const int16_t*)frame->data[0];
729 uint8_t state[32];
730
731 if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
732 return ret;
733
734 ff_init_range_encoder(&c, avpkt->data, avpkt->size);
735 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
736 memset(state, 128, sizeof(state));
737
738 // short -> internal
739 for (i = 0; i < s->frame_size; i++)
740 s->int_samples[i] = samples[i];
741
742 if (!s->lossless)
743 for (i = 0; i < s->frame_size; i++)
744 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
745
746 switch(s->decorrelation)
747 {
748 case MID_SIDE:
749 for (i = 0; i < s->frame_size; i += s->channels)
750 {
751 s->int_samples[i] += s->int_samples[i+1];
752 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
753 }
754 break;
755 case LEFT_SIDE:
756 for (i = 0; i < s->frame_size; i += s->channels)
757 s->int_samples[i+1] -= s->int_samples[i];
758 break;
759 case RIGHT_SIDE:
760 for (i = 0; i < s->frame_size; i += s->channels)
761 s->int_samples[i] -= s->int_samples[i+1];
762 break;
763 }
764
765 memset(s->window, 0, s->window_size * sizeof(*s->window));
766
767 for (i = 0; i < s->tail_size; i++)
768 s->window[x++] = s->tail[i];
769
770 for (i = 0; i < s->frame_size; i++)
771 s->window[x++] = s->int_samples[i];
772
773 for (i = 0; i < s->tail_size; i++)
774 s->window[x++] = 0;
775
776 for (i = 0; i < s->tail_size; i++)
777 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
778
779 // generate taps
780 modified_levinson_durbin(s->window, s->window_size,
781 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
782
783 if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
784 return ret;
785
786 for (ch = 0; ch < s->channels; ch++)
787 {
788 x = s->tail_size+ch;
789 for (i = 0; i < s->block_align; i++)
790 {
791 int sum = 0;
792 for (j = 0; j < s->downsampling; j++, x += s->channels)
793 sum += s->window[x];
794 s->coded_samples[ch][i] = sum;
795 }
796 }
797
798 // simple rate control code
799 if (!s->lossless)
800 {
801 double energy1 = 0.0, energy2 = 0.0;
802 for (ch = 0; ch < s->channels; ch++)
803 {
804 for (i = 0; i < s->block_align; i++)
805 {
806 double sample = s->coded_samples[ch][i];
807 energy2 += sample*sample;
808 energy1 += fabs(sample);
809 }
810 }
811
812 energy2 = sqrt(energy2/(s->channels*s->block_align));
813 energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
814
815 // increase bitrate when samples are like a gaussian distribution
816 // reduce bitrate when samples are like a two-tailed exponential distribution
817
818 if (energy2 > energy1)
819 energy2 += (energy2-energy1)*RATE_VARIATION;
820
821 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
822 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
823
824 quant = av_clip(quant, 1, 65534);
825
826 put_symbol(&c, state, quant, 0, NULL, NULL);
827
828 quant *= SAMPLE_FACTOR;
829 }
830
831 // write out coded samples
832 for (ch = 0; ch < s->channels; ch++)
833 {
834 if (!s->lossless)
835 for (i = 0; i < s->block_align; i++)
836 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
837
838 if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
839 return ret;
840 }
841
842 avpkt->size = ff_rac_terminate(&c, 0);
843 *got_packet_ptr = 1;
844 return 0;
845
846 }
847 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
848
849 #if CONFIG_SONIC_DECODER
/* Sample rates in Hz, indexed by the 4-bit rate code of the stream header. */
static const int samplerate_table[] = {
    44100, 22050, 11025,
    96000, 48000, 32000,
    24000, 16000,  8000,
};
852
853 static av_cold int sonic_decode_init(AVCodecContext *avctx)
854 {
855 SonicContext *s = avctx->priv_data;
856 int *tmp;
857 GetBitContext gb;
858 int i;
859 int ret;
860
861 s->channels = avctx->ch_layout.nb_channels;
862 s->samplerate = avctx->sample_rate;
863
864 if (!avctx->extradata)
865 {
866 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
867 return AVERROR_INVALIDDATA;
868 }
869
870 ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
871 if (ret < 0)
872 return ret;
873
874 s->version = get_bits(&gb, 2);
875 if (s->version >= 2) {
876 s->version = get_bits(&gb, 8);
877 s->minor_version = get_bits(&gb, 8);
878 }
879 if (s->version != 2)
880 {
881 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
882 return AVERROR_INVALIDDATA;
883 }
884
885 if (s->version >= 1)
886 {
887 int sample_rate_index;
888 s->channels = get_bits(&gb, 2);
889 sample_rate_index = get_bits(&gb, 4);
890 if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
891 av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
892 return AVERROR_INVALIDDATA;
893 }
894 s->samplerate = samplerate_table[sample_rate_index];
895 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
896 s->channels, s->samplerate);
897 }
898
899 if (s->channels > MAX_CHANNELS || s->channels < 1)
900 {
901 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
902 return AVERROR_INVALIDDATA;
903 }
904 av_channel_layout_uninit(&avctx->ch_layout);
905 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
906 avctx->ch_layout.nb_channels = s->channels;
907
908 s->lossless = get_bits1(&gb);
909 if (!s->lossless)
910 skip_bits(&gb, 3); // XXX FIXME
911 s->decorrelation = get_bits(&gb, 2);
912 if (s->decorrelation != 3 && s->channels != 2) {
913 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
914 return AVERROR_INVALIDDATA;
915 }
916
917 s->downsampling = get_bits(&gb, 2);
918 if (!s->downsampling) {
919 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
920 return AVERROR_INVALIDDATA;
921 }
922
923 s->num_taps = (get_bits(&gb, 5)+1)<<5;
924 if (get_bits1(&gb)) // XXX FIXME
925 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
926
927 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
928 s->frame_size = s->channels*s->block_align*s->downsampling;
929 // avctx->frame_size = s->block_align;
930
931 if (s->num_taps * s->channels > s->frame_size) {
932 av_log(avctx, AV_LOG_ERROR,
933 "number of taps times channels (%d * %d) larger than frame size %d\n",
934 s->num_taps, s->channels, s->frame_size);
935 return AVERROR_INVALIDDATA;
936 }
937
938 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
939 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
940
941 // generate taps
942 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
943 if (!s->tap_quant)
944 return AVERROR(ENOMEM);
945
946 for (i = 0; i < s->num_taps; i++)
947 s->tap_quant[i] = ff_sqrt(i+1);
948
949 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
950
951 tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
952 if (!tmp)
953 return AVERROR(ENOMEM);
954 for (i = 0; i < s->channels; i++, tmp += s->num_taps)
955 s->predictor_state[i] = tmp;
956
957 tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
958 if (!tmp)
959 return AVERROR(ENOMEM);
960 for (i = 0; i < s->channels; i++, tmp += s->block_align)
961 s->coded_samples[i] = tmp;
962
963 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
964 if (!s->int_samples)
965 return AVERROR(ENOMEM);
966
967 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
968 return 0;
969 }
970
971 static av_cold int sonic_decode_close(AVCodecContext *avctx)
972 {
973 SonicContext *s = avctx->priv_data;
974
975 av_freep(&s->int_samples);
976 av_freep(&s->tap_quant);
977 av_freep(&s->predictor_k);
978 av_freep(&s->predictor_state[0]);
979 av_freep(&s->coded_samples[0]);
980
981 return 0;
982 }
983
984 static int sonic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
985 int *got_frame_ptr, AVPacket *avpkt)
986 {
987 const uint8_t *buf = avpkt->data;
988 int buf_size = avpkt->size;
989 SonicContext *s = avctx->priv_data;
990 RangeCoder c;
991 uint8_t state[32];
992 int i, quant, ch, j, ret;
993 int16_t *samples;
994
995 if (buf_size == 0) return 0;
996
997 frame->nb_samples = s->frame_size / avctx->ch_layout.nb_channels;
998 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
999 return ret;
1000 samples = (int16_t *)frame->data[0];
1001
1002 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
1003
1004 memset(state, 128, sizeof(state));
1005 ff_init_range_decoder(&c, buf, buf_size);
1006 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1007
1008 intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1009
1010 // dequantize
1011 for (i = 0; i < s->num_taps; i++)
1012 s->predictor_k[i] *= (unsigned) s->tap_quant[i];
1013
1014 if (s->lossless)
1015 quant = 1;
1016 else
1017 quant = get_symbol(&c, state, 0) * (unsigned)SAMPLE_FACTOR;
1018
1019 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1020
1021 for (ch = 0; ch < s->channels; ch++)
1022 {
1023 int x = ch;
1024
1025 if (c.overread > MAX_OVERREAD)
1026 return AVERROR_INVALIDDATA;
1027
1028 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1029
1030 intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1031
1032 for (i = 0; i < s->block_align; i++)
1033 {
1034 for (j = 0; j < s->downsampling - 1; j++)
1035 {
1036 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1037 x += s->channels;
1038 }
1039
1040 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1041 x += s->channels;
1042 }
1043
1044 for (i = 0; i < s->num_taps; i++)
1045 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1046 }
1047
1048 switch(s->decorrelation)
1049 {
1050 case MID_SIDE:
1051 for (i = 0; i < s->frame_size; i += s->channels)
1052 {
1053 s->int_samples[i+1] += shift(s->int_samples[i], 1);
1054 s->int_samples[i] -= s->int_samples[i+1];
1055 }
1056 break;
1057 case LEFT_SIDE:
1058 for (i = 0; i < s->frame_size; i += s->channels)
1059 s->int_samples[i+1] += s->int_samples[i];
1060 break;
1061 case RIGHT_SIDE:
1062 for (i = 0; i < s->frame_size; i += s->channels)
1063 s->int_samples[i] += s->int_samples[i+1];
1064 break;
1065 }
1066
1067 if (!s->lossless)
1068 for (i = 0; i < s->frame_size; i++)
1069 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1070
1071 // internal -> short
1072 for (i = 0; i < s->frame_size; i++)
1073 samples[i] = av_clip_int16(s->int_samples[i]);
1074
1075 *got_frame_ptr = 1;
1076
1077 return buf_size;
1078 }
1079
1080 const FFCodec ff_sonic_decoder = {
1081 .p.name = "sonic",
1082 CODEC_LONG_NAME("Sonic"),
1083 .p.type = AVMEDIA_TYPE_AUDIO,
1084 .p.id = AV_CODEC_ID_SONIC,
1085 .priv_data_size = sizeof(SonicContext),
1086 .init = sonic_decode_init,
1087 .close = sonic_decode_close,
1088 FF_CODEC_DECODE_CB(sonic_decode_frame),
1089 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
1090 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1091 };
1092 #endif /* CONFIG_SONIC_DECODER */
1093
1094 #if CONFIG_SONIC_ENCODER
1095 const FFCodec ff_sonic_encoder = {
1096 .p.name = "sonic",
1097 CODEC_LONG_NAME("Sonic"),
1098 .p.type = AVMEDIA_TYPE_AUDIO,
1099 .p.id = AV_CODEC_ID_SONIC,
1100 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL |
1101 AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
1102 .priv_data_size = sizeof(SonicContext),
1103 .init = sonic_encode_init,
1104 FF_CODEC_ENCODE_CB(sonic_encode_frame),
1105 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1106 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1107 .close = sonic_encode_close,
1108 };
1109 #endif
1110
1111 #if CONFIG_SONIC_LS_ENCODER
1112 const FFCodec ff_sonic_ls_encoder = {
1113 .p.name = "sonicls",
1114 CODEC_LONG_NAME("Sonic lossless"),
1115 .p.type = AVMEDIA_TYPE_AUDIO,
1116 .p.id = AV_CODEC_ID_SONIC_LS,
1117 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL |
1118 AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
1119 .priv_data_size = sizeof(SonicContext),
1120 .init = sonic_encode_init,
1121 FF_CODEC_ENCODE_CB(sonic_encode_frame),
1122 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1123 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1124 .close = sonic_encode_close,
1125 };
1126 #endif
1127