FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/sonic.c
Date: 2022-07-07 01:21:54
Exec Total Coverage
Lines: 0 390 0.0%
Branches: 0 266 0.0%

Line Branch Exec Source
1 /*
2 * Simple free lossless/lossy audio codec
3 * Copyright (c) 2004 Alex Beregszaszi
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "config_components.h"
23
24 #include "avcodec.h"
25 #include "codec_internal.h"
26 #include "encode.h"
27 #include "get_bits.h"
28 #include "golomb.h"
29 #include "internal.h"
30 #include "put_golomb.h"
31 #include "rangecoder.h"
32
33
34 /**
35 * @file
36 * Simple free lossless/lossy audio codec
37 * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
38 * Written and designed by Alex Beregszaszi
39 *
40 * TODO:
41 * - CABAC put/get_symbol
42 * - independent quantizer for channels
43 * - >2 channels support
44 * - more decorrelation types
45 * - more tap_quant tests
46 * - selectable intlist writers/readers (bonk-style, golomb, cabac)
47 */
48
49 #define MAX_CHANNELS 2
50
51 #define MID_SIDE 0
52 #define LEFT_SIDE 1
53 #define RIGHT_SIDE 2
54
55 typedef struct SonicContext {
56 int version;
57 int minor_version;
58 int lossless, decorrelation;
59
60 int num_taps, downsampling;
61 double quantization;
62
63 int channels, samplerate, block_align, frame_size;
64
65 int *tap_quant;
66 int *int_samples;
67 int *coded_samples[MAX_CHANNELS];
68
69 // for encoding
70 int *tail;
71 int tail_size;
72 int *window;
73 int window_size;
74
75 // for decoding
76 int *predictor_k;
77 int *predictor_state[MAX_CHANNELS];
78 } SonicContext;
79
80 #define LATTICE_SHIFT 10
81 #define SAMPLE_SHIFT 4
82 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
83 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
84
85 #define BASE_QUANT 0.6
86 #define RATE_VARIATION 3.0
87
88 static inline int shift(int a,int b)
89 {
90 return (a+(1<<(b-1))) >> b;
91 }
92
93 static inline int shift_down(int a,int b)
94 {
95 return (a>>b)+(a<0);
96 }
97
98 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
99 int i;
100
101 #define put_rac(C,S,B) \
102 do{\
103 if(rc_stat){\
104 rc_stat[*(S)][B]++;\
105 rc_stat2[(S)-state][B]++;\
106 }\
107 put_rac(C,S,B);\
108 }while(0)
109
110 if(v){
111 const int a= FFABS(v);
112 const int e= av_log2(a);
113 put_rac(c, state+0, 0);
114 if(e<=9){
115 for(i=0; i<e; i++){
116 put_rac(c, state+1+i, 1); //1..10
117 }
118 put_rac(c, state+1+i, 0);
119
120 for(i=e-1; i>=0; i--){
121 put_rac(c, state+22+i, (a>>i)&1); //22..31
122 }
123
124 if(is_signed)
125 put_rac(c, state+11 + e, v < 0); //11..21
126 }else{
127 for(i=0; i<e; i++){
128 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
129 }
130 put_rac(c, state+1+9, 0);
131
132 for(i=e-1; i>=0; i--){
133 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
134 }
135
136 if(is_signed)
137 put_rac(c, state+11 + 10, v < 0); //11..21
138 }
139 }else{
140 put_rac(c, state+0, 1);
141 }
142 #undef put_rac
143 }
144
145 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
146 if(get_rac(c, state+0))
147 return 0;
148 else{
149 int i, e;
150 unsigned a;
151 e= 0;
152 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
153 e++;
154 if (e > 31)
155 return AVERROR_INVALIDDATA;
156 }
157
158 a= 1;
159 for(i=e-1; i>=0; i--){
160 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
161 }
162
163 e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
164 return (a^e)-e;
165 }
166 }
167
168 #if 1
169 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
170 {
171 int i;
172
173 for (i = 0; i < entries; i++)
174 put_symbol(c, state, buf[i], 1, NULL, NULL);
175
176 return 1;
177 }
178
179 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
180 {
181 int i;
182
183 for (i = 0; i < entries; i++)
184 buf[i] = get_symbol(c, state, 1);
185
186 return 1;
187 }
188 #elif 1
189 static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
190 {
191 int i;
192
193 for (i = 0; i < entries; i++)
194 set_se_golomb(pb, buf[i]);
195
196 return 1;
197 }
198
199 static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
200 {
201 int i;
202
203 for (i = 0; i < entries; i++)
204 buf[i] = get_se_golomb(gb);
205
206 return 1;
207 }
208
209 #else
210
211 #define ADAPT_LEVEL 8
212
213 static int bits_to_store(uint64_t x)
214 {
215 int res = 0;
216
217 while(x)
218 {
219 res++;
220 x >>= 1;
221 }
222 return res;
223 }
224
225 static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
226 {
227 int i, bits;
228
229 if (!max)
230 return;
231
232 bits = bits_to_store(max);
233
234 for (i = 0; i < bits-1; i++)
235 put_bits(pb, 1, value & (1 << i));
236
237 if ( (value | (1 << (bits-1))) <= max)
238 put_bits(pb, 1, value & (1 << (bits-1)));
239 }
240
241 static unsigned int read_uint_max(GetBitContext *gb, int max)
242 {
243 int i, bits, value = 0;
244
245 if (!max)
246 return 0;
247
248 bits = bits_to_store(max);
249
250 for (i = 0; i < bits-1; i++)
251 if (get_bits1(gb))
252 value += 1 << i;
253
254 if ( (value | (1<<(bits-1))) <= max)
255 if (get_bits1(gb))
256 value += 1 << (bits-1);
257
258 return value;
259 }
260
261 static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
262 {
263 int i, j, x = 0, low_bits = 0, max = 0;
264 int step = 256, pos = 0, dominant = 0, any = 0;
265 int *copy, *bits;
266
267 copy = av_calloc(entries, sizeof(*copy));
268 if (!copy)
269 return AVERROR(ENOMEM);
270
271 if (base_2_part)
272 {
273 int energy = 0;
274
275 for (i = 0; i < entries; i++)
276 energy += abs(buf[i]);
277
278 low_bits = bits_to_store(energy / (entries * 2));
279 if (low_bits > 15)
280 low_bits = 15;
281
282 put_bits(pb, 4, low_bits);
283 }
284
285 for (i = 0; i < entries; i++)
286 {
287 put_bits(pb, low_bits, abs(buf[i]));
288 copy[i] = abs(buf[i]) >> low_bits;
289 if (copy[i] > max)
290 max = abs(copy[i]);
291 }
292
293 bits = av_calloc(entries*max, sizeof(*bits));
294 if (!bits)
295 {
296 av_free(copy);
297 return AVERROR(ENOMEM);
298 }
299
300 for (i = 0; i <= max; i++)
301 {
302 for (j = 0; j < entries; j++)
303 if (copy[j] >= i)
304 bits[x++] = copy[j] > i;
305 }
306
307 // store bitstream
308 while (pos < x)
309 {
310 int steplet = step >> 8;
311
312 if (pos + steplet > x)
313 steplet = x - pos;
314
315 for (i = 0; i < steplet; i++)
316 if (bits[i+pos] != dominant)
317 any = 1;
318
319 put_bits(pb, 1, any);
320
321 if (!any)
322 {
323 pos += steplet;
324 step += step / ADAPT_LEVEL;
325 }
326 else
327 {
328 int interloper = 0;
329
330 while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
331 interloper++;
332
333 // note change
334 write_uint_max(pb, interloper, (step >> 8) - 1);
335
336 pos += interloper + 1;
337 step -= step / ADAPT_LEVEL;
338 }
339
340 if (step < 256)
341 {
342 step = 65536 / step;
343 dominant = !dominant;
344 }
345 }
346
347 // store signs
348 for (i = 0; i < entries; i++)
349 if (buf[i])
350 put_bits(pb, 1, buf[i] < 0);
351
352 av_free(bits);
353 av_free(copy);
354
355 return 0;
356 }
357
358 static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
359 {
360 int i, low_bits = 0, x = 0;
361 int n_zeros = 0, step = 256, dominant = 0;
362 int pos = 0, level = 0;
363 int *bits = av_calloc(entries, sizeof(*bits));
364
365 if (!bits)
366 return AVERROR(ENOMEM);
367
368 if (base_2_part)
369 {
370 low_bits = get_bits(gb, 4);
371
372 if (low_bits)
373 for (i = 0; i < entries; i++)
374 buf[i] = get_bits(gb, low_bits);
375 }
376
377 // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
378
379 while (n_zeros < entries)
380 {
381 int steplet = step >> 8;
382
383 if (!get_bits1(gb))
384 {
385 for (i = 0; i < steplet; i++)
386 bits[x++] = dominant;
387
388 if (!dominant)
389 n_zeros += steplet;
390
391 step += step / ADAPT_LEVEL;
392 }
393 else
394 {
395 int actual_run = read_uint_max(gb, steplet-1);
396
397 // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
398
399 for (i = 0; i < actual_run; i++)
400 bits[x++] = dominant;
401
402 bits[x++] = !dominant;
403
404 if (!dominant)
405 n_zeros += actual_run;
406 else
407 n_zeros++;
408
409 step -= step / ADAPT_LEVEL;
410 }
411
412 if (step < 256)
413 {
414 step = 65536 / step;
415 dominant = !dominant;
416 }
417 }
418
419 // reconstruct unsigned values
420 n_zeros = 0;
421 for (i = 0; n_zeros < entries; i++)
422 {
423 while(1)
424 {
425 if (pos >= entries)
426 {
427 pos = 0;
428 level += 1 << low_bits;
429 }
430
431 if (buf[pos] >= level)
432 break;
433
434 pos++;
435 }
436
437 if (bits[i])
438 buf[pos] += 1 << low_bits;
439 else
440 n_zeros++;
441
442 pos++;
443 }
444 av_free(bits);
445
446 // read signs
447 for (i = 0; i < entries; i++)
448 if (buf[i] && get_bits1(gb))
449 buf[i] = -buf[i];
450
451 // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
452
453 return 0;
454 }
455 #endif
456
457 static void predictor_init_state(int *k, int *state, int order)
458 {
459 int i;
460
461 for (i = order-2; i >= 0; i--)
462 {
463 int j, p, x = state[i];
464
465 for (j = 0, p = i+1; p < order; j++,p++)
466 {
467 int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
468 state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
469 x = tmp;
470 }
471 }
472 }
473
474 static int predictor_calc_error(int *k, int *state, int order, int error)
475 {
476 int i, x = error - shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
477
478 #if 1
479 int *k_ptr = &(k[order-2]),
480 *state_ptr = &(state[order-2]);
481 for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
482 {
483 int k_value = *k_ptr, state_value = *state_ptr;
484 x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
485 state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
486 }
487 #else
488 for (i = order-2; i >= 0; i--)
489 {
490 x -= (unsigned)shift_down(k[i] * state[i], LATTICE_SHIFT);
491 state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
492 }
493 #endif
494
495 // don't drift too far, to avoid overflows
496 if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
497 if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
498
499 state[0] = x;
500
501 return x;
502 }
503
504 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
505 // Heavily modified Levinson-Durbin algorithm which
506 // copes better with quantization, and calculates the
507 // actual whitened result as it goes.
508
509 static void modified_levinson_durbin(int *window, int window_entries,
510 int *out, int out_entries, int channels, int *tap_quant)
511 {
512 int i;
513 int *state = window + window_entries;
514
515 memcpy(state, window, window_entries * sizeof(*state));
516
517 for (i = 0; i < out_entries; i++)
518 {
519 int step = (i+1)*channels, k, j;
520 double xx = 0.0, xy = 0.0;
521 #if 1
522 int *x_ptr = &(window[step]);
523 int *state_ptr = &(state[0]);
524 j = window_entries - step;
525 for (;j>0;j--,x_ptr++,state_ptr++)
526 {
527 double x_value = *x_ptr;
528 double state_value = *state_ptr;
529 xx += state_value*state_value;
530 xy += x_value*state_value;
531 }
532 #else
533 for (j = 0; j <= (window_entries - step); j++);
534 {
535 double stepval = window[step+j];
536 double stateval = window[j];
537 // xx += (double)window[j]*(double)window[j];
538 // xy += (double)window[step+j]*(double)window[j];
539 xx += stateval*stateval;
540 xy += stepval*stateval;
541 }
542 #endif
543 if (xx == 0.0)
544 k = 0;
545 else
546 k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
547
548 if (k > (LATTICE_FACTOR/tap_quant[i]))
549 k = LATTICE_FACTOR/tap_quant[i];
550 if (-k > (LATTICE_FACTOR/tap_quant[i]))
551 k = -(LATTICE_FACTOR/tap_quant[i]);
552
553 out[i] = k;
554 k *= tap_quant[i];
555
556 #if 1
557 x_ptr = &(window[step]);
558 state_ptr = &(state[0]);
559 j = window_entries - step;
560 for (;j>0;j--,x_ptr++,state_ptr++)
561 {
562 int x_value = *x_ptr;
563 int state_value = *state_ptr;
564 *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
565 *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
566 }
567 #else
568 for (j=0; j <= (window_entries - step); j++)
569 {
570 int stepval = window[step+j];
571 int stateval=state[j];
572 window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
573 state[j] += shift_down(k * stepval, LATTICE_SHIFT);
574 }
575 #endif
576 }
577 }
578
579 static inline int code_samplerate(int samplerate)
580 {
581 switch (samplerate)
582 {
583 case 44100: return 0;
584 case 22050: return 1;
585 case 11025: return 2;
586 case 96000: return 3;
587 case 48000: return 4;
588 case 32000: return 5;
589 case 24000: return 6;
590 case 16000: return 7;
591 case 8000: return 8;
592 }
593 return AVERROR(EINVAL);
594 }
595
596 static av_cold int sonic_encode_init(AVCodecContext *avctx)
597 {
598 SonicContext *s = avctx->priv_data;
599 int *coded_samples;
600 PutBitContext pb;
601 int i;
602
603 s->version = 2;
604
605 if (avctx->ch_layout.nb_channels > MAX_CHANNELS)
606 {
607 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
608 return AVERROR(EINVAL); /* only stereo or mono for now */
609 }
610
611 if (avctx->ch_layout.nb_channels == 2)
612 s->decorrelation = MID_SIDE;
613 else
614 s->decorrelation = 3;
615
616 if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
617 {
618 s->lossless = 1;
619 s->num_taps = 32;
620 s->downsampling = 1;
621 s->quantization = 0.0;
622 }
623 else
624 {
625 s->num_taps = 128;
626 s->downsampling = 2;
627 s->quantization = 1.0;
628 }
629
630 // max tap 2048
631 if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
632 av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
633 return AVERROR_INVALIDDATA;
634 }
635
636 // generate taps
637 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
638 if (!s->tap_quant)
639 return AVERROR(ENOMEM);
640
641 for (i = 0; i < s->num_taps; i++)
642 s->tap_quant[i] = ff_sqrt(i+1);
643
644 s->channels = avctx->ch_layout.nb_channels;
645 s->samplerate = avctx->sample_rate;
646
647 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
648 s->frame_size = s->channels*s->block_align*s->downsampling;
649
650 s->tail_size = s->num_taps*s->channels;
651 s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
652 if (!s->tail)
653 return AVERROR(ENOMEM);
654
655 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
656 if (!s->predictor_k)
657 return AVERROR(ENOMEM);
658
659 coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
660 if (!coded_samples)
661 return AVERROR(ENOMEM);
662 for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
663 s->coded_samples[i] = coded_samples;
664
665 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
666
667 s->window_size = ((2*s->tail_size)+s->frame_size);
668 s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
669 if (!s->window || !s->int_samples)
670 return AVERROR(ENOMEM);
671
672 avctx->extradata = av_mallocz(16);
673 if (!avctx->extradata)
674 return AVERROR(ENOMEM);
675 init_put_bits(&pb, avctx->extradata, 16*8);
676
677 put_bits(&pb, 2, s->version); // version
678 if (s->version >= 1)
679 {
680 if (s->version >= 2) {
681 put_bits(&pb, 8, s->version);
682 put_bits(&pb, 8, s->minor_version);
683 }
684 put_bits(&pb, 2, s->channels);
685 put_bits(&pb, 4, code_samplerate(s->samplerate));
686 }
687 put_bits(&pb, 1, s->lossless);
688 if (!s->lossless)
689 put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
690 put_bits(&pb, 2, s->decorrelation);
691 put_bits(&pb, 2, s->downsampling);
692 put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
693 put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
694
695 flush_put_bits(&pb);
696 avctx->extradata_size = put_bytes_output(&pb);
697
698 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
699 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
700
701 avctx->frame_size = s->block_align*s->downsampling;
702
703 return 0;
704 }
705
706 static av_cold int sonic_encode_close(AVCodecContext *avctx)
707 {
708 SonicContext *s = avctx->priv_data;
709
710 av_freep(&s->coded_samples[0]);
711 av_freep(&s->predictor_k);
712 av_freep(&s->tail);
713 av_freep(&s->tap_quant);
714 av_freep(&s->window);
715 av_freep(&s->int_samples);
716
717 return 0;
718 }
719
720 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
721 const AVFrame *frame, int *got_packet_ptr)
722 {
723 SonicContext *s = avctx->priv_data;
724 RangeCoder c;
725 int i, j, ch, quant = 0, x = 0;
726 int ret;
727 const short *samples = (const int16_t*)frame->data[0];
728 uint8_t state[32];
729
730 if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
731 return ret;
732
733 ff_init_range_encoder(&c, avpkt->data, avpkt->size);
734 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
735 memset(state, 128, sizeof(state));
736
737 // short -> internal
738 for (i = 0; i < s->frame_size; i++)
739 s->int_samples[i] = samples[i];
740
741 if (!s->lossless)
742 for (i = 0; i < s->frame_size; i++)
743 s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
744
745 switch(s->decorrelation)
746 {
747 case MID_SIDE:
748 for (i = 0; i < s->frame_size; i += s->channels)
749 {
750 s->int_samples[i] += s->int_samples[i+1];
751 s->int_samples[i+1] -= shift(s->int_samples[i], 1);
752 }
753 break;
754 case LEFT_SIDE:
755 for (i = 0; i < s->frame_size; i += s->channels)
756 s->int_samples[i+1] -= s->int_samples[i];
757 break;
758 case RIGHT_SIDE:
759 for (i = 0; i < s->frame_size; i += s->channels)
760 s->int_samples[i] -= s->int_samples[i+1];
761 break;
762 }
763
764 memset(s->window, 0, s->window_size * sizeof(*s->window));
765
766 for (i = 0; i < s->tail_size; i++)
767 s->window[x++] = s->tail[i];
768
769 for (i = 0; i < s->frame_size; i++)
770 s->window[x++] = s->int_samples[i];
771
772 for (i = 0; i < s->tail_size; i++)
773 s->window[x++] = 0;
774
775 for (i = 0; i < s->tail_size; i++)
776 s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
777
778 // generate taps
779 modified_levinson_durbin(s->window, s->window_size,
780 s->predictor_k, s->num_taps, s->channels, s->tap_quant);
781
782 if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
783 return ret;
784
785 for (ch = 0; ch < s->channels; ch++)
786 {
787 x = s->tail_size+ch;
788 for (i = 0; i < s->block_align; i++)
789 {
790 int sum = 0;
791 for (j = 0; j < s->downsampling; j++, x += s->channels)
792 sum += s->window[x];
793 s->coded_samples[ch][i] = sum;
794 }
795 }
796
797 // simple rate control code
798 if (!s->lossless)
799 {
800 double energy1 = 0.0, energy2 = 0.0;
801 for (ch = 0; ch < s->channels; ch++)
802 {
803 for (i = 0; i < s->block_align; i++)
804 {
805 double sample = s->coded_samples[ch][i];
806 energy2 += sample*sample;
807 energy1 += fabs(sample);
808 }
809 }
810
811 energy2 = sqrt(energy2/(s->channels*s->block_align));
812 energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
813
814 // increase bitrate when samples are like a gaussian distribution
815 // reduce bitrate when samples are like a two-tailed exponential distribution
816
817 if (energy2 > energy1)
818 energy2 += (energy2-energy1)*RATE_VARIATION;
819
820 quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
821 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
822
823 quant = av_clip(quant, 1, 65534);
824
825 put_symbol(&c, state, quant, 0, NULL, NULL);
826
827 quant *= SAMPLE_FACTOR;
828 }
829
830 // write out coded samples
831 for (ch = 0; ch < s->channels; ch++)
832 {
833 if (!s->lossless)
834 for (i = 0; i < s->block_align; i++)
835 s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
836
837 if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
838 return ret;
839 }
840
841 avpkt->size = ff_rac_terminate(&c, 0);
842 *got_packet_ptr = 1;
843 return 0;
844
845 }
846 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
847
848 #if CONFIG_SONIC_DECODER
849 static const int samplerate_table[] =
850 { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
851
852 static av_cold int sonic_decode_init(AVCodecContext *avctx)
853 {
854 SonicContext *s = avctx->priv_data;
855 int *tmp;
856 GetBitContext gb;
857 int i;
858 int ret;
859
860 s->channels = avctx->ch_layout.nb_channels;
861 s->samplerate = avctx->sample_rate;
862
863 if (!avctx->extradata)
864 {
865 av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
866 return AVERROR_INVALIDDATA;
867 }
868
869 ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
870 if (ret < 0)
871 return ret;
872
873 s->version = get_bits(&gb, 2);
874 if (s->version >= 2) {
875 s->version = get_bits(&gb, 8);
876 s->minor_version = get_bits(&gb, 8);
877 }
878 if (s->version != 2)
879 {
880 av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
881 return AVERROR_INVALIDDATA;
882 }
883
884 if (s->version >= 1)
885 {
886 int sample_rate_index;
887 s->channels = get_bits(&gb, 2);
888 sample_rate_index = get_bits(&gb, 4);
889 if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
890 av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
891 return AVERROR_INVALIDDATA;
892 }
893 s->samplerate = samplerate_table[sample_rate_index];
894 av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
895 s->channels, s->samplerate);
896 }
897
898 if (s->channels > MAX_CHANNELS || s->channels < 1)
899 {
900 av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
901 return AVERROR_INVALIDDATA;
902 }
903 av_channel_layout_uninit(&avctx->ch_layout);
904 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
905 avctx->ch_layout.nb_channels = s->channels;
906
907 s->lossless = get_bits1(&gb);
908 if (!s->lossless)
909 skip_bits(&gb, 3); // XXX FIXME
910 s->decorrelation = get_bits(&gb, 2);
911 if (s->decorrelation != 3 && s->channels != 2) {
912 av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
913 return AVERROR_INVALIDDATA;
914 }
915
916 s->downsampling = get_bits(&gb, 2);
917 if (!s->downsampling) {
918 av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
919 return AVERROR_INVALIDDATA;
920 }
921
922 s->num_taps = (get_bits(&gb, 5)+1)<<5;
923 if (get_bits1(&gb)) // XXX FIXME
924 av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
925
926 s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
927 s->frame_size = s->channels*s->block_align*s->downsampling;
928 // avctx->frame_size = s->block_align;
929
930 if (s->num_taps * s->channels > s->frame_size) {
931 av_log(avctx, AV_LOG_ERROR,
932 "number of taps times channels (%d * %d) larger than frame size %d\n",
933 s->num_taps, s->channels, s->frame_size);
934 return AVERROR_INVALIDDATA;
935 }
936
937 av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
938 s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
939
940 // generate taps
941 s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
942 if (!s->tap_quant)
943 return AVERROR(ENOMEM);
944
945 for (i = 0; i < s->num_taps; i++)
946 s->tap_quant[i] = ff_sqrt(i+1);
947
948 s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
949
950 tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
951 if (!tmp)
952 return AVERROR(ENOMEM);
953 for (i = 0; i < s->channels; i++, tmp += s->num_taps)
954 s->predictor_state[i] = tmp;
955
956 tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
957 if (!tmp)
958 return AVERROR(ENOMEM);
959 for (i = 0; i < s->channels; i++, tmp += s->block_align)
960 s->coded_samples[i] = tmp;
961
962 s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
963 if (!s->int_samples)
964 return AVERROR(ENOMEM);
965
966 avctx->sample_fmt = AV_SAMPLE_FMT_S16;
967 return 0;
968 }
969
970 static av_cold int sonic_decode_close(AVCodecContext *avctx)
971 {
972 SonicContext *s = avctx->priv_data;
973
974 av_freep(&s->int_samples);
975 av_freep(&s->tap_quant);
976 av_freep(&s->predictor_k);
977 av_freep(&s->predictor_state[0]);
978 av_freep(&s->coded_samples[0]);
979
980 return 0;
981 }
982
983 static int sonic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
984 int *got_frame_ptr, AVPacket *avpkt)
985 {
986 const uint8_t *buf = avpkt->data;
987 int buf_size = avpkt->size;
988 SonicContext *s = avctx->priv_data;
989 RangeCoder c;
990 uint8_t state[32];
991 int i, quant, ch, j, ret;
992 int16_t *samples;
993
994 if (buf_size == 0) return 0;
995
996 frame->nb_samples = s->frame_size / avctx->ch_layout.nb_channels;
997 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
998 return ret;
999 samples = (int16_t *)frame->data[0];
1000
1001 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
1002
1003 memset(state, 128, sizeof(state));
1004 ff_init_range_decoder(&c, buf, buf_size);
1005 ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
1006
1007 intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
1008
1009 // dequantize
1010 for (i = 0; i < s->num_taps; i++)
1011 s->predictor_k[i] *= (unsigned) s->tap_quant[i];
1012
1013 if (s->lossless)
1014 quant = 1;
1015 else
1016 quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
1017
1018 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
1019
1020 for (ch = 0; ch < s->channels; ch++)
1021 {
1022 int x = ch;
1023
1024 if (c.overread > MAX_OVERREAD)
1025 return AVERROR_INVALIDDATA;
1026
1027 predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
1028
1029 intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
1030
1031 for (i = 0; i < s->block_align; i++)
1032 {
1033 for (j = 0; j < s->downsampling - 1; j++)
1034 {
1035 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
1036 x += s->channels;
1037 }
1038
1039 s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
1040 x += s->channels;
1041 }
1042
1043 for (i = 0; i < s->num_taps; i++)
1044 s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
1045 }
1046
1047 switch(s->decorrelation)
1048 {
1049 case MID_SIDE:
1050 for (i = 0; i < s->frame_size; i += s->channels)
1051 {
1052 s->int_samples[i+1] += shift(s->int_samples[i], 1);
1053 s->int_samples[i] -= s->int_samples[i+1];
1054 }
1055 break;
1056 case LEFT_SIDE:
1057 for (i = 0; i < s->frame_size; i += s->channels)
1058 s->int_samples[i+1] += s->int_samples[i];
1059 break;
1060 case RIGHT_SIDE:
1061 for (i = 0; i < s->frame_size; i += s->channels)
1062 s->int_samples[i] += s->int_samples[i+1];
1063 break;
1064 }
1065
1066 if (!s->lossless)
1067 for (i = 0; i < s->frame_size; i++)
1068 s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
1069
1070 // internal -> short
1071 for (i = 0; i < s->frame_size; i++)
1072 samples[i] = av_clip_int16(s->int_samples[i]);
1073
1074 *got_frame_ptr = 1;
1075
1076 return buf_size;
1077 }
1078
1079 const FFCodec ff_sonic_decoder = {
1080 .p.name = "sonic",
1081 .p.long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1082 .p.type = AVMEDIA_TYPE_AUDIO,
1083 .p.id = AV_CODEC_ID_SONIC,
1084 .priv_data_size = sizeof(SonicContext),
1085 .init = sonic_decode_init,
1086 .close = sonic_decode_close,
1087 FF_CODEC_DECODE_CB(sonic_decode_frame),
1088 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF,
1089 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1090 };
1091 #endif /* CONFIG_SONIC_DECODER */
1092
1093 #if CONFIG_SONIC_ENCODER
1094 const FFCodec ff_sonic_encoder = {
1095 .p.name = "sonic",
1096 .p.long_name = NULL_IF_CONFIG_SMALL("Sonic"),
1097 .p.type = AVMEDIA_TYPE_AUDIO,
1098 .p.id = AV_CODEC_ID_SONIC,
1099 .priv_data_size = sizeof(SonicContext),
1100 .init = sonic_encode_init,
1101 FF_CODEC_ENCODE_CB(sonic_encode_frame),
1102 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1103 .p.capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1104 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1105 .close = sonic_encode_close,
1106 };
1107 #endif
1108
1109 #if CONFIG_SONIC_LS_ENCODER
1110 const FFCodec ff_sonic_ls_encoder = {
1111 .p.name = "sonicls",
1112 .p.long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
1113 .p.type = AVMEDIA_TYPE_AUDIO,
1114 .p.id = AV_CODEC_ID_SONIC_LS,
1115 .priv_data_size = sizeof(SonicContext),
1116 .init = sonic_encode_init,
1117 FF_CODEC_ENCODE_CB(sonic_encode_frame),
1118 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
1119 .p.capabilities = AV_CODEC_CAP_EXPERIMENTAL,
1120 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
1121 .close = sonic_encode_close,
1122 };
1123 #endif
1124