| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Simple free lossless/lossy audio codec | ||
| 3 | * Copyright (c) 2004 Alex Beregszaszi | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include "config_components.h" | ||
| 23 | |||
| 24 | #include "libavutil/mem.h" | ||
| 25 | #include "avcodec.h" | ||
| 26 | #include "codec_internal.h" | ||
| 27 | #include "decode.h" | ||
| 28 | #include "encode.h" | ||
| 29 | #include "get_bits.h" | ||
| 30 | #include "golomb.h" | ||
| 31 | #include "put_golomb.h" | ||
| 32 | #include "rangecoder.h" | ||
| 33 | |||
| 34 | |||
| 35 | /** | ||
| 36 | * @file | ||
| 37 | * Simple free lossless/lossy audio codec | ||
| 38 | * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk) | ||
| 39 | * Written and designed by Alex Beregszaszi | ||
| 40 | * | ||
| 41 | * TODO: | ||
| 42 | * - CABAC put/get_symbol | ||
| 43 | * - independent quantizer for channels | ||
| 44 | * - >2 channels support | ||
| 45 | * - more decorrelation types | ||
| 46 | * - more tap_quant tests | ||
| 47 | * - selectable intlist writers/readers (bonk-style, golomb, cabac) | ||
| 48 | */ | ||
| 49 | |||
| 50 | #define MAX_CHANNELS 2 | ||
| 51 | |||
| 52 | #define MID_SIDE 0 | ||
| 53 | #define LEFT_SIDE 1 | ||
| 54 | #define RIGHT_SIDE 2 | ||
| 55 | |||
| 56 | typedef struct SonicContext { | ||
| 57 | int version; | ||
| 58 | int minor_version; | ||
| 59 | int lossless, decorrelation; | ||
| 60 | |||
| 61 | int num_taps, downsampling; | ||
| 62 | double quantization; | ||
| 63 | |||
| 64 | int channels, samplerate, block_align, frame_size; | ||
| 65 | |||
| 66 | int *tap_quant; | ||
| 67 | int *int_samples; | ||
| 68 | int *coded_samples[MAX_CHANNELS]; | ||
| 69 | |||
| 70 | // for encoding | ||
| 71 | int *tail; | ||
| 72 | int tail_size; | ||
| 73 | int *window; | ||
| 74 | int window_size; | ||
| 75 | |||
| 76 | // for decoding | ||
| 77 | int *predictor_k; | ||
| 78 | int *predictor_state[MAX_CHANNELS]; | ||
| 79 | } SonicContext; | ||
| 80 | |||
| 81 | #define LATTICE_SHIFT 10 | ||
| 82 | #define SAMPLE_SHIFT 4 | ||
| 83 | #define LATTICE_FACTOR (1 << LATTICE_SHIFT) | ||
| 84 | #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT) | ||
| 85 | |||
| 86 | #define BASE_QUANT 0.6 | ||
| 87 | #define RATE_VARIATION 3.0 | ||
| 88 | |||
/**
 * Arithmetic right shift with rounding to nearest.
 *
 * Adds half of the divisor (1 << (b - 1)) before shifting, so exact
 * half-way cases round towards positive infinity.
 *
 * The bias is added in unsigned arithmetic: a signed addition would be
 * undefined behaviour when a is close to INT_MAX, whereas the unsigned sum
 * simply wraps. The predictor code in this file uses unsigned casts for the
 * same reason.
 *
 * @param a value to scale down
 * @param b number of bits to shift by, must be in the range 1..31
 * @return a / 2^b rounded to nearest
 */
static inline int shift(int a, int b)
{
    return (int)(a + (1U << (b - 1))) >> b;
}
| 93 | |||
/**
 * Arithmetic right shift by b bits, incremented by one when a is negative.
 *
 * For negative values that are not a multiple of 2^b this yields the
 * quotient truncated towards zero; exact negative multiples end up one
 * above the true quotient.
 */
static inline int shift_down(int a, int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
| 98 | |||
| 99 | |||
| 100 | #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER | ||
| 101 | // Heavily modified Levinson-Durbin algorithm which | ||
| 102 | // copes better with quantization, and calculates the | ||
| 103 | // actual whitened result as it goes. | ||
| 104 | |||
| 105 | static void modified_levinson_durbin(int *window, int window_entries, | ||
| 106 | int *out, int out_entries, int channels, int *tap_quant) | ||
| 107 | { | ||
| 108 | int i; | ||
| 109 | int *state = window + window_entries; | ||
| 110 | |||
| 111 | memcpy(state, window, window_entries * sizeof(*state)); | ||
| 112 | |||
| 113 | for (i = 0; i < out_entries; i++) | ||
| 114 | { | ||
| 115 | int step = (i+1)*channels, k, j; | ||
| 116 | double xx = 0.0, xy = 0.0; | ||
| 117 | int *x_ptr = &(window[step]); | ||
| 118 | int *state_ptr = &(state[0]); | ||
| 119 | j = window_entries - step; | ||
| 120 | for (;j>0;j--,x_ptr++,state_ptr++) | ||
| 121 | { | ||
| 122 | double x_value = *x_ptr; | ||
| 123 | double state_value = *state_ptr; | ||
| 124 | xx += state_value*state_value; | ||
| 125 | xy += x_value*state_value; | ||
| 126 | } | ||
| 127 | if (xx == 0.0) | ||
| 128 | k = 0; | ||
| 129 | else | ||
| 130 | k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5)); | ||
| 131 | |||
| 132 | if (k > (LATTICE_FACTOR/tap_quant[i])) | ||
| 133 | k = LATTICE_FACTOR/tap_quant[i]; | ||
| 134 | if (-k > (LATTICE_FACTOR/tap_quant[i])) | ||
| 135 | k = -(LATTICE_FACTOR/tap_quant[i]); | ||
| 136 | |||
| 137 | out[i] = k; | ||
| 138 | k *= tap_quant[i]; | ||
| 139 | |||
| 140 | x_ptr = &(window[step]); | ||
| 141 | state_ptr = &(state[0]); | ||
| 142 | j = window_entries - step; | ||
| 143 | for (;j>0;j--,x_ptr++,state_ptr++) | ||
| 144 | { | ||
| 145 | int x_value = *x_ptr; | ||
| 146 | int state_value = *state_ptr; | ||
| 147 | *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT); | ||
| 148 | *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } | ||
| 152 | |||
/**
 * Map a sample rate in Hz to the index stored in the stream header.
 *
 * @return the index (0..8) of the rate, or AVERROR(EINVAL) when the rate is
 *         not one of the supported values
 */
static inline int code_samplerate(int samplerate)
{
    static const int rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    int code;

    for (code = 0; code < (int)(sizeof(rates) / sizeof(rates[0])); code++) {
        if (rates[code] == samplerate)
            return code;
    }

    return AVERROR(EINVAL);
}
| 169 | |||
| 170 | static av_cold int sonic_encode_init(AVCodecContext *avctx) | ||
| 171 | { | ||
| 172 | SonicContext *s = avctx->priv_data; | ||
| 173 | int *coded_samples; | ||
| 174 | PutBitContext pb; | ||
| 175 | int i; | ||
| 176 | |||
| 177 | s->version = 2; | ||
| 178 | |||
| 179 | if (avctx->ch_layout.nb_channels > MAX_CHANNELS) | ||
| 180 | { | ||
| 181 | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); | ||
| 182 | return AVERROR(EINVAL); /* only stereo or mono for now */ | ||
| 183 | } | ||
| 184 | |||
| 185 | if (avctx->ch_layout.nb_channels == 2) | ||
| 186 | s->decorrelation = MID_SIDE; | ||
| 187 | else | ||
| 188 | s->decorrelation = 3; | ||
| 189 | |||
| 190 | if (avctx->codec->id == AV_CODEC_ID_SONIC_LS) | ||
| 191 | { | ||
| 192 | s->lossless = 1; | ||
| 193 | s->num_taps = 32; | ||
| 194 | s->downsampling = 1; | ||
| 195 | s->quantization = 0.0; | ||
| 196 | } | ||
| 197 | else | ||
| 198 | { | ||
| 199 | s->num_taps = 128; | ||
| 200 | s->downsampling = 2; | ||
| 201 | s->quantization = 1.0; | ||
| 202 | } | ||
| 203 | |||
| 204 | // max tap 2048 | ||
| 205 | if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) { | ||
| 206 | av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n"); | ||
| 207 | return AVERROR_INVALIDDATA; | ||
| 208 | } | ||
| 209 | |||
| 210 | // generate taps | ||
| 211 | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); | ||
| 212 | if (!s->tap_quant) | ||
| 213 | return AVERROR(ENOMEM); | ||
| 214 | |||
| 215 | for (i = 0; i < s->num_taps; i++) | ||
| 216 | s->tap_quant[i] = ff_sqrt(i+1); | ||
| 217 | |||
| 218 | s->channels = avctx->ch_layout.nb_channels; | ||
| 219 | s->samplerate = avctx->sample_rate; | ||
| 220 | |||
| 221 | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); | ||
| 222 | s->frame_size = s->channels*s->block_align*s->downsampling; | ||
| 223 | |||
| 224 | s->tail_size = s->num_taps*s->channels; | ||
| 225 | s->tail = av_calloc(s->tail_size, sizeof(*s->tail)); | ||
| 226 | if (!s->tail) | ||
| 227 | return AVERROR(ENOMEM); | ||
| 228 | |||
| 229 | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) ); | ||
| 230 | if (!s->predictor_k) | ||
| 231 | return AVERROR(ENOMEM); | ||
| 232 | |||
| 233 | coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples)); | ||
| 234 | if (!coded_samples) | ||
| 235 | return AVERROR(ENOMEM); | ||
| 236 | for (i = 0; i < s->channels; i++, coded_samples += s->block_align) | ||
| 237 | s->coded_samples[i] = coded_samples; | ||
| 238 | |||
| 239 | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); | ||
| 240 | |||
| 241 | s->window_size = ((2*s->tail_size)+s->frame_size); | ||
| 242 | s->window = av_calloc(s->window_size, 2 * sizeof(*s->window)); | ||
| 243 | if (!s->window || !s->int_samples) | ||
| 244 | return AVERROR(ENOMEM); | ||
| 245 | |||
| 246 | avctx->extradata = av_mallocz(16); | ||
| 247 | if (!avctx->extradata) | ||
| 248 | return AVERROR(ENOMEM); | ||
| 249 | init_put_bits(&pb, avctx->extradata, 16*8); | ||
| 250 | |||
| 251 | put_bits(&pb, 2, s->version); // version | ||
| 252 | if (s->version >= 1) | ||
| 253 | { | ||
| 254 | if (s->version >= 2) { | ||
| 255 | put_bits(&pb, 8, s->version); | ||
| 256 | put_bits(&pb, 8, s->minor_version); | ||
| 257 | } | ||
| 258 | put_bits(&pb, 2, s->channels); | ||
| 259 | put_bits(&pb, 4, code_samplerate(s->samplerate)); | ||
| 260 | } | ||
| 261 | put_bits(&pb, 1, s->lossless); | ||
| 262 | if (!s->lossless) | ||
| 263 | put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision | ||
| 264 | put_bits(&pb, 2, s->decorrelation); | ||
| 265 | put_bits(&pb, 2, s->downsampling); | ||
| 266 | put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024 | ||
| 267 | put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table | ||
| 268 | |||
| 269 | flush_put_bits(&pb); | ||
| 270 | avctx->extradata_size = put_bytes_output(&pb); | ||
| 271 | |||
| 272 | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", | ||
| 273 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); | ||
| 274 | |||
| 275 | avctx->frame_size = s->block_align*s->downsampling; | ||
| 276 | |||
| 277 | return 0; | ||
| 278 | } | ||
| 279 | |||
| 280 | static av_cold int sonic_encode_close(AVCodecContext *avctx) | ||
| 281 | { | ||
| 282 | SonicContext *s = avctx->priv_data; | ||
| 283 | |||
| 284 | av_freep(&s->coded_samples[0]); | ||
| 285 | av_freep(&s->predictor_k); | ||
| 286 | av_freep(&s->tail); | ||
| 287 | av_freep(&s->tap_quant); | ||
| 288 | av_freep(&s->window); | ||
| 289 | av_freep(&s->int_samples); | ||
| 290 | |||
| 291 | return 0; | ||
| 292 | } | ||
| 293 | |||
| 294 | static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){ | ||
| 295 | int i; | ||
| 296 | |||
| 297 | #define put_rac(C,S,B) \ | ||
| 298 | do{\ | ||
| 299 | if(rc_stat){\ | ||
| 300 | rc_stat[*(S)][B]++;\ | ||
| 301 | rc_stat2[(S)-state][B]++;\ | ||
| 302 | }\ | ||
| 303 | put_rac(C,S,B);\ | ||
| 304 | }while(0) | ||
| 305 | |||
| 306 | if(v){ | ||
| 307 | const int a= FFABS(v); | ||
| 308 | const int e= av_log2(a); | ||
| 309 | put_rac(c, state+0, 0); | ||
| 310 | if(e<=9){ | ||
| 311 | for(i=0; i<e; i++){ | ||
| 312 | put_rac(c, state+1+i, 1); //1..10 | ||
| 313 | } | ||
| 314 | put_rac(c, state+1+i, 0); | ||
| 315 | |||
| 316 | for(i=e-1; i>=0; i--){ | ||
| 317 | put_rac(c, state+22+i, (a>>i)&1); //22..31 | ||
| 318 | } | ||
| 319 | |||
| 320 | if(is_signed) | ||
| 321 | put_rac(c, state+11 + e, v < 0); //11..21 | ||
| 322 | }else{ | ||
| 323 | for(i=0; i<e; i++){ | ||
| 324 | put_rac(c, state+1+FFMIN(i,9), 1); //1..10 | ||
| 325 | } | ||
| 326 | put_rac(c, state+1+9, 0); | ||
| 327 | |||
| 328 | for(i=e-1; i>=0; i--){ | ||
| 329 | put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 | ||
| 330 | } | ||
| 331 | |||
| 332 | if(is_signed) | ||
| 333 | put_rac(c, state+11 + 10, v < 0); //11..21 | ||
| 334 | } | ||
| 335 | }else{ | ||
| 336 | put_rac(c, state+0, 1); | ||
| 337 | } | ||
| 338 | #undef put_rac | ||
| 339 | } | ||
| 340 | |||
| 341 | static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) | ||
| 342 | { | ||
| 343 | int i; | ||
| 344 | |||
| 345 | for (i = 0; i < entries; i++) | ||
| 346 | put_symbol(c, state, buf[i], 1, NULL, NULL); | ||
| 347 | |||
| 348 | return 1; | ||
| 349 | } | ||
| 350 | |||
| 351 | static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, | ||
| 352 | const AVFrame *frame, int *got_packet_ptr) | ||
| 353 | { | ||
| 354 | SonicContext *s = avctx->priv_data; | ||
| 355 | RangeCoder c; | ||
| 356 | int i, j, ch, quant = 0, x = 0; | ||
| 357 | int ret; | ||
| 358 | const short *samples = (const int16_t*)frame->data[0]; | ||
| 359 | uint8_t state[32]; | ||
| 360 | |||
| 361 | if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0) | ||
| 362 | return ret; | ||
| 363 | |||
| 364 | ff_init_range_encoder(&c, avpkt->data, avpkt->size); | ||
| 365 | ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8); | ||
| 366 | memset(state, 128, sizeof(state)); | ||
| 367 | |||
| 368 | // short -> internal | ||
| 369 | for (i = 0; i < s->frame_size; i++) | ||
| 370 | s->int_samples[i] = samples[i]; | ||
| 371 | |||
| 372 | if (!s->lossless) | ||
| 373 | for (i = 0; i < s->frame_size; i++) | ||
| 374 | s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT; | ||
| 375 | |||
| 376 | switch(s->decorrelation) | ||
| 377 | { | ||
| 378 | case MID_SIDE: | ||
| 379 | for (i = 0; i < s->frame_size; i += s->channels) | ||
| 380 | { | ||
| 381 | s->int_samples[i] += s->int_samples[i+1]; | ||
| 382 | s->int_samples[i+1] -= shift(s->int_samples[i], 1); | ||
| 383 | } | ||
| 384 | break; | ||
| 385 | case LEFT_SIDE: | ||
| 386 | for (i = 0; i < s->frame_size; i += s->channels) | ||
| 387 | s->int_samples[i+1] -= s->int_samples[i]; | ||
| 388 | break; | ||
| 389 | case RIGHT_SIDE: | ||
| 390 | for (i = 0; i < s->frame_size; i += s->channels) | ||
| 391 | s->int_samples[i] -= s->int_samples[i+1]; | ||
| 392 | break; | ||
| 393 | } | ||
| 394 | |||
| 395 | memset(s->window, 0, s->window_size * sizeof(*s->window)); | ||
| 396 | |||
| 397 | for (i = 0; i < s->tail_size; i++) | ||
| 398 | s->window[x++] = s->tail[i]; | ||
| 399 | |||
| 400 | for (i = 0; i < s->frame_size; i++) | ||
| 401 | s->window[x++] = s->int_samples[i]; | ||
| 402 | |||
| 403 | for (i = 0; i < s->tail_size; i++) | ||
| 404 | s->window[x++] = 0; | ||
| 405 | |||
| 406 | for (i = 0; i < s->tail_size; i++) | ||
| 407 | s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i]; | ||
| 408 | |||
| 409 | // generate taps | ||
| 410 | modified_levinson_durbin(s->window, s->window_size, | ||
| 411 | s->predictor_k, s->num_taps, s->channels, s->tap_quant); | ||
| 412 | |||
| 413 | if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0) | ||
| 414 | return ret; | ||
| 415 | |||
| 416 | for (ch = 0; ch < s->channels; ch++) | ||
| 417 | { | ||
| 418 | x = s->tail_size+ch; | ||
| 419 | for (i = 0; i < s->block_align; i++) | ||
| 420 | { | ||
| 421 | int sum = 0; | ||
| 422 | for (j = 0; j < s->downsampling; j++, x += s->channels) | ||
| 423 | sum += s->window[x]; | ||
| 424 | s->coded_samples[ch][i] = sum; | ||
| 425 | } | ||
| 426 | } | ||
| 427 | |||
| 428 | // simple rate control code | ||
| 429 | if (!s->lossless) | ||
| 430 | { | ||
| 431 | double energy1 = 0.0, energy2 = 0.0; | ||
| 432 | for (ch = 0; ch < s->channels; ch++) | ||
| 433 | { | ||
| 434 | for (i = 0; i < s->block_align; i++) | ||
| 435 | { | ||
| 436 | double sample = s->coded_samples[ch][i]; | ||
| 437 | energy2 += sample*sample; | ||
| 438 | energy1 += fabs(sample); | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | energy2 = sqrt(energy2/(s->channels*s->block_align)); | ||
| 443 | energy1 = M_SQRT2*energy1/(s->channels*s->block_align); | ||
| 444 | |||
| 445 | // increase bitrate when samples are like a gaussian distribution | ||
| 446 | // reduce bitrate when samples are like a two-tailed exponential distribution | ||
| 447 | |||
| 448 | if (energy2 > energy1) | ||
| 449 | energy2 += (energy2-energy1)*RATE_VARIATION; | ||
| 450 | |||
| 451 | quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR); | ||
| 452 | // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2); | ||
| 453 | |||
| 454 | quant = av_clip(quant, 1, 65534); | ||
| 455 | |||
| 456 | put_symbol(&c, state, quant, 0, NULL, NULL); | ||
| 457 | |||
| 458 | quant *= SAMPLE_FACTOR; | ||
| 459 | } | ||
| 460 | |||
| 461 | // write out coded samples | ||
| 462 | for (ch = 0; ch < s->channels; ch++) | ||
| 463 | { | ||
| 464 | if (!s->lossless) | ||
| 465 | for (i = 0; i < s->block_align; i++) | ||
| 466 | s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant); | ||
| 467 | |||
| 468 | if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0) | ||
| 469 | return ret; | ||
| 470 | } | ||
| 471 | |||
| 472 | avpkt->size = ff_rac_terminate(&c, 0); | ||
| 473 | *got_packet_ptr = 1; | ||
| 474 | return 0; | ||
| 475 | |||
| 476 | } | ||
| 477 | #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */ | ||
| 478 | |||
| 479 | #if CONFIG_SONIC_DECODER | ||
/* Sample rates in Hz, indexed by the 4-bit code read from extradata in
 * sonic_decode_init(). The order matches the values returned by
 * code_samplerate() on the encoder side. */
static const int samplerate_table[] =
    { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
| 482 | |||
| 483 | ✗ | static av_cold int sonic_decode_init(AVCodecContext *avctx) | |
| 484 | { | ||
| 485 | ✗ | SonicContext *s = avctx->priv_data; | |
| 486 | int *tmp; | ||
| 487 | GetBitContext gb; | ||
| 488 | int i; | ||
| 489 | int ret; | ||
| 490 | |||
| 491 | ✗ | s->channels = avctx->ch_layout.nb_channels; | |
| 492 | ✗ | s->samplerate = avctx->sample_rate; | |
| 493 | |||
| 494 | ✗ | if (!avctx->extradata) | |
| 495 | { | ||
| 496 | ✗ | av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n"); | |
| 497 | ✗ | return AVERROR_INVALIDDATA; | |
| 498 | } | ||
| 499 | |||
| 500 | ✗ | ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size); | |
| 501 | ✗ | if (ret < 0) | |
| 502 | ✗ | return ret; | |
| 503 | |||
| 504 | ✗ | s->version = get_bits(&gb, 2); | |
| 505 | ✗ | if (s->version >= 2) { | |
| 506 | ✗ | s->version = get_bits(&gb, 8); | |
| 507 | ✗ | s->minor_version = get_bits(&gb, 8); | |
| 508 | } | ||
| 509 | ✗ | if (s->version != 2) | |
| 510 | { | ||
| 511 | ✗ | av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n"); | |
| 512 | ✗ | return AVERROR_INVALIDDATA; | |
| 513 | } | ||
| 514 | |||
| 515 | ✗ | if (s->version >= 1) | |
| 516 | { | ||
| 517 | int sample_rate_index; | ||
| 518 | ✗ | s->channels = get_bits(&gb, 2); | |
| 519 | ✗ | sample_rate_index = get_bits(&gb, 4); | |
| 520 | ✗ | if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) { | |
| 521 | ✗ | av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index); | |
| 522 | ✗ | return AVERROR_INVALIDDATA; | |
| 523 | } | ||
| 524 | ✗ | s->samplerate = samplerate_table[sample_rate_index]; | |
| 525 | ✗ | av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n", | |
| 526 | s->channels, s->samplerate); | ||
| 527 | } | ||
| 528 | |||
| 529 | ✗ | if (s->channels > MAX_CHANNELS || s->channels < 1) | |
| 530 | { | ||
| 531 | ✗ | av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n"); | |
| 532 | ✗ | return AVERROR_INVALIDDATA; | |
| 533 | } | ||
| 534 | ✗ | av_channel_layout_uninit(&avctx->ch_layout); | |
| 535 | ✗ | avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC; | |
| 536 | ✗ | avctx->ch_layout.nb_channels = s->channels; | |
| 537 | |||
| 538 | ✗ | s->lossless = get_bits1(&gb); | |
| 539 | ✗ | if (!s->lossless) | |
| 540 | ✗ | skip_bits(&gb, 3); // XXX FIXME | |
| 541 | ✗ | s->decorrelation = get_bits(&gb, 2); | |
| 542 | ✗ | if (s->decorrelation != 3 && s->channels != 2) { | |
| 543 | ✗ | av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation); | |
| 544 | ✗ | return AVERROR_INVALIDDATA; | |
| 545 | } | ||
| 546 | |||
| 547 | ✗ | s->downsampling = get_bits(&gb, 2); | |
| 548 | ✗ | if (!s->downsampling) { | |
| 549 | ✗ | av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n"); | |
| 550 | ✗ | return AVERROR_INVALIDDATA; | |
| 551 | } | ||
| 552 | |||
| 553 | ✗ | s->num_taps = (get_bits(&gb, 5)+1)<<5; | |
| 554 | ✗ | if (get_bits1(&gb)) // XXX FIXME | |
| 555 | ✗ | av_log(avctx, AV_LOG_INFO, "Custom quant table\n"); | |
| 556 | |||
| 557 | ✗ | if (s->num_taps > 128) | |
| 558 | ✗ | return AVERROR_INVALIDDATA; | |
| 559 | |||
| 560 | ✗ | s->block_align = 2048LL*s->samplerate/(44100*s->downsampling); | |
| 561 | ✗ | s->frame_size = s->channels*s->block_align*s->downsampling; | |
| 562 | // avctx->frame_size = s->block_align; | ||
| 563 | |||
| 564 | ✗ | if (s->num_taps * s->channels > s->frame_size) { | |
| 565 | ✗ | av_log(avctx, AV_LOG_ERROR, | |
| 566 | "number of taps times channels (%d * %d) larger than frame size %d\n", | ||
| 567 | s->num_taps, s->channels, s->frame_size); | ||
| 568 | ✗ | return AVERROR_INVALIDDATA; | |
| 569 | } | ||
| 570 | |||
| 571 | ✗ | av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n", | |
| 572 | s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling); | ||
| 573 | |||
| 574 | // generate taps | ||
| 575 | ✗ | s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant)); | |
| 576 | ✗ | if (!s->tap_quant) | |
| 577 | ✗ | return AVERROR(ENOMEM); | |
| 578 | |||
| 579 | ✗ | for (i = 0; i < s->num_taps; i++) | |
| 580 | ✗ | s->tap_quant[i] = ff_sqrt(i+1); | |
| 581 | |||
| 582 | ✗ | s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k)); | |
| 583 | |||
| 584 | ✗ | tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state)); | |
| 585 | ✗ | if (!tmp) | |
| 586 | ✗ | return AVERROR(ENOMEM); | |
| 587 | ✗ | for (i = 0; i < s->channels; i++, tmp += s->num_taps) | |
| 588 | ✗ | s->predictor_state[i] = tmp; | |
| 589 | |||
| 590 | ✗ | tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples)); | |
| 591 | ✗ | if (!tmp) | |
| 592 | ✗ | return AVERROR(ENOMEM); | |
| 593 | ✗ | for (i = 0; i < s->channels; i++, tmp += s->block_align) | |
| 594 | ✗ | s->coded_samples[i] = tmp; | |
| 595 | |||
| 596 | ✗ | s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples)); | |
| 597 | ✗ | if (!s->int_samples) | |
| 598 | ✗ | return AVERROR(ENOMEM); | |
| 599 | |||
| 600 | ✗ | avctx->sample_fmt = AV_SAMPLE_FMT_S16; | |
| 601 | ✗ | return 0; | |
| 602 | } | ||
| 603 | |||
| 604 | ✗ | static av_cold int sonic_decode_close(AVCodecContext *avctx) | |
| 605 | { | ||
| 606 | ✗ | SonicContext *s = avctx->priv_data; | |
| 607 | |||
| 608 | ✗ | av_freep(&s->int_samples); | |
| 609 | ✗ | av_freep(&s->tap_quant); | |
| 610 | ✗ | av_freep(&s->predictor_k); | |
| 611 | ✗ | av_freep(&s->predictor_state[0]); | |
| 612 | ✗ | av_freep(&s->coded_samples[0]); | |
| 613 | |||
| 614 | ✗ | return 0; | |
| 615 | } | ||
| 616 | |||
| 617 | ✗ | static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ | |
| 618 | ✗ | if(get_rac(c, state+0)) | |
| 619 | ✗ | return 0; | |
| 620 | else{ | ||
| 621 | int i, e; | ||
| 622 | unsigned a; | ||
| 623 | ✗ | e= 0; | |
| 624 | ✗ | while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 | |
| 625 | ✗ | e++; | |
| 626 | ✗ | if (e > 31) | |
| 627 | ✗ | return AVERROR_INVALIDDATA; | |
| 628 | } | ||
| 629 | |||
| 630 | ✗ | a= 1; | |
| 631 | ✗ | for(i=e-1; i>=0; i--){ | |
| 632 | ✗ | a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 | |
| 633 | } | ||
| 634 | |||
| 635 | ✗ | e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21 | |
| 636 | ✗ | return (a^e)-e; | |
| 637 | } | ||
| 638 | } | ||
| 639 | |||
| 640 | ✗ | static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part) | |
| 641 | { | ||
| 642 | int i; | ||
| 643 | |||
| 644 | ✗ | for (i = 0; i < entries; i++) | |
| 645 | ✗ | buf[i] = get_symbol(c, state, 1); | |
| 646 | |||
| 647 | ✗ | return 1; | |
| 648 | } | ||
| 649 | |||
| 650 | ✗ | static void predictor_init_state(int *k, int *state, int order) | |
| 651 | { | ||
| 652 | int i; | ||
| 653 | |||
| 654 | ✗ | for (i = order-2; i >= 0; i--) | |
| 655 | { | ||
| 656 | ✗ | int j, p, x = state[i]; | |
| 657 | |||
| 658 | ✗ | for (j = 0, p = i+1; p < order; j++,p++) | |
| 659 | { | ||
| 660 | ✗ | int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT); | |
| 661 | ✗ | state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT); | |
| 662 | ✗ | x = tmp; | |
| 663 | } | ||
| 664 | } | ||
| 665 | ✗ | } | |
| 666 | |||
| 667 | ✗ | static int predictor_calc_error(int *k, int *state, int order, int error) | |
| 668 | { | ||
| 669 | ✗ | int i, x = error - (unsigned)shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT); | |
| 670 | |||
| 671 | ✗ | int *k_ptr = &(k[order-2]), | |
| 672 | ✗ | *state_ptr = &(state[order-2]); | |
| 673 | ✗ | for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) | |
| 674 | { | ||
| 675 | ✗ | int k_value = *k_ptr, state_value = *state_ptr; | |
| 676 | ✗ | x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT); | |
| 677 | ✗ | state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT); | |
| 678 | } | ||
| 679 | |||
| 680 | // don't drift too far, to avoid overflows | ||
| 681 | ✗ | if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16); | |
| 682 | ✗ | if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16); | |
| 683 | |||
| 684 | ✗ | state[0] = x; | |
| 685 | |||
| 686 | ✗ | return x; | |
| 687 | } | ||
| 688 | |||
/**
 * Decode one packet into a frame of interleaved signed 16-bit samples.
 *
 * Reads the predictor coefficients and (in lossy mode) the quantizer, then
 * for each channel reads block_align residuals and runs them through the
 * lattice synthesis filter. Afterwards the inter-channel decorrelation is
 * undone and the samples are scaled back and clipped to 16 bits.
 *
 * @param avctx         codec context, priv_data is a SonicContext
 * @param frame         output frame, buffer obtained with ff_get_buffer()
 * @param got_frame_ptr set to 1 when a frame was produced
 * @param avpkt         input packet
 * @return the packet size on success, 0 for an empty packet, a negative
 *         AVERROR code on failure
 */
static int sonic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
                              int *got_frame_ptr, AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    SonicContext *s = avctx->priv_data;
    RangeCoder c;
    uint8_t state[32];
    int i, quant, ch, j, ret;
    int16_t *samples;

    /* an empty packet produces no frame */
    if (buf_size == 0) return 0;

    frame->nb_samples = s->frame_size / avctx->ch_layout.nb_channels;
    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;
    samples = (int16_t *)frame->data[0];

//    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);

    /* the adaptive contexts start from 128 for every packet */
    memset(state, 128, sizeof(state));
    ff_init_range_decoder(&c, buf, buf_size);
    ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);

    /* NOTE(review): the return value of intlist_read() is not checked, and
     * get_symbol() reports errors in-band, so an error code could end up
     * stored as data - confirm this is intended */
    intlist_read(&c, state, s->predictor_k, s->num_taps, 0);

    // dequantize
    for (i = 0; i < s->num_taps; i++)
        s->predictor_k[i] *= (unsigned) s->tap_quant[i];

    /* lossy streams carry a per-packet quantizer in units of SAMPLE_FACTOR */
    if (s->lossless)
        quant = 1;
    else
        quant = get_symbol(&c, state, 0) * (unsigned)SAMPLE_FACTOR;

//    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);

    for (ch = 0; ch < s->channels; ch++)
    {
        /* x walks the interleaved output positions of this channel */
        int x = ch;

        /* give up once the range decoder has read too far past the packet */
        if (c.overread > MAX_OVERREAD)
            return AVERROR_INVALIDDATA;

        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);

        intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);

        for (i = 0; i < s->block_align; i++)
        {
            /* each coded sample yields `downsampling` outputs: the first
             * downsampling-1 are predicted with zero residual */
            for (j = 0; j < s->downsampling - 1; j++)
            {
                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
                x += s->channels;
            }

            /* the last one gets the dequantized residual */
            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
            x += s->channels;
        }

        /* reload the state with the last num_taps samples of this channel,
         * newest first */
        for (i = 0; i < s->num_taps; i++)
            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
    }

    /* undo the inter-channel decorrelation; other values leave the samples
     * untouched */
    switch(s->decorrelation)
    {
        case MID_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
            {
                s->int_samples[i+1] += shift(s->int_samples[i], 1);
                s->int_samples[i] -= s->int_samples[i+1];
            }
            break;
        case LEFT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i+1] += s->int_samples[i];
            break;
        case RIGHT_SIDE:
            for (i = 0; i < s->frame_size; i += s->channels)
                s->int_samples[i] += s->int_samples[i+1];
            break;
    }

    /* lossy mode: drop the SAMPLE_SHIFT fractional bits with rounding */
    if (!s->lossless)
        for (i = 0; i < s->frame_size; i++)
            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);

    // internal -> short
    for (i = 0; i < s->frame_size; i++)
        samples[i] = av_clip_int16(s->int_samples[i]);

    *got_frame_ptr = 1;

    return buf_size;
}
| 784 | |||
| 785 | const FFCodec ff_sonic_decoder = { | ||
| 786 | .p.name = "sonic", | ||
| 787 | CODEC_LONG_NAME("Sonic"), | ||
| 788 | .p.type = AVMEDIA_TYPE_AUDIO, | ||
| 789 | .p.id = AV_CODEC_ID_SONIC, | ||
| 790 | .priv_data_size = sizeof(SonicContext), | ||
| 791 | .init = sonic_decode_init, | ||
| 792 | .close = sonic_decode_close, | ||
| 793 | FF_CODEC_DECODE_CB(sonic_decode_frame), | ||
| 794 | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_CHANNEL_CONF, | ||
| 795 | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, | ||
| 796 | }; | ||
| 797 | #endif /* CONFIG_SONIC_DECODER */ | ||
| 798 | |||
| 799 | #if CONFIG_SONIC_ENCODER | ||
| 800 | const FFCodec ff_sonic_encoder = { | ||
| 801 | .p.name = "sonic", | ||
| 802 | CODEC_LONG_NAME("Sonic"), | ||
| 803 | .p.type = AVMEDIA_TYPE_AUDIO, | ||
| 804 | .p.id = AV_CODEC_ID_SONIC, | ||
| 805 | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | | ||
| 806 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, | ||
| 807 | .priv_data_size = sizeof(SonicContext), | ||
| 808 | .init = sonic_encode_init, | ||
| 809 | FF_CODEC_ENCODE_CB(sonic_encode_frame), | ||
| 810 | CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_S16), | ||
| 811 | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, | ||
| 812 | .close = sonic_encode_close, | ||
| 813 | }; | ||
| 814 | #endif | ||
| 815 | |||
| 816 | #if CONFIG_SONIC_LS_ENCODER | ||
| 817 | const FFCodec ff_sonic_ls_encoder = { | ||
| 818 | .p.name = "sonicls", | ||
| 819 | CODEC_LONG_NAME("Sonic lossless"), | ||
| 820 | .p.type = AVMEDIA_TYPE_AUDIO, | ||
| 821 | .p.id = AV_CODEC_ID_SONIC_LS, | ||
| 822 | .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL | | ||
| 823 | AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE, | ||
| 824 | .priv_data_size = sizeof(SonicContext), | ||
| 825 | .init = sonic_encode_init, | ||
| 826 | FF_CODEC_ENCODE_CB(sonic_encode_frame), | ||
| 827 | CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_S16), | ||
| 828 | .caps_internal = FF_CODEC_CAP_INIT_CLEANUP, | ||
| 829 | .close = sonic_encode_close, | ||
| 830 | }; | ||
| 831 | #endif | ||
| 832 |