GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/snow_dwt.c Lines: 440 460 95.7 %
Date: 2020-09-25 14:59:26 Branches: 218 245 89.0 %

Line Branch Exec Source
1
/*
2
 * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
3
 * Copyright (C) 2008 David Conrad
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
22
#include "libavutil/attributes.h"
23
#include "libavutil/avassert.h"
24
#include "libavutil/common.h"
25
#include "me_cmp.h"
26
#include "snow_dwt.h"
27
28
461
int ff_slice_buffer_init(slice_buffer *buf, int line_count,
29
                         int max_allocated_lines, int line_width,
30
                         IDWTELEM *base_buffer)
31
{
32
    int i;
33
34
461
    buf->base_buffer = base_buffer;
35
461
    buf->line_count  = line_count;
36
461
    buf->line_width  = line_width;
37
461
    buf->data_count  = max_allocated_lines;
38
461
    buf->line        = av_mallocz_array(line_count, sizeof(IDWTELEM *));
39
461
    if (!buf->line)
40
        return AVERROR(ENOMEM);
41
461
    buf->data_stack  = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *));
42
461
    if (!buf->data_stack) {
43
        av_freep(&buf->line);
44
        return AVERROR(ENOMEM);
45
    }
46
47
32421
    for (i = 0; i < max_allocated_lines; i++) {
48
31960
        buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM));
49
31960
        if (!buf->data_stack[i]) {
50
            for (i--; i >=0; i--)
51
                av_freep(&buf->data_stack[i]);
52
            av_freep(&buf->data_stack);
53
            av_freep(&buf->line);
54
            return AVERROR(ENOMEM);
55
        }
56
    }
57
58
461
    buf->data_stack_top = max_allocated_lines - 1;
59
461
    return 0;
60
}
61
62
128848
IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
63
{
64
    IDWTELEM *buffer;
65
66
128848
    av_assert0(buf->data_stack_top >= 0);
67
//  av_assert1(!buf->line[line]);
68
128848
    if (buf->line[line])
69
        return buf->line[line];
70
71
128848
    buffer = buf->data_stack[buf->data_stack_top];
72
128848
    buf->data_stack_top--;
73
128848
    buf->line[line] = buffer;
74
75
128848
    return buffer;
76
}
77
78
128848
void ff_slice_buffer_release(slice_buffer *buf, int line)
79
{
80
    IDWTELEM *buffer;
81
82
    av_assert1(line >= 0 && line < buf->line_count);
83
    av_assert1(buf->line[line]);
84
85
128848
    buffer = buf->line[line];
86
128848
    buf->data_stack_top++;
87
128848
    buf->data_stack[buf->data_stack_top] = buffer;
88
128848
    buf->line[line]                      = NULL;
89
128848
}
90
91
1864
void ff_slice_buffer_flush(slice_buffer *buf)
92
{
93
    int i;
94
95
1864
    if (!buf->line)
96
20
        return;
97
98
257844
    for (i = 0; i < buf->line_count; i++)
99
256000
        if (buf->line[i])
100
848
            ff_slice_buffer_release(buf, i);
101
}
102
103
481
void ff_slice_buffer_destroy(slice_buffer *buf)
104
{
105
    int i;
106
481
    ff_slice_buffer_flush(buf);
107
108
481
    if (buf->data_stack)
109
32421
        for (i = buf->data_count - 1; i >= 0; i--)
110
31960
            av_freep(&buf->data_stack[i]);
111
481
    av_freep(&buf->data_stack);
112
481
    av_freep(&buf->line);
113
481
}
114
115
72825120
static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
116
                                  int dst_step, int src_step, int ref_step,
117
                                  int width, int mul, int add, int shift,
118
                                  int highpass, int inverse)
119
{
120
72825120
    const int mirror_left  = !highpass;
121
72825120
    const int mirror_right = (width & 1) ^ highpass;
122
72825120
    const int w            = (width >> 1) - 1 + (highpass & width);
123
    int i;
124
125
#define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
126
72825120
    if (mirror_left) {
127
24330840
        dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
128
24330840
        dst   += dst_step;
129
24330840
        src   += src_step;
130
    }
131
132
769916940
    for (i = 0; i < w; i++)
133
697091820
        dst[i * dst_step] = LIFT(src[i * src_step],
134
                                 ((mul * (ref[i * ref_step] +
135
                                          ref[(i + 1) * ref_step]) +
136
                                   add) >> shift),
137
                                 inverse);
138
139
72825120
    if (mirror_right)
140
48494280
        dst[w * dst_step] = LIFT(src[w * src_step],
141
                                 ((mul * 2 * ref[w * ref_step] + add) >> shift),
142
                                 inverse);
143
72825120
}
144
145
24163440
static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
146
                                   int dst_step, int src_step, int ref_step,
147
                                   int width, int mul, int add, int shift,
148
                                   int highpass, int inverse)
149
{
150
24163440
    const int mirror_left  = !highpass;
151
24163440
    const int mirror_right = (width & 1) ^ highpass;
152
24163440
    const int w            = (width >> 1) - 1 + (highpass & width);
153
    int i;
154
155
    av_assert1(shift == 4);
156
#define LIFTS(src, ref, inv)                                            \
157
    ((inv) ? (src) + (((ref) + 4 * (src)) >> shift)                     \
158
           : -((-16 * (src) + (ref) + add /                             \
159
                4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
160
24163440
    if (mirror_left) {
161
24163440
        dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
162
24163440
        dst   += dst_step;
163
24163440
        src   += src_step;
164
    }
165
166
246511280
    for (i = 0; i < w; i++)
167
222347840
        dst[i * dst_step] = LIFTS(src[i * src_step],
168
                                  mul * (ref[i * ref_step] +
169
                                         ref[(i + 1) * ref_step]) + add,
170
                                  inverse);
171
172
24163440
    if (mirror_right)
173
        dst[w * dst_step] = LIFTS(src[w * src_step],
174
                                  mul * 2 * ref[w * ref_step] + add,
175
                                  inverse);
176
24163440
}
177
178
167400
static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
179
{
180
167400
    const int width2 = width >> 1;
181
    int x;
182
167400
    const int w2 = (width + 1) >> 1;
183
184
15357600
    for (x = 0; x < width2; x++) {
185
15190200
        temp[x]      = b[2 * x];
186
15190200
        temp[x + w2] = b[2 * x + 1];
187
    }
188
167400
    if (width & 1)
189
2700
        temp[x] = b[2 * x];
190
167400
    lift(b + w2, temp + w2, temp,   1, 1, 1, width, -1, 0, 1, 1, 0);
191
167400
    lift(b,      temp,      b + w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
192
167400
}
193
194
83550
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
195
                                    int width)
196
{
197
    int i;
198
199
15273450
    for (i = 0; i < width; i++)
200
15189900
        b1[i] -= (b0[i] + b2[i]) >> 1;
201
83550
}
202
203
83850
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
204
                                    int width)
205
{
206
    int i;
207
208
15277050
    for (i = 0; i < width; i++)
209
15193200
        b1[i] += (b0[i] + b2[i] + 2) >> 2;
210
83850
}
211
212
2250
static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
213
                                 int width, int height, int stride)
214
{
215
    int y;
216
2250
    DWTELEM *b0 = buffer + avpriv_mirror(-2 - 1, height - 1) * stride;
217
2250
    DWTELEM *b1 = buffer + avpriv_mirror(-2,     height - 1) * stride;
218
219
88350
    for (y = -2; y < height; y += 2) {
220
86100
        DWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
221
86100
        DWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
222
223
86100
        if (y + 1 < (unsigned)height)
224
83550
            horizontal_decompose53i(b2, temp, width);
225
86100
        if (y + 2 < (unsigned)height)
226
83850
            horizontal_decompose53i(b3, temp, width);
227
228
86100
        if (y + 1 < (unsigned)height)
229
83550
            vertical_decompose53iH0(b1, b2, b3, width);
230
86100
        if (y + 0 < (unsigned)height)
231
83850
            vertical_decompose53iL0(b0, b1, b2, width);
232
233
86100
        b0 = b2;
234
86100
        b1 = b3;
235
    }
236
2250
}
237
238
24163440
static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
239
{
240
24163440
    const int w2 = (width + 1) >> 1;
241
242
24163440
    lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
243
24163440
    liftS(temp,     b,     temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
244
24163440
    lift(b + w2, temp + w2, temp,     1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
245
24163440
    lift(b,      temp,      b + w2,   1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
246
24163440
}
247
248
12081720
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
249
                                    int width)
250
{
251
    int i;
252
253
258593000
    for (i = 0; i < width; i++)
254
246511280
        b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
255
12081720
}
256
257
12081720
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
258
                                    int width)
259
{
260
    int i;
261
262
258593000
    for (i = 0; i < width; i++)
263
246511280
        b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
264
12081720
}
265
266
12081720
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
267
                                    int width)
268
{
269
    int i;
270
271
258593000
    for (i = 0; i < width; i++)
272
246511280
        b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) /
273
246511280
                (5 * 16) - (1 << 23);
274
12081720
}
275
276
12081720
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
277
                                    int width)
278
{
279
    int i;
280
281
258593000
    for (i = 0; i < width; i++)
282
246511280
        b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
283
12081720
}
284
285
1951060
static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
286
                                 int width, int height, int stride)
287
{
288
    int y;
289
1951060
    DWTELEM *b0 = buffer + avpriv_mirror(-4 - 1, height - 1) * stride;
290
1951060
    DWTELEM *b1 = buffer + avpriv_mirror(-4,     height - 1) * stride;
291
1951060
    DWTELEM *b2 = buffer + avpriv_mirror(-4 + 1, height - 1) * stride;
292
1951060
    DWTELEM *b3 = buffer + avpriv_mirror(-4 + 2, height - 1) * stride;
293
294
17934900
    for (y = -4; y < height; y += 2) {
295
15983840
        DWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
296
15983840
        DWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
297
298
15983840
        if (y + 3 < (unsigned)height)
299
12081720
            horizontal_decompose97i(b4, temp, width);
300
15983840
        if (y + 4 < (unsigned)height)
301
12081720
            horizontal_decompose97i(b5, temp, width);
302
303
15983840
        if (y + 3 < (unsigned)height)
304
12081720
            vertical_decompose97iH0(b3, b4, b5, width);
305
15983840
        if (y + 2 < (unsigned)height)
306
12081720
            vertical_decompose97iL0(b2, b3, b4, width);
307
15983840
        if (y + 1 < (unsigned)height)
308
12081720
            vertical_decompose97iH1(b1, b2, b3, width);
309
15983840
        if (y + 0 < (unsigned)height)
310
12081720
            vertical_decompose97iL1(b0, b1, b2, width);
311
312
15983840
        b0 = b2;
313
15983840
        b1 = b3;
314
15983840
        b2 = b4;
315
15983840
        b3 = b5;
316
    }
317
1951060
}
318
319
487990
void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
320
                    int stride, int type, int decomposition_count)
321
{
322
    int level;
323
324
2441300
    for (level = 0; level < decomposition_count; level++) {
325
1953310
        switch (type) {
326
1951060
        case DWT_97:
327
1951060
            spatial_decompose97i(buffer, temp,
328
                                 width >> level, height >> level,
329
                                 stride << level);
330
1951060
            break;
331
2250
        case DWT_53:
332
2250
            spatial_decompose53i(buffer, temp,
333
                                 width >> level, height >> level,
334
                                 stride << level);
335
2250
            break;
336
        }
337
1953310
    }
338
487990
}
339
340
392832
static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
341
{
342
392832
    const int width2 = width >> 1;
343
392832
    const int w2     = (width + 1) >> 1;
344
    int x;
345
346
36039168
    for (x = 0; x < width2; x++) {
347
35646336
        temp[2 * x]     = b[x];
348
35646336
        temp[2 * x + 1] = b[x + w2];
349
    }
350
392832
    if (width & 1)
351
6336
        temp[2 * x] = b[x];
352
353
392832
    b[0] = temp[0] - ((temp[1] + 1) >> 1);
354
35646336
    for (x = 2; x < width - 1; x += 2) {
355
35253504
        b[x]     = temp[x]     - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
356
35253504
        b[x - 1] = temp[x - 1] + ((b[x - 2]    + b[x]        + 1) >> 1);
357
    }
358
392832
    if (width & 1) {
359
6336
        b[x]     = temp[x]     - ((temp[x - 1]     + 1) >> 1);
360
6336
        b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
361
    } else
362
386496
        b[x - 1] = temp[x - 1] + b[x - 2];
363
392832
}
364
365
112288
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
366
                                  int width)
367
{
368
    int i;
369
370
20369624
    for (i = 0; i < width; i++)
371
20257336
        b1[i] += (b0[i] + b2[i]) >> 1;
372
112288
}
373
374
112992
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
375
                                  int width)
376
{
377
    int i;
378
379
20378072
    for (i = 0; i < width; i++)
380
20265080
        b1[i] -= (b0[i] + b2[i] + 2) >> 2;
381
112992
}
382
383
2310
static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
384
                                             int height, int stride_line)
385
{
386
2310
    cs->b0 = slice_buffer_get_line(sb,
387
                                   avpriv_mirror(-1 - 1, height - 1) * stride_line);
388
2310
    cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-1, height - 1) * stride_line);
389
2310
    cs->y  = -1;
390
2310
}
391
392
2970
static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
393
                                    int height, int stride)
394
{
395
2970
    cs->b0 = buffer + avpriv_mirror(-1 - 1, height - 1) * stride;
396
2970
    cs->b1 = buffer + avpriv_mirror(-1,     height - 1) * stride;
397
2970
    cs->y  = -1;
398
2970
}
399
400
88396
static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
401
                                           IDWTELEM *temp,
402
                                           int width, int height,
403
                                           int stride_line)
404
{
405
88396
    int y = cs->y;
406
407
88396
    IDWTELEM *b0 = cs->b0;
408
88396
    IDWTELEM *b1 = cs->b1;
409
88396
    IDWTELEM *b2 = slice_buffer_get_line(sb,
410
                                         avpriv_mirror(y + 1, height - 1) *
411
                                         stride_line);
412
88396
    IDWTELEM *b3 = slice_buffer_get_line(sb,
413
                                         avpriv_mirror(y + 2, height - 1) *
414
                                         stride_line);
415
416

172172
    if (y + 1 < (unsigned)height && y < (unsigned)height) {
417
        int x;
418
419
15472072
        for (x = 0; x < width; x++) {
420
15388296
            b2[x] -= (b1[x] + b3[x] + 2) >> 2;
421
15388296
            b1[x] += (b0[x] + b2[x])     >> 1;
422
        }
423
    } else {
424
4620
        if (y + 1 < (unsigned)height)
425
2310
            vertical_compose53iL0(b1, b2, b3, width);
426
4620
        if (y + 0 < (unsigned)height)
427
2002
            vertical_compose53iH0(b0, b1, b2, width);
428
    }
429
430
88396
    if (y - 1 < (unsigned)height)
431
86086
        horizontal_compose53i(b0, temp, width);
432
88396
    if (y + 0 < (unsigned)height)
433
85778
        horizontal_compose53i(b1, temp, width);
434
435
88396
    cs->b0  = b2;
436
88396
    cs->b1  = b3;
437
88396
    cs->y  += 2;
438
88396
}
439
440
113652
static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
441
                                  IDWTELEM *temp, int width, int height,
442
                                  int stride)
443
{
444
113652
    int y        = cs->y;
445
113652
    IDWTELEM *b0 = cs->b0;
446
113652
    IDWTELEM *b1 = cs->b1;
447
113652
    IDWTELEM *b2 = buffer + avpriv_mirror(y + 1, height - 1) * stride;
448
113652
    IDWTELEM *b3 = buffer + avpriv_mirror(y + 2, height - 1) * stride;
449
450
113652
    if (y + 1 < (unsigned)height)
451
110682
        vertical_compose53iL0(b1, b2, b3, width);
452
113652
    if (y + 0 < (unsigned)height)
453
110286
        vertical_compose53iH0(b0, b1, b2, width);
454
455
113652
    if (y - 1 < (unsigned)height)
456
110682
        horizontal_compose53i(b0, temp, width);
457
113652
    if (y + 0 < (unsigned)height)
458
110286
        horizontal_compose53i(b1, temp, width);
459
460
113652
    cs->b0  = b2;
461
113652
    cs->b1  = b3;
462
113652
    cs->y  += 2;
463
113652
}
464
465
245520
void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
466
{
467
245520
    const int w2 = (width + 1) >> 1;
468
    int x;
469
470
245520
    temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
471
19001232
    for (x = 1; x < (width >> 1); x++) {
472
18755712
        temp[2 * x]     = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
473
18755712
        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
474
    }
475
245520
    if (width & 1) {
476
3456
        temp[2 * x]     = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
477
3456
        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
478
    } else
479
242064
        temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
480
481
245520
    b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
482
19001232
    for (x = 2; x < width - 1; x += 2) {
483
18755712
        b[x]     = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
484
18755712
        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
485
    }
486
245520
    if (width & 1) {
487
3456
        b[x]     = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
488
3456
        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
489
    } else
490
242064
        b[x - 1] = temp[x - 1] + 3 * b[x - 2];
491
245520
}
492
493
93412
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
494
                                  int width)
495
{
496
    int i;
497
498
16894156
    for (i = 0; i < width; i++)
499
16800744
        b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
500
93412
}
501
502
93412
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
503
                                  int width)
504
{
505
    int i;
506
507
16894156
    for (i = 0; i < width; i++)
508
16800744
        b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
509
93412
}
510
511
93412
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
512
                                  int width)
513
{
514
    int i;
515
516
16894156
    for (i = 0; i < width; i++)
517
16800744
        b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS;
518
93412
}
519
520
93412
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
521
                                  int width)
522
{
523
    int i;
524
525
16894156
    for (i = 0; i < width; i++)
526
16800744
        b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
527
93412
}
528
529
29376
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
530
                                 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
531
                                 int width)
532
{
533
    int i;
534
535
2232576
    for (i = 0; i < width; i++) {
536
2203200
        b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS;
537
2203200
        b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS;
538
2203200
        b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS;
539
2203200
        b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
540
    }
541
29376
}
542
543
4605
static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
544
                                             int height, int stride_line)
545
{
546
4605
    cs->b0 = slice_buffer_get_line(sb, avpriv_mirror(-3 - 1, height - 1) * stride_line);
547
4605
    cs->b1 = slice_buffer_get_line(sb, avpriv_mirror(-3,     height - 1) * stride_line);
548
4605
    cs->b2 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 1, height - 1) * stride_line);
549
4605
    cs->b3 = slice_buffer_get_line(sb, avpriv_mirror(-3 + 2, height - 1) * stride_line);
550
4605
    cs->y  = -3;
551
4605
}
552
553
6420
static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
554
                                    int stride)
555
{
556
6420
    cs->b0 = buffer + avpriv_mirror(-3 - 1, height - 1) * stride;
557
6420
    cs->b1 = buffer + avpriv_mirror(-3,     height - 1) * stride;
558
6420
    cs->b2 = buffer + avpriv_mirror(-3 + 1, height - 1) * stride;
559
6420
    cs->b3 = buffer + avpriv_mirror(-3 + 2, height - 1) * stride;
560
6420
    cs->y  = -3;
561
6420
}
562
563
47278
static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs,
564
                                           slice_buffer * sb, IDWTELEM *temp,
565
                                           int width, int height,
566
                                           int stride_line)
567
{
568
47278
    int y = cs->y;
569
570
47278
    IDWTELEM *b0 = cs->b0;
571
47278
    IDWTELEM *b1 = cs->b1;
572
47278
    IDWTELEM *b2 = cs->b2;
573
47278
    IDWTELEM *b3 = cs->b3;
574
47278
    IDWTELEM *b4 = slice_buffer_get_line(sb,
575
                                         avpriv_mirror(y + 3, height - 1) *
576
                                         stride_line);
577
47278
    IDWTELEM *b5 = slice_buffer_get_line(sb,
578
                                         avpriv_mirror(y + 4, height - 1) *
579
                                         stride_line);
580
581

47278
    if (y > 0 && y + 4 < height) {
582
29472
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
583
    } else {
584
17806
        if (y + 3 < (unsigned)height)
585
8596
            vertical_compose97iL1(b3, b4, b5, width);
586
17806
        if (y + 2 < (unsigned)height)
587
8596
            vertical_compose97iH1(b2, b3, b4, width);
588
17806
        if (y + 1 < (unsigned)height)
589
8596
            vertical_compose97iL0(b1, b2, b3, width);
590
17806
        if (y + 0 < (unsigned)height)
591
8596
            vertical_compose97iH0(b0, b1, b2, width);
592
    }
593
594
47278
    if (y - 1 < (unsigned)height)
595
38068
        dsp->horizontal_compose97i(b0, temp, width);
596
47278
    if (y + 0 < (unsigned)height)
597
38068
        dsp->horizontal_compose97i(b1, temp, width);
598
599
47278
    cs->b0  = b2;
600
47278
    cs->b1  = b3;
601
47278
    cs->b2  = b4;
602
47278
    cs->b3  = b5;
603
47278
    cs->y  += 2;
604
47278
}
605
606
97656
static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
607
                                  IDWTELEM *temp, int width, int height,
608
                                  int stride)
609
{
610
97656
    int y        = cs->y;
611
97656
    IDWTELEM *b0 = cs->b0;
612
97656
    IDWTELEM *b1 = cs->b1;
613
97656
    IDWTELEM *b2 = cs->b2;
614
97656
    IDWTELEM *b3 = cs->b3;
615
97656
    IDWTELEM *b4 = buffer + avpriv_mirror(y + 3, height - 1) * stride;
616
97656
    IDWTELEM *b5 = buffer + avpriv_mirror(y + 4, height - 1) * stride;
617
618
97656
    if (y + 3 < (unsigned)height)
619
84816
        vertical_compose97iL1(b3, b4, b5, width);
620
97656
    if (y + 2 < (unsigned)height)
621
84816
        vertical_compose97iH1(b2, b3, b4, width);
622
97656
    if (y + 1 < (unsigned)height)
623
84816
        vertical_compose97iL0(b1, b2, b3, width);
624
97656
    if (y + 0 < (unsigned)height)
625
84816
        vertical_compose97iH0(b0, b1, b2, width);
626
627
97656
    if (y - 1 < (unsigned)height)
628
84816
        ff_snow_horizontal_compose97i(b0, temp, width);
629
97656
    if (y + 0 < (unsigned)height)
630
84816
        ff_snow_horizontal_compose97i(b1, temp, width);
631
632
97656
    cs->b0  = b2;
633
97656
    cs->b1  = b3;
634
97656
    cs->b2  = b4;
635
97656
    cs->b3  = b5;
636
97656
    cs->y  += 2;
637
97656
}
638
639
1383
void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
640
                                   int height, int stride_line, int type,
641
                                   int decomposition_count)
642
{
643
    int level;
644
8298
    for (level = decomposition_count - 1; level >= 0; level--) {
645
6915
        switch (type) {
646
4605
        case DWT_97:
647
4605
            spatial_compose97i_buffered_init(cs + level, sb, height >> level,
648
                                             stride_line << level);
649
4605
            break;
650
2310
        case DWT_53:
651
2310
            spatial_compose53i_buffered_init(cs + level, sb, height >> level,
652
                                             stride_line << level);
653
2310
            break;
654
        }
655
6915
    }
656
1383
}
657
658
33857
void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
659
                                    slice_buffer *slice_buf, IDWTELEM *temp,
660
                                    int width, int height, int stride_line,
661
                                    int type, int decomposition_count, int y)
662
{
663
33857
    const int support = type == 1 ? 3 : 5;
664
    int level;
665
33857
    if (type == 2)
666
        return;
667
668
203142
    for (level = decomposition_count - 1; level >= 0; level--)
669
169285
        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
670
135674
            switch (type) {
671
47278
            case DWT_97:
672
47278
                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
673
                                               width >> level,
674
                                               height >> level,
675
                                               stride_line << level);
676
47278
                break;
677
88396
            case DWT_53:
678
88396
                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
679
                                               width >> level,
680
                                               height >> level,
681
                                               stride_line << level);
682
88396
                break;
683
            }
684
304959
        }
685
}
686
687
1878
static void spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
688
                                 int height, int stride, int type,
689
                                 int decomposition_count)
690
{
691
    int level;
692
11268
    for (level = decomposition_count - 1; level >= 0; level--) {
693
9390
        switch (type) {
694
6420
        case DWT_97:
695
6420
            spatial_compose97i_init(cs + level, buffer, height >> level,
696
                                    stride << level);
697
6420
            break;
698
2970
        case DWT_53:
699
2970
            spatial_compose53i_init(cs + level, buffer, height >> level,
700
                                    stride << level);
701
2970
            break;
702
        }
703
9390
    }
704
1878
}
705
706
50400
static void spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
707
                                  IDWTELEM *temp, int width, int height,
708
                                  int stride, int type,
709
                                  int decomposition_count, int y)
710
{
711
50400
    const int support = type == 1 ? 3 : 5;
712
    int level;
713
50400
    if (type == 2)
714
        return;
715
716
302400
    for (level = decomposition_count - 1; level >= 0; level--)
717
252000
        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
718
211308
            switch (type) {
719
97656
            case DWT_97:
720
97656
                spatial_compose97i_dy(cs + level, buffer, temp, width >> level,
721
                                      height >> level, stride << level);
722
97656
                break;
723
113652
            case DWT_53:
724
113652
                spatial_compose53i_dy(cs + level, buffer, temp, width >> level,
725
                                      height >> level, stride << level);
726
113652
                break;
727
            }
728
463308
        }
729
}
730
731
1878
void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
732
                     int stride, int type, int decomposition_count)
733
{
734
    DWTCompose cs[MAX_DECOMPOSITIONS];
735
    int y;
736
1878
    spatial_idwt_init(cs, buffer, width, height, stride, type,
737
                         decomposition_count);
738
52278
    for (y = 0; y < height; y += 4)
739
50400
        spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
740
                              decomposition_count, y);
741
1878
}
742
743
486640
static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size,
744
                      int w, int h, int type)
745
{
746
    int s, i, j;
747
486640
    const int dec_count = w == 8 ? 3 : 4;
748
    int tmp[32 * 32], tmp2[32];
749
    int level, ori;
750
    static const int scale[2][2][4][4] = {
751
        {
752
            { // 9/7 8x8 dec=3
753
                { 268, 239, 239, 213 },
754
                { 0,   224, 224, 152 },
755
                { 0,   135, 135, 110 },
756
            },
757
            { // 9/7 16x16 or 32x32 dec=4
758
                { 344, 310, 310, 280 },
759
                { 0,   320, 320, 228 },
760
                { 0,   175, 175, 136 },
761
                { 0,   129, 129, 102 },
762
            }
763
        },
764
        {
765
            { // 5/3 8x8 dec=3
766
                { 275, 245, 245, 218 },
767
                { 0,   230, 230, 156 },
768
                { 0,   138, 138, 113 },
769
            },
770
            { // 5/3 16x16 or 32x32 dec=4
771
                { 352, 317, 317, 286 },
772
                { 0,   328, 328, 233 },
773
                { 0,   180, 180, 140 },
774
                { 0,   132, 132, 105 },
775
            }
776
        }
777
    };
778
779
13334128
    for (i = 0; i < h; i++) {
780
104727424
        for (j = 0; j < w; j += 4) {
781
91879936
            tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4;
782
91879936
            tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4;
783
91879936
            tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4;
784
91879936
            tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4;
785
        }
786
12847488
        pix1 += line_size;
787
12847488
        pix2 += line_size;
788
    }
789
790
486640
    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
791
792
486640
    s = 0;
793
    av_assert1(w == h);
794
2433200
    for (level = 0; level < dec_count; level++)
795
8272880
        for (ori = level ? 1 : 0; ori < 4; ori++) {
796
6326320
            int size   = w >> (dec_count - level);
797
6326320
            int sx     = (ori & 1) ? size : 0;
798
6326320
            int stride = 32 << (dec_count - level);
799
6326320
            int sy     = (ori & 2) ? stride >> 1 : 0;
800
801
43262848
            for (i = 0; i < size; i++)
802
404456272
                for (j = 0; j < size; j++) {
803
367519744
                    int v = tmp[sx + sy + i * stride + j] *
804
367519744
                            scale[type][dec_count - 3][level][ori];
805
367519744
                    s += FFABS(v);
806
                }
807
        }
808
    av_assert1(s >= 0);
809
486640
    return s >> 9;
810
}
811
812
static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
813
{
814
    return w_c(v, pix1, pix2, line_size, 8, h, 1);
815
}
816
817
static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
818
{
819
    return w_c(v, pix1, pix2, line_size, 8, h, 0);
820
}
821
822
static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
823
{
824
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
825
}
826
827
170312
static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
828
{
829
170312
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
830
}
831
832
int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
833
{
834
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
835
}
836
837
316328
int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, ptrdiff_t line_size, int h)
838
{
839
316328
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
840
}
841
842
980
av_cold void ff_dsputil_init_dwt(MECmpContext *c)
843
{
844
980
    c->w53[0] = w53_16_c;
845
980
    c->w53[1] = w53_8_c;
846
980
    c->w97[0] = w97_16_c;
847
980
    c->w97[1] = w97_8_c;
848
980
}
849
850
31
av_cold void ff_dwt_init(SnowDWTContext *c)
851
{
852
31
    c->vertical_compose97i   = ff_snow_vertical_compose97i;
853
31
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
854
31
    c->inner_add_yblock      = ff_snow_inner_add_yblock;
855
856
    if (HAVE_MMX)
857
31
        ff_dwt_init_x86(c);
858
31
}
859
860