GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/x86/dirac_dwt_init.c Lines: 4 41 9.8 %
Date: 2020-09-25 23:16:12 Branches: 1 17 5.9 %

Line Branch Exec Source
1
/*
2
 * x86 optimized discrete wavelet transform
3
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
4
 * Copyright (c) 2010 David Conrad
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
#include "libavutil/x86/asm.h"
24
#include "libavutil/x86/cpu.h"
25
#include "libavcodec/dirac_dwt.h"
26
27
/*
 * COMPOSE_VERTICAL(ext, align)
 *
 * Declares the external ff_*##ext compose routines and defines static
 * wrappers around them.  Every wrapper receives its buffers as uint8_t
 * pointers and reinterprets them as int16_t before use.
 *
 *  - Vertical wrappers: the last (width % align) elements are computed in C
 *    with the matching COMPOSE_* macro, then the ext routine is called for
 *    the leading part of the row, rounded down to a multiple of align.
 *  - Horizontal wrappers: the ext routine is called with the full w first,
 *    then the C loop handles positions from (w/2 rounded down to a multiple
 *    of align) up to w/2.
 *
 * The rounding is done with bit masks, so align is expected to be a power
 * of two (it is instantiated with 4 and 8 in this file).
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w); \
void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w); \
\
/* Wrapper for ff_vertical_compose53iL0##ext; the result is stored in b1. */ \
static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int simd_width = width - (width & ((align) - 1)); \
    int x; \
\
    /* scalar tail first, then the aligned prefix */ \
    for (x = simd_width; x < width; x++) { \
        b1[x] = COMPOSE_53iL0(b0[x], b1[x], b2[x]); \
    } \
\
    ff_vertical_compose53iL0##ext(b0, b1, b2, simd_width); \
} \
\
/* Wrapper for ff_vertical_compose_dirac53iH0##ext; the result is stored in b1. */ \
static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int simd_width = width - (width & ((align) - 1)); \
    int x; \
\
    for (x = simd_width; x < width; x++) { \
        b1[x] = COMPOSE_DIRAC53iH0(b0[x], b1[x], b2[x]); \
    } \
\
    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, simd_width); \
} \
\
/* Wrapper for ff_vertical_compose_dd137iL0##ext; the result is stored in b2. */ \
static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                           uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int16_t *b3 = (int16_t *)_b3; \
    int16_t *b4 = (int16_t *)_b4; \
    int simd_width = width - (width & ((align) - 1)); \
    int x; \
\
    for (x = simd_width; x < width; x++) { \
        b2[x] = COMPOSE_DD137iL0(b0[x], b1[x], b2[x], b3[x], b4[x]); \
    } \
\
    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, simd_width); \
} \
\
/* Wrapper for ff_vertical_compose_dd97iH0##ext; the result is stored in b2. */ \
static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                          uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int16_t *b3 = (int16_t *)_b3; \
    int16_t *b4 = (int16_t *)_b4; \
    int simd_width = width - (width & ((align) - 1)); \
    int x; \
\
    for (x = simd_width; x < width; x++) { \
        b2[x] = COMPOSE_DD97iH0(b0[x], b1[x], b2[x], b3[x], b4[x]); \
    } \
\
    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, simd_width); \
} \
\
/* Wrapper for ff_vertical_compose_haar##ext; updates both b0 and b1. */ \
static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
{ \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int simd_width = width - (width & ((align) - 1)); \
    int x; \
\
    for (x = simd_width; x < width; x++) { \
        /* b1 is computed from the already updated b0 value */ \
        b0[x] = COMPOSE_HAARiL0(b0[x], b1[x]); \
        b1[x] = COMPOSE_HAARiH0(b1[x], b0[x]); \
    } \
\
    ff_vertical_compose_haar##ext(b0, b1, simd_width); \
} \
\
/* Wrapper for ff_horizontal_compose_haar0i##ext: ext routine first, then the C tail. */ \
static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w) \
{ \
    int16_t *b   = (int16_t *)_b; \
    int16_t *tmp = (int16_t *)_tmp; \
    int half = w >> 1; \
    int pos  = half & ~((align) - 1); \
\
    ff_horizontal_compose_haar0i##ext(b, tmp, w); \
\
    while (pos < half) { \
        b[2 * pos    ] = tmp[pos]; \
        b[2 * pos + 1] = COMPOSE_HAARiH0(b[pos + half], tmp[pos]); \
        pos++; \
    } \
} \
\
/* Same as the haar0i wrapper, but each output is additionally rounded with (v + 1) >> 1. */ \
static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w) \
{ \
    int16_t *b   = (int16_t *)_b; \
    int16_t *tmp = (int16_t *)_tmp; \
    int half = w >> 1; \
    int pos  = half & ~((align) - 1); \
\
    ff_horizontal_compose_haar1i##ext(b, tmp, w); \
\
    while (pos < half) { \
        b[2 * pos    ] = (tmp[pos] + 1) >> 1; \
        b[2 * pos + 1] = (COMPOSE_HAARiH0(b[pos + half], tmp[pos]) + 1) >> 1; \
        pos++; \
    } \
}
135
136
#if HAVE_X86ASM
/* The MMX wrapper set (alignment granularity 4) is only built for non-x86-64 targets. */
#if !ARCH_X86_64
COMPOSE_VERTICAL(_mmx, 4)
#endif
/* The SSE2 wrapper set uses an alignment granularity of 8. */
COMPOSE_VERTICAL(_sse2, 8)

void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);

/*
 * Wrapper for ff_horizontal_compose_dd97i_ssse3.
 *
 * Calls the external routine with the full width w, then computes in C the
 * output pairs for positions from (w/2 rounded down to a multiple of 8) up
 * to w/2.  Buffers arrive as uint8_t pointers and are used as int16_t.
 */
static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
{
    int16_t *b   = (int16_t *)_b;
    int16_t *tmp = (int16_t *)_tmp;
    int half = w >> 1;
    int pos  = half & ~7;

    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);

    /* NOTE(review): the tail reads tmp[pos - 1] .. tmp[pos + 2]; presumably
     * the caller provides a tmp buffer valid over that range -- confirm. */
    while (pos < half) {
        b[2 * pos    ] = (tmp[pos] + 1) >> 1;
        b[2 * pos + 1] = (COMPOSE_DD97iH0(tmp[pos - 1], tmp[pos], b[pos + half],
                                          tmp[pos + 1], tmp[pos + 2]) + 1) >> 1;
        pos++;
    }
}
#endif /* HAVE_X86ASM */
160
161
348
/*
 * Install the x86 compose wrappers into a DWT context.
 *
 * d    - context whose vertical_compose_l0 / vertical_compose_h0 /
 *        vertical_compose / horizontal_compose members are overwritten for
 *        the wavelet types handled below; other members and other types are
 *        left untouched.
 * type - wavelet type selecting which members get assigned.
 *
 * Selection is cumulative: on non-x86-64 builds the MMX wrappers are set
 * first (if the CPU reports MMX), then the SSE2 wrappers replace them (if
 * SSE2 is reported), and finally the SSSE3 horizontal DD9_7 wrapper is set
 * (if SSSE3 is reported).  The function returns as soon as a required CPU
 * flag is missing.  Without HAVE_X86ASM it does nothing.
 */
void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
{
#if HAVE_X86ASM
    const int cpu_flags = av_get_cpu_flags();

#if !ARCH_X86_64
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;

    switch (type) {
    case DWT_DIRAC_DD9_7:
        d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
        break;
    case DWT_DIRAC_LEGALL5_3:
        d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
        break;
    case DWT_DIRAC_DD13_7:
        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
        break;
    case DWT_DIRAC_HAAR0:
        d->vertical_compose   = (void*)vertical_compose_haar_mmx;
        d->horizontal_compose = horizontal_compose_haar0i_mmx;
        break;
    case DWT_DIRAC_HAAR1:
        d->vertical_compose   = (void*)vertical_compose_haar_mmx;
        d->horizontal_compose = horizontal_compose_haar1i_mmx;
        break;
    default:
        /* no MMX wrappers for other wavelet types */
        break;
    }
#endif

    if (!(cpu_flags & AV_CPU_FLAG_SSE2))
        return;

    switch (type) {
    case DWT_DIRAC_DD9_7:
        d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
        break;
    case DWT_DIRAC_LEGALL5_3:
        d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
        break;
    case DWT_DIRAC_DD13_7:
        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
        break;
    case DWT_DIRAC_HAAR0:
        d->vertical_compose   = (void*)vertical_compose_haar_sse2;
        d->horizontal_compose = horizontal_compose_haar0i_sse2;
        break;
    case DWT_DIRAC_HAAR1:
        d->vertical_compose   = (void*)vertical_compose_haar_sse2;
        d->horizontal_compose = horizontal_compose_haar1i_sse2;
        break;
    default:
        /* no SSE2 wrappers for other wavelet types */
        break;
    }

    /* Only the DD9_7 horizontal pass has an SSSE3 wrapper. */
    if ((cpu_flags & AV_CPU_FLAG_SSSE3) && type == DWT_DIRAC_DD9_7)
        d->horizontal_compose = horizontal_compose_dd97i_ssse3;
#endif /* HAVE_X86ASM */
}