1 |
|
|
/* |
2 |
|
|
* Copyright (C) 2007 Marco Gerards <marco@gnu.org> |
3 |
|
|
* Copyright (C) 2016 Open Broadcast Systems Ltd. |
4 |
|
|
* Author 2016 Rostislav Pehlivanov <atomnuker@gmail.com> |
5 |
|
|
* |
6 |
|
|
* This file is part of FFmpeg. |
7 |
|
|
* |
8 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
9 |
|
|
* modify it under the terms of the GNU Lesser General Public |
10 |
|
|
* License as published by the Free Software Foundation; either |
11 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
12 |
|
|
* |
13 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
14 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 |
|
|
* Lesser General Public License for more details. |
17 |
|
|
* |
18 |
|
|
* You should have received a copy of the GNU Lesser General Public |
19 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
20 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 |
|
|
*/ |
22 |
|
|
|
23 |
|
|
#include "libavutil/attributes.h" |
24 |
|
|
#include "libavutil/mem.h" |
25 |
|
|
#include "vc2enc_dwt.h" |
26 |
|
|
|
27 |
|
|
/* Since the transforms spit out interleaved coefficients, this function |
28 |
|
|
* rearranges the coefficients into the more traditional subdivision, |
29 |
|
|
* making it easier to encode and perform another level. */ |
30 |
|
1980 |
static av_always_inline void deinterleave(dwtcoef *linell, ptrdiff_t stride, |
31 |
|
|
int width, int height, dwtcoef *synthl) |
32 |
|
|
{ |
33 |
|
|
int x, y; |
34 |
|
1980 |
ptrdiff_t synthw = width << 1; |
35 |
|
1980 |
dwtcoef *linehl = linell + width; |
36 |
|
1980 |
dwtcoef *linelh = linell + height*stride; |
37 |
|
1980 |
dwtcoef *linehh = linelh + width; |
38 |
|
|
|
39 |
|
|
/* Deinterleave the coefficients. */ |
40 |
✓✓ |
123480 |
for (y = 0; y < height; y++) { |
41 |
✓✓ |
11986650 |
for (x = 0; x < width; x++) { |
42 |
|
11865150 |
linell[x] = synthl[(x << 1)]; |
43 |
|
11865150 |
linehl[x] = synthl[(x << 1) + 1]; |
44 |
|
11865150 |
linelh[x] = synthl[(x << 1) + synthw]; |
45 |
|
11865150 |
linehh[x] = synthl[(x << 1) + synthw + 1]; |
46 |
|
|
} |
47 |
|
121500 |
synthl += synthw << 1; |
48 |
|
121500 |
linell += stride; |
49 |
|
121500 |
linelh += stride; |
50 |
|
121500 |
linehl += stride; |
51 |
|
121500 |
linehh += stride; |
52 |
|
|
} |
53 |
|
1980 |
} |
54 |
|
|
|
55 |
|
1620 |
/*
 * One level of the 9/7 lifting transform (installed as VC2_TRANSFORM_9_7).
 *
 * A (2*width) x (2*height) region of data (row pitch: stride) is copied into
 * t->buffer with every sample doubled, filtered along the rows and then
 * along the columns, and finally written back to data by deinterleave().
 *
 * Each direction runs two lifting stages: stage 2 adjusts the odd samples
 * from their even neighbours, then stage 1 adjusts the even samples from
 * the freshly updated odd ones. The first and last positions use shortened
 * filters because some of their neighbours do not exist.
 *
 * NOTE(review): the edge filters reach up to six samples/rows back from the
 * end of the region, so this appears to rely on width >= 3 and height >= 3
 * (or on padding around t->buffer) - confirm against the caller.
 */
static void vc2_subband_dwt_97(VC2TransformContext *t, dwtcoef *data,
                               ptrdiff_t stride, int width, int height)
{
    int i, j;
    dwtcoef *const buf  = t->buffer;
    dwtcoef *row        = buf;
    const dwtcoef *src  = data;
    const ptrdiff_t bw  = 2 * width;
    const ptrdiff_t bh  = 2 * height;

    /*
     * Copy the input into the work buffer, doubling each sample to gain
     * one bit of additional precision.
     */
    for (j = 0; j < bh; j++) {
        for (i = 0; i < bw; i++)
            row[i] = src[i] * 2;
        row += bw;
        src += stride;
    }

    /* Horizontal pass, one buffer row at a time. */
    for (j = 0, row = buf; j < bh; j++, row += bw) {
        dwtcoef *const last = row + bw - 1; /* final (odd) sample of the row */

        /* Lifting stage 2: odd samples. */
        row[1] -= (8*row[0] + 9*row[2] - row[4] + 8) >> 4;
        for (i = 1; i < width - 2; i++) {
            dwtcoef *const odd = row + 2*i + 1;
            *odd -= (9*odd[-1] + 9*odd[1] - odd[3] - odd[-3] + 8) >> 4;
        }
        last[ 0] -= (17*last[-1] - last[-3] + 8) >> 4;
        last[-2] -= (8*last[-1] + 9*last[-3] - last[-5] + 8) >> 4;

        /* Lifting stage 1: even samples. */
        row[0] += (row[1] + row[1] + 2) >> 2;
        for (i = 1; i < width - 1; i++) {
            dwtcoef *const even = row + 2*i;
            *even += (even[-1] + even[1] + 2) >> 2;
        }
        last[-1] += (last[-2] + last[0] + 2) >> 2;
    }

    /* Vertical pass, lifting stage 2: first odd row (row 1). */
    row = buf + bw;
    for (i = 0; i < bw; i++)
        row[i] -= (8*row[i - bw] + 9*row[i + bw] - row[i + 3*bw] + 8) >> 4;

    /* Interior odd rows 3, 5, ..., each using two even rows on either side. */
    for (j = 1; j < height - 2; j++) {
        row = buf + (2*j + 1) * bw;
        for (i = 0; i < bw; i++)
            row[i] -= (9*row[i - bw] +
                       9*row[i + bw] -
                       row[i - 3*bw] -
                       row[i + 3*bw] + 8) >> 4;
    }

    /* Last odd row and the odd row two above it. */
    row = buf + (bh - 1) * bw;
    for (i = 0; i < bw; i++) {
        row[i]        -= (17*row[i - bw] - row[i - 3*bw] + 8) >> 4;
        row[i - 2*bw] -= (9*row[i - 3*bw] + 8*row[i - bw] -
                          row[i - 5*bw] + 8) >> 4;
    }

    /* Vertical pass, lifting stage 1: row 0 only has a neighbour below. */
    row = buf;
    for (i = 0; i < bw; i++)
        row[i] += (row[i + bw] + row[i + bw] + 2) >> 2;

    /* Interior even rows 2, 4, ... */
    for (j = 1; j < height - 1; j++) {
        row = buf + 2*j * bw;
        for (i = 0; i < bw; i++)
            row[i] += (row[i - bw] + row[i + bw] + 2) >> 2;
    }

    /* Last even row. */
    row = buf + (bh - 2) * bw;
    for (i = 0; i < bw; i++)
        row[i] += (row[i - bw] + row[i + bw] + 2) >> 2;

    deinterleave(data, stride, width, height, buf);
}
139 |
|
|
|
140 |
|
180 |
/*
 * One level of the 5/3 lifting transform (installed as VC2_TRANSFORM_5_3).
 *
 * A (2*width) x (2*height) region of data (row pitch: stride) is copied into
 * t->buffer with every sample doubled, filtered along the rows and then
 * along the columns, and finally written back to data by deinterleave().
 *
 * Each direction runs two lifting stages: stage 2 subtracts from every odd
 * sample the rounded mean of its two even neighbours, then stage 1 adds to
 * every even sample a rounded quarter of the sum of its two (updated) odd
 * neighbours. At the borders the single existing neighbour is counted twice.
 */
static void vc2_subband_dwt_53(VC2TransformContext *t, dwtcoef *data,
                               ptrdiff_t stride, int width, int height)
{
    int x, y;
    dwtcoef *synth = t->buffer, *synthl = synth, *datal = data;
    const ptrdiff_t synth_width  = width  << 1;
    const ptrdiff_t synth_height = height << 1;

    /*
     * Copy the data to the buffer with one extra bit of precision.
     * This is a multiplication rather than "<< 1" because the samples may
     * be negative, and left-shifting a negative value is undefined in C.
     */
    for (y = 0; y < synth_height; y++) {
        for (x = 0; x < synth_width; x++)
            synthl[x] = datal[x] * 2;
        synthl += synth_width;
        datal  += stride;
    }

    /* Horizontal pass. */
    synthl = synth;
    for (y = 0; y < synth_height; y++) {
        /* Lifting stage 2: odd samples. */
        for (x = 0; x < width - 1; x++)
            synthl[2 * x + 1] -= (synthl[2 * x] + synthl[2 * x + 2] + 1) >> 1;

        /* The last odd sample only has a left neighbour. */
        synthl[synth_width - 1] -= (2*synthl[synth_width - 2] + 1) >> 1;

        /* Lifting stage 1: even samples; sample 0 only has a right neighbour. */
        synthl[0] += (2*synthl[1] + 2) >> 2;
        for (x = 1; x < width - 1; x++)
            synthl[2 * x] += (synthl[2 * x - 1] + synthl[2 * x + 1] + 2) >> 2;

        synthl[synth_width - 2] += (synthl[synth_width - 3] + synthl[synth_width - 1] + 2) >> 2;

        synthl += synth_width;
    }

    /* Vertical pass, lifting stage 2: first odd row. */
    synthl = synth + synth_width;
    for (x = 0; x < synth_width; x++)
        synthl[x] -= (synthl[x - synth_width] + synthl[x + synth_width] + 1) >> 1;

    /* Interior odd rows; synthl points at the even row just above each one. */
    synthl = synth + (synth_width << 1);
    for (y = 1; y < height - 1; y++) {
        for (x = 0; x < synth_width; x++)
            synthl[x + synth_width] -= (synthl[x] + synthl[x + synth_width * 2] + 1) >> 1;
        synthl += (synth_width << 1);
    }

    /* The last odd row only has a neighbour above. */
    synthl = synth + (synth_height - 1) * synth_width;
    for (x = 0; x < synth_width; x++)
        synthl[x] -= (2*synthl[x - synth_width] + 1) >> 1;

    /* Vertical pass, lifting stage 1: row 0 only has a neighbour below. */
    synthl = synth;
    for (x = 0; x < synth_width; x++)
        synthl[x] += (2*synthl[synth_width + x] + 2) >> 2;

    /* Interior even rows. */
    synthl = synth + (synth_width << 1);
    for (y = 1; y < height - 1; y++) {
        for (x = 0; x < synth_width; x++)
            synthl[x] += (synthl[x + synth_width] + synthl[x - synth_width] + 2) >> 2;
        synthl += (synth_width << 1);
    }

    /* Last even row. */
    synthl = synth + (synth_height - 2)*synth_width;
    for (x = 0; x < synth_width; x++)
        synthl[x] += (synthl[x - synth_width] + synthl[x + synth_width] + 2) >> 2;

    deinterleave(data, stride, width, height, synth);
}
213 |
|
|
|
214 |
|
180 |
/*
 * One level of the Haar transform, shared by the plain and shifted variants.
 *
 * A (2*width) x (2*height) region of data (row pitch: stride) is filtered
 * horizontally into t->buffer, then vertically in place, and finally written
 * back to data by deinterleave().
 *
 * For every pair (a, b) of neighbouring samples the pair is replaced by
 *     high = b - a
 *     low  = a + ((high + 1) >> 1)
 * with low stored at the even position and high at the odd one.
 *
 * s is the number of extra precision bits: the input samples are scaled by
 * 2^s while they are read. The scaling is written as a multiplication
 * because the samples may be negative, and left-shifting a negative value
 * is undefined in C.
 */
static av_always_inline void dwt_haar(VC2TransformContext *t, dwtcoef *data,
                                      ptrdiff_t stride, int width, int height,
                                      const int s)
{
    int x, y;
    dwtcoef *synth = t->buffer, *datal = data;
    const ptrdiff_t synth_width  = width  << 1;
    const ptrdiff_t synth_height = height << 1;
    const int scale = 1 << s;

    /* Horizontal pass: data -> buffer, pairing columns (x, x + 1). */
    for (y = 0; y < synth_height; y++) {
        const dwtcoef *in  = datal + y*stride;
        dwtcoef       *out = synth + y*synth_width;

        for (x = 0; x < synth_width; x += 2) {
            out[x + 1] = in[x + 1] * scale - in[x] * scale;
            out[x]     = in[x] * scale + ((out[x + 1] + 1) >> 1);
        }
    }

    /*
     * Vertical pass: in place, pairing rows (y, y + 1).
     * Every column is independent, so the rows are walked in memory order
     * instead of column by column; the result is the same.
     */
    for (y = 0; y < synth_height; y += 2) {
        dwtcoef *upper = synth + y*synth_width;
        dwtcoef *lower = upper + synth_width;

        for (x = 0; x < synth_width; x++) {
            lower[x] = lower[x] - upper[x];
            upper[x] = upper[x] + ((lower[x] + 1) >> 1);
        }
    }

    deinterleave(data, stride, width, height, synth);
}
245 |
|
|
|
246 |
|
|
/* Haar transform with no extra precision bits: dwt_haar() with s = 0. */
static void vc2_subband_dwt_haar(VC2TransformContext *t, dwtcoef *data,
                                 ptrdiff_t stride, int width, int height)
{
    const int precision_bits = 0;

    dwt_haar(t, data, stride, width, height, precision_bits);
}
251 |
|
|
|
252 |
|
180 |
/* Haar transform with one extra precision bit: dwt_haar() with s = 1. */
static void vc2_subband_dwt_haar_shift(VC2TransformContext *t, dwtcoef *data,
                                       ptrdiff_t stride, int width, int height)
{
    const int precision_bits = 1;

    dwt_haar(t, data, stride, width, height, precision_bits);
}
257 |
|
|
|
258 |
|
99 |
/*
 * Set up a transform context: install the per-wavelet transform functions
 * and allocate the zero-initialised work buffer they share.
 *
 * The buffer holds (p_stride + slice_w) * (p_height + slice_h) coefficients.
 * s->buffer is advanced past the first
 * (slice_h / 2) * p_stride + (slice_w / 2) of them, and that offset is kept
 * in s->padding so ff_vc2enc_free_transforms() can recover the pointer
 * returned by the allocator.
 *
 * Returns 0 on success and 1 if the allocation fails.
 */
av_cold int ff_vc2enc_init_transforms(VC2TransformContext *s, int p_stride,
                                      int p_height, int slice_w, int slice_h)
{
    s->vc2_subband_dwt[VC2_TRANSFORM_9_7]    = vc2_subband_dwt_97;
    s->vc2_subband_dwt[VC2_TRANSFORM_5_3]    = vc2_subband_dwt_53;
    s->vc2_subband_dwt[VC2_TRANSFORM_HAAR]   = vc2_subband_dwt_haar;
    s->vc2_subband_dwt[VC2_TRANSFORM_HAAR_S] = vc2_subband_dwt_haar_shift;

    /*
     * Pad by the slice size, only matters for non-Haar wavelets.
     * The element count is multiplied in size_t: an int product could
     * overflow before av_calloc() gets a chance to check the total size.
     */
    s->buffer = av_calloc((size_t)(p_stride + slice_w) * (size_t)(p_height + slice_h),
                          sizeof(dwtcoef));
    if (!s->buffer) {
        /* Leave a consistent state behind: no buffer, no offset. */
        s->padding = 0;
        return 1;
    }

    s->padding = (slice_h >> 1)*p_stride + (slice_w >> 1);
    s->buffer += s->padding;

    return 0;
}
276 |
|
|
|
277 |
|
99 |
/*
 * Release the work buffer owned by a transform context.
 *
 * s->buffer points s->padding elements into the allocation, so the offset
 * is undone before freeing. Nothing is done when there is no buffer (failed
 * or missing init, or a repeated call): subtracting the offset from NULL
 * would hand an invalid pointer to av_free().
 */
av_cold void ff_vc2enc_free_transforms(VC2TransformContext *s)
{
    if (s->buffer) {
        av_free(s->buffer - s->padding);
        s->buffer = NULL;
    }
}