Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (C) 2024 Niklas Haas |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include <assert.h> |
22 |
|
|
#include <string.h> |
23 |
|
|
|
24 |
|
|
#include "libavutil/attributes.h" |
25 |
|
|
#include "libavutil/avassert.h" |
26 |
|
|
#include "libavutil/mem.h" |
27 |
|
|
|
28 |
|
|
#include "cms.h" |
29 |
|
|
#include "csputils.h" |
30 |
|
|
#include "lut3d.h" |
31 |
|
|
|
32 |
|
✗ |
SwsLut3D *ff_sws_lut3d_alloc(void) |
33 |
|
|
{ |
34 |
|
✗ |
SwsLut3D *lut3d = av_malloc(sizeof(*lut3d)); |
35 |
|
✗ |
if (!lut3d) |
36 |
|
✗ |
return NULL; |
37 |
|
|
|
38 |
|
✗ |
lut3d->dynamic = false; |
39 |
|
✗ |
return lut3d; |
40 |
|
|
} |
41 |
|
|
|
42 |
|
✗ |
void ff_sws_lut3d_free(SwsLut3D **plut3d) |
43 |
|
|
{ |
44 |
|
✗ |
av_freep(plut3d); |
45 |
|
✗ |
} |
46 |
|
|
|
47 |
|
✗ |
bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output) |
48 |
|
|
{ |
49 |
|
✗ |
return fmt == AV_PIX_FMT_RGBA64; |
50 |
|
|
} |
51 |
|
|
|
52 |
|
✗ |
enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output) |
53 |
|
|
{ |
54 |
|
✗ |
return AV_PIX_FMT_RGBA64; |
55 |
|
|
} |
56 |
|
|
|
57 |
|
|
/** |
58 |
|
|
* v0 and v1 are 'black' and 'white' |
59 |
|
|
* v2 and v3 are closest RGB/CMY vertices |
60 |
|
|
* x >= y >= z are relative weights |
61 |
|
|
*/ |
62 |
|
|
static av_always_inline |
63 |
|
✗ |
v3u16_t barycentric(int shift, int x, int y, int z, |
64 |
|
|
v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3) |
65 |
|
|
{ |
66 |
|
✗ |
const int a = (1 << shift) - x; |
67 |
|
✗ |
const int b = x - y; |
68 |
|
✗ |
const int c = y - z; |
69 |
|
✗ |
const int d = z; |
70 |
|
|
av_assert2(x >= y); |
71 |
|
|
av_assert2(y >= z); |
72 |
|
|
|
73 |
|
✗ |
return (v3u16_t) { |
74 |
|
✗ |
(a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift, |
75 |
|
✗ |
(a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift, |
76 |
|
✗ |
(a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift, |
77 |
|
|
}; |
78 |
|
|
} |
79 |
|
|
|
80 |
|
|
static av_always_inline |
81 |
|
✗ |
v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx, |
82 |
|
|
int Rf, int Gf, int Bf) |
83 |
|
|
{ |
84 |
|
✗ |
const int shift = 16 - INPUT_LUT_BITS; |
85 |
|
✗ |
const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1); |
86 |
|
✗ |
const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1); |
87 |
|
✗ |
const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1); |
88 |
|
|
|
89 |
|
✗ |
const v3u16_t c000 = lut3d->input[Bx][Gx][Rx]; |
90 |
|
✗ |
const v3u16_t c111 = lut3d->input[Bn][Gn][Rn]; |
91 |
|
✗ |
if (Rf > Gf) { |
92 |
|
✗ |
if (Gf > Bf) { |
93 |
|
✗ |
const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; |
94 |
|
✗ |
const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; |
95 |
|
✗ |
return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111); |
96 |
|
✗ |
} else if (Rf > Bf) { |
97 |
|
✗ |
const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; |
98 |
|
✗ |
const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; |
99 |
|
✗ |
return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111); |
100 |
|
|
} else { |
101 |
|
✗ |
const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; |
102 |
|
✗ |
const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; |
103 |
|
✗ |
return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111); |
104 |
|
|
} |
105 |
|
|
} else { |
106 |
|
✗ |
if (Bf > Gf) { |
107 |
|
✗ |
const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; |
108 |
|
✗ |
const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; |
109 |
|
✗ |
return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111); |
110 |
|
✗ |
} else if (Bf > Rf) { |
111 |
|
✗ |
const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; |
112 |
|
✗ |
const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; |
113 |
|
✗ |
return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111); |
114 |
|
|
} else { |
115 |
|
✗ |
const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; |
116 |
|
✗ |
const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; |
117 |
|
✗ |
return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111); |
118 |
|
|
} |
119 |
|
|
} |
120 |
|
|
} |
121 |
|
|
|
122 |
|
✗ |
static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb) |
123 |
|
|
{ |
124 |
|
✗ |
const int shift = 16 - INPUT_LUT_BITS; |
125 |
|
✗ |
const int Rx = rgb.x >> shift; |
126 |
|
✗ |
const int Gx = rgb.y >> shift; |
127 |
|
✗ |
const int Bx = rgb.z >> shift; |
128 |
|
✗ |
const int Rf = rgb.x & ((1 << shift) - 1); |
129 |
|
✗ |
const int Gf = rgb.y & ((1 << shift) - 1); |
130 |
|
✗ |
const int Bf = rgb.z & ((1 << shift) - 1); |
131 |
|
✗ |
return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); |
132 |
|
|
} |
133 |
|
|
|
134 |
|
|
static av_always_inline v3u16_t lookup_input8(const SwsLut3D *lut3d, v3u8_t rgb) |
135 |
|
|
{ |
136 |
|
|
static_assert(INPUT_LUT_BITS <= 8, "INPUT_LUT_BITS must be <= 8"); |
137 |
|
|
const int shift = 8 - INPUT_LUT_BITS; |
138 |
|
|
const int Rx = rgb.x >> shift; |
139 |
|
|
const int Gx = rgb.y >> shift; |
140 |
|
|
const int Bx = rgb.z >> shift; |
141 |
|
|
const int Rf = rgb.x & ((1 << shift) - 1); |
142 |
|
|
const int Gf = rgb.y & ((1 << shift) - 1); |
143 |
|
|
const int Bf = rgb.z & ((1 << shift) - 1); |
144 |
|
|
return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); |
145 |
|
|
} |
146 |
|
|
|
147 |
|
|
/** |
148 |
|
|
* Note: These functions are scaled such that x == (1 << shift) corresponds to |
149 |
|
|
* a value of 1.0. This makes them suitable for use when interpolation LUT |
150 |
|
|
* entries with a fractional part that is just masked away from the index, |
151 |
|
|
* since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of |
152 |
|
|
* just slightly *less* than 1.0. |
153 |
|
|
*/ |
154 |
|
✗ |
static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift) |
155 |
|
|
{ |
156 |
|
✗ |
const int xi = (1 << shift) - x; |
157 |
|
✗ |
return (v2u16_t) { |
158 |
|
✗ |
(a.x * xi + b.x * x) >> shift, |
159 |
|
✗ |
(a.y * xi + b.y * x) >> shift, |
160 |
|
|
}; |
161 |
|
|
} |
162 |
|
|
|
163 |
|
✗ |
static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift) |
164 |
|
|
{ |
165 |
|
✗ |
const int xi = (1 << shift) - x; |
166 |
|
✗ |
return (v3u16_t) { |
167 |
|
✗ |
(a.x * xi + b.x * x) >> shift, |
168 |
|
✗ |
(a.y * xi + b.y * x) >> shift, |
169 |
|
✗ |
(a.z * xi + b.z * x) >> shift, |
170 |
|
|
}; |
171 |
|
|
} |
172 |
|
|
|
173 |
|
✗ |
static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt) |
174 |
|
|
{ |
175 |
|
✗ |
const int Ishift = 16 - OUTPUT_LUT_BITS_I; |
176 |
|
✗ |
const int Cshift = 16 - OUTPUT_LUT_BITS_PT; |
177 |
|
✗ |
const int Ix = ipt.x >> Ishift; |
178 |
|
✗ |
const int Px = ipt.y >> Cshift; |
179 |
|
✗ |
const int Tx = ipt.z >> Cshift; |
180 |
|
✗ |
const int If = ipt.x & ((1 << Ishift) - 1); |
181 |
|
✗ |
const int Pf = ipt.y & ((1 << Cshift) - 1); |
182 |
|
✗ |
const int Tf = ipt.z & ((1 << Cshift) - 1); |
183 |
|
✗ |
const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I - 1); |
184 |
|
✗ |
const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1); |
185 |
|
✗ |
const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1); |
186 |
|
|
|
187 |
|
|
/* Trilinear interpolation */ |
188 |
|
✗ |
const v3u16_t c000 = lut3d->output[Tx][Px][Ix]; |
189 |
|
✗ |
const v3u16_t c001 = lut3d->output[Tx][Px][In]; |
190 |
|
✗ |
const v3u16_t c010 = lut3d->output[Tx][Pn][Ix]; |
191 |
|
✗ |
const v3u16_t c011 = lut3d->output[Tx][Pn][In]; |
192 |
|
✗ |
const v3u16_t c100 = lut3d->output[Tn][Px][Ix]; |
193 |
|
✗ |
const v3u16_t c101 = lut3d->output[Tn][Px][In]; |
194 |
|
✗ |
const v3u16_t c110 = lut3d->output[Tn][Pn][Ix]; |
195 |
|
✗ |
const v3u16_t c111 = lut3d->output[Tn][Pn][In]; |
196 |
|
✗ |
const v3u16_t c00 = lerp3u16(c000, c100, Tf, Cshift); |
197 |
|
✗ |
const v3u16_t c10 = lerp3u16(c010, c110, Tf, Cshift); |
198 |
|
✗ |
const v3u16_t c01 = lerp3u16(c001, c101, Tf, Cshift); |
199 |
|
✗ |
const v3u16_t c11 = lerp3u16(c011, c111, Tf, Cshift); |
200 |
|
✗ |
const v3u16_t c0 = lerp3u16(c00, c10, Pf, Cshift); |
201 |
|
✗ |
const v3u16_t c1 = lerp3u16(c01, c11, Pf, Cshift); |
202 |
|
✗ |
const v3u16_t c = lerp3u16(c0, c1, If, Ishift); |
203 |
|
✗ |
return c; |
204 |
|
|
} |
205 |
|
|
|
206 |
|
✗ |
static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt) |
207 |
|
|
{ |
208 |
|
✗ |
const int shift = 16 - TONE_LUT_BITS; |
209 |
|
✗ |
const int Ix = ipt.x >> shift; |
210 |
|
✗ |
const int If = ipt.x & ((1 << shift) - 1); |
211 |
|
✗ |
const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1); |
212 |
|
|
|
213 |
|
✗ |
const v2u16_t w0 = lut3d->tone_map[Ix]; |
214 |
|
✗ |
const v2u16_t w1 = lut3d->tone_map[In]; |
215 |
|
✗ |
const v2u16_t w = lerp2u16(w0, w1, If, shift); |
216 |
|
✗ |
const int base = (1 << 15) - w.y; |
217 |
|
|
|
218 |
|
✗ |
ipt.x = w.x; |
219 |
|
✗ |
ipt.y = base + (ipt.y * w.y >> 15); |
220 |
|
✗ |
ipt.z = base + (ipt.z * w.y >> 15); |
221 |
|
✗ |
return ipt; |
222 |
|
|
} |
223 |
|
|
|
224 |
|
✗ |
int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in, |
225 |
|
|
enum AVPixelFormat fmt_out, const SwsColorMap *map) |
226 |
|
|
{ |
227 |
|
|
int ret; |
228 |
|
|
|
229 |
|
✗ |
if (!ff_sws_lut3d_test_fmt(fmt_in, 0) || !ff_sws_lut3d_test_fmt(fmt_out, 1)) |
230 |
|
✗ |
return AVERROR(EINVAL); |
231 |
|
|
|
232 |
|
✗ |
lut3d->dynamic = map->src.frame_peak.num > 0; |
233 |
|
✗ |
lut3d->map = *map; |
234 |
|
|
|
235 |
|
✗ |
if (lut3d->dynamic) { |
236 |
|
✗ |
ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0], |
237 |
|
|
&lut3d->output[0][0][0], |
238 |
|
|
INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I, |
239 |
|
|
OUTPUT_LUT_SIZE_PT, map); |
240 |
|
✗ |
if (ret < 0) |
241 |
|
✗ |
return ret; |
242 |
|
|
|
243 |
|
|
/* Make sure initial state is valid */ |
244 |
|
✗ |
ff_sws_lut3d_update(lut3d, &map->src); |
245 |
|
✗ |
return 0; |
246 |
|
|
} else { |
247 |
|
✗ |
return ff_sws_color_map_generate_static(&lut3d->input[0][0][0], |
248 |
|
|
INPUT_LUT_SIZE, map); |
249 |
|
|
} |
250 |
|
|
} |
251 |
|
|
|
252 |
|
✗ |
void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src) |
253 |
|
|
{ |
254 |
|
✗ |
if (!new_src || !lut3d->dynamic) |
255 |
|
✗ |
return; |
256 |
|
|
|
257 |
|
✗ |
lut3d->map.src.frame_peak = new_src->frame_peak; |
258 |
|
✗ |
lut3d->map.src.frame_avg = new_src->frame_avg; |
259 |
|
|
|
260 |
|
✗ |
ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map); |
261 |
|
|
} |
262 |
|
|
|
263 |
|
✗ |
void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride, |
264 |
|
|
uint8_t *out, int out_stride, int w, int h) |
265 |
|
|
{ |
266 |
|
✗ |
while (h--) { |
267 |
|
✗ |
const uint16_t *in16 = (const uint16_t *) in; |
268 |
|
✗ |
uint16_t *out16 = (uint16_t *) out; |
269 |
|
|
|
270 |
|
✗ |
for (int x = 0; x < w; x++) { |
271 |
|
✗ |
v3u16_t c = { in16[0], in16[1], in16[2] }; |
272 |
|
✗ |
c = lookup_input16(lut3d, c); |
273 |
|
|
|
274 |
|
✗ |
if (lut3d->dynamic) { |
275 |
|
✗ |
c = apply_tone_map(lut3d, c); |
276 |
|
✗ |
c = lookup_output(lut3d, c); |
277 |
|
|
} |
278 |
|
|
|
279 |
|
✗ |
out16[0] = c.x; |
280 |
|
✗ |
out16[1] = c.y; |
281 |
|
✗ |
out16[2] = c.z; |
282 |
|
✗ |
out16[3] = in16[3]; |
283 |
|
✗ |
in16 += 4; |
284 |
|
✗ |
out16 += 4; |
285 |
|
|
} |
286 |
|
|
|
287 |
|
✗ |
in += in_stride; |
288 |
|
✗ |
out += out_stride; |
289 |
|
|
} |
290 |
|
✗ |
} |
291 |
|
|
|