FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libswscale/lut3d.c
Date: 2025-01-20 09:27:23
Exec Total Coverage
Lines: 0 150 0.0%
Functions: 0 14 0.0%
Branches: 0 30 0.0%

Line Branch Exec Source
1 /*
2 * Copyright (C) 2024 Niklas Haas
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <assert.h>
22 #include <string.h>
23
24 #include "libavutil/attributes.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/mem.h"
27
28 #include "cms.h"
29 #include "csputils.h"
30 #include "lut3d.h"
31
32 SwsLut3D *ff_sws_lut3d_alloc(void)
33 {
34 SwsLut3D *lut3d = av_malloc(sizeof(*lut3d));
35 if (!lut3d)
36 return NULL;
37
38 lut3d->dynamic = false;
39 return lut3d;
40 }
41
42 void ff_sws_lut3d_free(SwsLut3D **plut3d)
43 {
44 av_freep(plut3d);
45 }
46
47 bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output)
48 {
49 return fmt == AV_PIX_FMT_RGBA64;
50 }
51
52 enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output)
53 {
54 return AV_PIX_FMT_RGBA64;
55 }
56
57 /**
58 * v0 and v1 are 'black' and 'white'
59 * v2 and v3 are closest RGB/CMY vertices
60 * x >= y >= z are relative weights
61 */
62 static av_always_inline
63 v3u16_t barycentric(int shift, int x, int y, int z,
64 v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3)
65 {
66 const int a = (1 << shift) - x;
67 const int b = x - y;
68 const int c = y - z;
69 const int d = z;
70 av_assert2(x >= y);
71 av_assert2(y >= z);
72
73 return (v3u16_t) {
74 (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift,
75 (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift,
76 (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift,
77 };
78 }
79
80 static av_always_inline
81 v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx,
82 int Rf, int Gf, int Bf)
83 {
84 const int shift = 16 - INPUT_LUT_BITS;
85 const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1);
86 const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1);
87 const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1);
88
89 const v3u16_t c000 = lut3d->input[Bx][Gx][Rx];
90 const v3u16_t c111 = lut3d->input[Bn][Gn][Rn];
91 if (Rf > Gf) {
92 if (Gf > Bf) {
93 const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
94 const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
95 return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111);
96 } else if (Rf > Bf) {
97 const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
98 const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
99 return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111);
100 } else {
101 const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
102 const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
103 return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111);
104 }
105 } else {
106 if (Bf > Gf) {
107 const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
108 const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
109 return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111);
110 } else if (Bf > Rf) {
111 const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
112 const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
113 return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111);
114 } else {
115 const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
116 const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
117 return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111);
118 }
119 }
120 }
121
122 static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb)
123 {
124 const int shift = 16 - INPUT_LUT_BITS;
125 const int Rx = rgb.x >> shift;
126 const int Gx = rgb.y >> shift;
127 const int Bx = rgb.z >> shift;
128 const int Rf = rgb.x & ((1 << shift) - 1);
129 const int Gf = rgb.y & ((1 << shift) - 1);
130 const int Bf = rgb.z & ((1 << shift) - 1);
131 return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
132 }
133
134 static av_always_inline v3u16_t lookup_input8(const SwsLut3D *lut3d, v3u8_t rgb)
135 {
136 static_assert(INPUT_LUT_BITS <= 8, "INPUT_LUT_BITS must be <= 8");
137 const int shift = 8 - INPUT_LUT_BITS;
138 const int Rx = rgb.x >> shift;
139 const int Gx = rgb.y >> shift;
140 const int Bx = rgb.z >> shift;
141 const int Rf = rgb.x & ((1 << shift) - 1);
142 const int Gf = rgb.y & ((1 << shift) - 1);
143 const int Bf = rgb.z & ((1 << shift) - 1);
144 return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
145 }
146
147 /**
148 * Note: These functions are scaled such that x == (1 << shift) corresponds to
149 * a value of 1.0. This makes them suitable for use when interpolating LUT
150 * entries with a fractional part that is just masked away from the index,
151 * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of
152 * just slightly *less* than 1.0.
153 */
154 static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift)
155 {
156 const int xi = (1 << shift) - x;
157 return (v2u16_t) {
158 (a.x * xi + b.x * x) >> shift,
159 (a.y * xi + b.y * x) >> shift,
160 };
161 }
162
163 static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift)
164 {
165 const int xi = (1 << shift) - x;
166 return (v3u16_t) {
167 (a.x * xi + b.x * x) >> shift,
168 (a.y * xi + b.y * x) >> shift,
169 (a.z * xi + b.z * x) >> shift,
170 };
171 }
172
173 static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt)
174 {
175 const int Ishift = 16 - OUTPUT_LUT_BITS_I;
176 const int Cshift = 16 - OUTPUT_LUT_BITS_PT;
177 const int Ix = ipt.x >> Ishift;
178 const int Px = ipt.y >> Cshift;
179 const int Tx = ipt.z >> Cshift;
180 const int If = ipt.x & ((1 << Ishift) - 1);
181 const int Pf = ipt.y & ((1 << Cshift) - 1);
182 const int Tf = ipt.z & ((1 << Cshift) - 1);
183 const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I - 1);
184 const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1);
185 const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1);
186
187 /* Trilinear interpolation */
188 const v3u16_t c000 = lut3d->output[Tx][Px][Ix];
189 const v3u16_t c001 = lut3d->output[Tx][Px][In];
190 const v3u16_t c010 = lut3d->output[Tx][Pn][Ix];
191 const v3u16_t c011 = lut3d->output[Tx][Pn][In];
192 const v3u16_t c100 = lut3d->output[Tn][Px][Ix];
193 const v3u16_t c101 = lut3d->output[Tn][Px][In];
194 const v3u16_t c110 = lut3d->output[Tn][Pn][Ix];
195 const v3u16_t c111 = lut3d->output[Tn][Pn][In];
196 const v3u16_t c00 = lerp3u16(c000, c100, Tf, Cshift);
197 const v3u16_t c10 = lerp3u16(c010, c110, Tf, Cshift);
198 const v3u16_t c01 = lerp3u16(c001, c101, Tf, Cshift);
199 const v3u16_t c11 = lerp3u16(c011, c111, Tf, Cshift);
200 const v3u16_t c0 = lerp3u16(c00, c10, Pf, Cshift);
201 const v3u16_t c1 = lerp3u16(c01, c11, Pf, Cshift);
202 const v3u16_t c = lerp3u16(c0, c1, If, Ishift);
203 return c;
204 }
205
206 static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt)
207 {
208 const int shift = 16 - TONE_LUT_BITS;
209 const int Ix = ipt.x >> shift;
210 const int If = ipt.x & ((1 << shift) - 1);
211 const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1);
212
213 const v2u16_t w0 = lut3d->tone_map[Ix];
214 const v2u16_t w1 = lut3d->tone_map[In];
215 const v2u16_t w = lerp2u16(w0, w1, If, shift);
216 const int base = (1 << 15) - w.y;
217
218 ipt.x = w.x;
219 ipt.y = base + (ipt.y * w.y >> 15);
220 ipt.z = base + (ipt.z * w.y >> 15);
221 return ipt;
222 }
223
224 int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in,
225 enum AVPixelFormat fmt_out, const SwsColorMap *map)
226 {
227 int ret;
228
229 if (!ff_sws_lut3d_test_fmt(fmt_in, 0) || !ff_sws_lut3d_test_fmt(fmt_out, 1))
230 return AVERROR(EINVAL);
231
232 lut3d->dynamic = map->src.frame_peak.num > 0;
233 lut3d->map = *map;
234
235 if (lut3d->dynamic) {
236 ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0],
237 &lut3d->output[0][0][0],
238 INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I,
239 OUTPUT_LUT_SIZE_PT, map);
240 if (ret < 0)
241 return ret;
242
243 /* Make sure initial state is valid */
244 ff_sws_lut3d_update(lut3d, &map->src);
245 return 0;
246 } else {
247 return ff_sws_color_map_generate_static(&lut3d->input[0][0][0],
248 INPUT_LUT_SIZE, map);
249 }
250 }
251
252 void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src)
253 {
254 if (!new_src || !lut3d->dynamic)
255 return;
256
257 lut3d->map.src.frame_peak = new_src->frame_peak;
258 lut3d->map.src.frame_avg = new_src->frame_avg;
259
260 ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map);
261 }
262
263 void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride,
264 uint8_t *out, int out_stride, int w, int h)
265 {
266 while (h--) {
267 const uint16_t *in16 = (const uint16_t *) in;
268 uint16_t *out16 = (uint16_t *) out;
269
270 for (int x = 0; x < w; x++) {
271 v3u16_t c = { in16[0], in16[1], in16[2] };
272 c = lookup_input16(lut3d, c);
273
274 if (lut3d->dynamic) {
275 c = apply_tone_map(lut3d, c);
276 c = lookup_output(lut3d, c);
277 }
278
279 out16[0] = c.x;
280 out16[1] = c.y;
281 out16[2] = c.z;
282 out16[3] = in16[3];
283 in16 += 4;
284 out16 += 4;
285 }
286
287 in += in_stride;
288 out += out_stride;
289 }
290 }
291