Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | * Copyright (C) 2024 Niklas Haas | ||
3 | * | ||
4 | * This file is part of FFmpeg. | ||
5 | * | ||
6 | * FFmpeg is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU Lesser General Public | ||
8 | * License as published by the Free Software Foundation; either | ||
9 | * version 2.1 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * FFmpeg is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * Lesser General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU Lesser General Public | ||
17 | * License along with FFmpeg; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | */ | ||
20 | |||
21 | #include <math.h> | ||
22 | #include <string.h> | ||
23 | |||
24 | #include "libavutil/attributes.h" | ||
25 | #include "libavutil/avassert.h" | ||
26 | #include "libavutil/csp.h" | ||
27 | #include "libavutil/slicethread.h" | ||
28 | |||
29 | #include "cms.h" | ||
30 | #include "csputils.h" | ||
31 | #include "libswscale/swscale.h" | ||
32 | #include "utils.h" | ||
33 | |||
34 | 5508 | bool ff_sws_color_map_noop(const SwsColorMap *map) | |
35 | { | ||
36 | /* If the encoding space is different, we must go through a conversion */ | ||
37 | 2/4 (B0 ✓ 5508, B1 ✗, B2 ✗, B3 ✓ 5508) | 5508 | if (map->src.prim != map->dst.prim || map->src.trc != map->dst.trc) |
38 | ✗ | return false; | |
39 | |||
40 | /* If the black point changes, we have to perform black point compensation */ | ||
41 | 1/2 (B1 ✗, B2 ✓ 5508) | 5508 | if (av_cmp_q(map->src.min_luma, map->dst.min_luma)) |
42 | ✗ | return false; | |
43 | |||
44 | 1/3 (B0 ✓ 5508, B1 ✗, B2 ✗) | 5508 | switch (map->intent) { |
45 | 5508 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: | |
46 | case SWS_INTENT_RELATIVE_COLORIMETRIC: | ||
47 | 2/4 (B1 ✓ 5508, B2 ✗, B3 ✓ 5508, B4 ✗) | 11016 | return ff_prim_superset(&map->dst.gamut, &map->src.gamut) && |
48 | 5508 | av_cmp_q(map->src.max_luma, map->dst.max_luma) <= 0; | |
49 | ✗ | case SWS_INTENT_PERCEPTUAL: | |
50 | case SWS_INTENT_SATURATION: | ||
51 | ✗ | return ff_prim_equal(&map->dst.gamut, &map->src.gamut) && | |
52 | ✗ | !av_cmp_q(map->src.max_luma, map->dst.max_luma); | |
53 | ✗ | default: | |
54 | ✗ | av_assert0(!"Invalid gamut mapping intent?"); | |
55 | return true; | ||
56 | } | ||
57 | } | ||
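ff_sws_color_map_noop() lets a caller skip color management entirely when the requested mapping is an identity. A minimal, hypothetical caller sketch (not the actual swscale graph integration) of how such a check is typically used:

```c
/* Hypothetical caller sketch: bail out of CMS setup early when the mapping
 * is a no-op. The surrounding setup logic is illustrative only. */
static int setup_cms(const SwsColorMap *map)
{
    if (ff_sws_color_map_noop(map))
        return 0; /* identity mapping: pass pixels through untouched */

    /* ...otherwise generate the 3D LUT, e.g. via
     * ff_sws_color_map_generate_static() or the dynamic variant... */
    return 1;
}
```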
58 | |||
59 | /* Approximation of gamut hull at a given intensity level */ | ||
60 | ✗ | static float hull(float I) | |
61 | { | ||
62 | ✗ | return ((I - 6.0f) * I + 9.0f) * I; | |
63 | } | ||
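For reference, the cubic used by hull() factors neatly, which makes its shape easy to see; this is just an algebraic rewrite of the expression above:

```latex
\mathrm{hull}(I) = \big((I - 6)I + 9\big)\,I = I^3 - 6I^2 + 9I = I\,(I - 3)^2
```

It is zero at I = 0, rises to a local maximum of 4 at I = 1, and falls back to zero at I = 3.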
64 | |||
65 | /* For some minimal type safety, and code cleanliness */ | ||
66 | typedef struct RGB { | ||
67 | float R, G, B; /* nits */ | ||
68 | } RGB; | ||
69 | |||
70 | typedef struct IPT { | ||
71 | float I, P, T; | ||
72 | } IPT; | ||
73 | |||
74 | typedef struct ICh { | ||
75 | float I, C, h; | ||
76 | } ICh; | ||
77 | |||
78 | ✗ | static av_always_inline ICh ipt2ich(IPT c) | |
79 | { | ||
80 | ✗ | return (ICh) { | |
81 | ✗ | .I = c.I, | |
82 | ✗ | .C = sqrtf(c.P * c.P + c.T * c.T), | |
83 | ✗ | .h = atan2f(c.T, c.P), | |
84 | }; | ||
85 | } | ||
86 | |||
87 | ✗ | static av_always_inline IPT ich2ipt(ICh c) | |
88 | { | ||
89 | ✗ | return (IPT) { | |
90 | ✗ | .I = c.I, | |
91 | ✗ | .P = c.C * cosf(c.h), | |
92 | ✗ | .T = c.C * sinf(c.h), | |
93 | }; | ||
94 | } | ||
95 | |||
96 | /* Helper struct containing pre-computed cached values describing a gamut */ | ||
97 | typedef struct Gamut { | ||
98 | SwsMatrix3x3 encoding2lms; | ||
99 | SwsMatrix3x3 lms2encoding; | ||
100 | SwsMatrix3x3 lms2content; | ||
101 | SwsMatrix3x3 content2lms; | ||
102 | av_csp_eotf_function eotf; | ||
103 | av_csp_eotf_function eotf_inv; | ||
104 | float Iavg_frame; | ||
105 | float Imax_frame; | ||
106 | float Imin, Imax; | ||
107 | float Lb, Lw; | ||
108 | AVCIExy wp; | ||
109 | ICh peak; /* updated as needed in loop body when hue changes */ | ||
110 | } Gamut; | ||
111 | |||
112 | ✗ | static Gamut gamut_from_colorspace(SwsColor fmt) | |
113 | { | ||
114 | ✗ | const AVColorPrimariesDesc *encoding = av_csp_primaries_desc_from_id(fmt.prim); | |
115 | ✗ | const AVColorPrimariesDesc content = { | |
116 | .prim = fmt.gamut, | ||
117 | .wp = encoding->wp, | ||
118 | }; | ||
119 | |||
120 | ✗ | const float Lw = av_q2d(fmt.max_luma), Lb = av_q2d(fmt.min_luma); | |
121 | ✗ | const float Imax = pq_oetf(Lw); | |
122 | |||
123 | ✗ | return (Gamut) { | |
124 | ✗ | .encoding2lms = ff_sws_ipt_rgb2lms(encoding), | |
125 | ✗ | .lms2encoding = ff_sws_ipt_lms2rgb(encoding), | |
126 | ✗ | .lms2content = ff_sws_ipt_lms2rgb(&content), | |
127 | ✗ | .content2lms = ff_sws_ipt_rgb2lms(&content), | |
128 | ✗ | .eotf = av_csp_itu_eotf(fmt.trc), | |
129 | ✗ | .eotf_inv = av_csp_itu_eotf_inv(fmt.trc), | |
130 | .wp = encoding->wp, | ||
131 | ✗ | .Imin = pq_oetf(Lb), | |
132 | .Imax = Imax, | ||
133 | ✗ | .Imax_frame = fmt.frame_peak.den ? pq_oetf(av_q2d(fmt.frame_peak)) : Imax, | |
134 | ✗ | .Iavg_frame = fmt.frame_avg.den ? pq_oetf(av_q2d(fmt.frame_avg)) : 0.0f, | |
135 | .Lb = Lb, | ||
136 | .Lw = Lw, | ||
137 | }; | ||
138 | } | ||
139 | |||
140 | ✗ | static av_always_inline IPT rgb2ipt(RGB c, const SwsMatrix3x3 rgb2lms) | |
141 | { | ||
142 | ✗ | const float L = rgb2lms.m[0][0] * c.R + | |
143 | ✗ | rgb2lms.m[0][1] * c.G + | |
144 | ✗ | rgb2lms.m[0][2] * c.B; | |
145 | ✗ | const float M = rgb2lms.m[1][0] * c.R + | |
146 | ✗ | rgb2lms.m[1][1] * c.G + | |
147 | ✗ | rgb2lms.m[1][2] * c.B; | |
148 | ✗ | const float S = rgb2lms.m[2][0] * c.R + | |
149 | ✗ | rgb2lms.m[2][1] * c.G + | |
150 | ✗ | rgb2lms.m[2][2] * c.B; | |
151 | ✗ | const float Lp = pq_oetf(L); | |
152 | ✗ | const float Mp = pq_oetf(M); | |
153 | ✗ | const float Sp = pq_oetf(S); | |
154 | ✗ | return (IPT) { | |
155 | ✗ | .I = 0.4000f * Lp + 0.4000f * Mp + 0.2000f * Sp, | |
156 | ✗ | .P = 4.4550f * Lp - 4.8510f * Mp + 0.3960f * Sp, | |
157 | ✗ | .T = 0.8056f * Lp + 0.3572f * Mp - 1.1628f * Sp, | |
158 | }; | ||
159 | } | ||
160 | |||
161 | ✗ | static av_always_inline RGB ipt2rgb(IPT c, const SwsMatrix3x3 lms2rgb) | |
162 | { | ||
163 | ✗ | const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; | |
164 | ✗ | const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; | |
165 | ✗ | const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; | |
166 | ✗ | const float L = pq_eotf(Lp); | |
167 | ✗ | const float M = pq_eotf(Mp); | |
168 | ✗ | const float S = pq_eotf(Sp); | |
169 | ✗ | return (RGB) { | |
170 | ✗ | .R = lms2rgb.m[0][0] * L + | |
171 | ✗ | lms2rgb.m[0][1] * M + | |
172 | ✗ | lms2rgb.m[0][2] * S, | |
173 | ✗ | .G = lms2rgb.m[1][0] * L + | |
174 | ✗ | lms2rgb.m[1][1] * M + | |
175 | ✗ | lms2rgb.m[1][2] * S, | |
176 | ✗ | .B = lms2rgb.m[2][0] * L + | |
177 | ✗ | lms2rgb.m[2][1] * M + | |
178 | ✗ | lms2rgb.m[2][2] * S, | |
179 | }; | ||
180 | } | ||
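The rgb2ipt()/ipt2rgb() pair above applies an RGB→LMS matrix, PQ-encodes the L, M, S components via pq_oetf(), and then applies the fixed LMS′→IPT matrix (and the reverse on the way back). For reference, here is a self-contained sketch of a PQ (SMPTE ST 2084) transfer pair using the published constants and absolute luminance in nits; swscale's pq_oetf()/pq_eotf() are internal helpers and may be organized differently, so treat this as an illustration rather than the library implementation.

```c
#include <math.h>
#include <stdio.h>

/* Standalone PQ (SMPTE ST 2084) reference pair; illustrative only. */
#define PQ_M1 0.1593017578125   /* 2610 / 16384       */
#define PQ_M2 78.84375          /* 2523 / 4096 * 128  */
#define PQ_C1 0.8359375         /* 3424 / 4096        */
#define PQ_C2 18.8515625        /* 2413 / 4096 * 32   */
#define PQ_C3 18.6875           /* 2392 / 4096 * 32   */

static float ref_pq_oetf(float nits)   /* luminance (nits) -> PQ signal [0,1] */
{
    const float y = powf(fmaxf(nits, 0.0f) / 10000.0f, PQ_M1);
    return powf((PQ_C1 + PQ_C2 * y) / (1.0f + PQ_C3 * y), PQ_M2);
}

static float ref_pq_eotf(float pq)     /* PQ signal [0,1] -> luminance (nits) */
{
    const float e = powf(fmaxf(pq, 0.0f), 1.0f / PQ_M2);
    return 10000.0f * powf(fmaxf(e - PQ_C1, 0.0f) / (PQ_C2 - PQ_C3 * e),
                           1.0f / PQ_M1);
}

int main(void)
{
    /* Round-trip a few luminance values; errors stay tiny in float. */
    const float nits[] = { 0.005f, 1.0f, 100.0f, 203.0f, 1000.0f, 10000.0f };
    for (size_t i = 0; i < sizeof(nits) / sizeof(nits[0]); i++)
        printf("%8.3f nits -> PQ %.6f -> %8.3f nits\n",
               nits[i], ref_pq_oetf(nits[i]), ref_pq_eotf(ref_pq_oetf(nits[i])));
    return 0;
}
```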
181 | |||
182 | ✗ | static inline bool ingamut(IPT c, Gamut gamut) | |
183 | { | ||
184 | ✗ | const float min_rgb = gamut.Lb - 1e-4f; | |
185 | ✗ | const float max_rgb = gamut.Lw + 1e-2f; | |
186 | ✗ | const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T; | |
187 | ✗ | const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T; | |
188 | ✗ | const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T; | |
189 | ✗ | if (Lp < gamut.Imin || Lp > gamut.Imax || | |
190 | ✗ | Mp < gamut.Imin || Mp > gamut.Imax || | |
191 | ✗ | Sp < gamut.Imin || Sp > gamut.Imax) | |
192 | { | ||
193 | /* Values outside legal LMS range */ | ||
194 | ✗ | return false; | |
195 | } else { | ||
196 | ✗ | const float L = pq_eotf(Lp); | |
197 | ✗ | const float M = pq_eotf(Mp); | |
198 | ✗ | const float S = pq_eotf(Sp); | |
199 | ✗ | RGB rgb = { | |
200 | ✗ | .R = gamut.lms2content.m[0][0] * L + | |
201 | ✗ | gamut.lms2content.m[0][1] * M + | |
202 | ✗ | gamut.lms2content.m[0][2] * S, | |
203 | ✗ | .G = gamut.lms2content.m[1][0] * L + | |
204 | ✗ | gamut.lms2content.m[1][1] * M + | |
205 | ✗ | gamut.lms2content.m[1][2] * S, | |
206 | ✗ | .B = gamut.lms2content.m[2][0] * L + | |
207 | ✗ | gamut.lms2content.m[2][1] * M + | |
208 | ✗ | gamut.lms2content.m[2][2] * S, | |
209 | }; | ||
210 | ✗ | return rgb.R >= min_rgb && rgb.R <= max_rgb && | |
211 | ✗ | rgb.G >= min_rgb && rgb.G <= max_rgb && | |
212 | ✗ | rgb.B >= min_rgb && rgb.B <= max_rgb; | |
213 | } | ||
214 | } | ||
215 | |||
216 | static const float maxDelta = 5e-5f; | ||
217 | |||
218 | // Find gamut intersection using specified bounds | ||
219 | static inline ICh | ||
220 | ✗ | desat_bounded(float I, float h, float Cmin, float Cmax, Gamut gamut) | |
221 | { | ||
222 | ✗ | if (I <= gamut.Imin) | |
223 | ✗ | return (ICh) { .I = gamut.Imin, .C = 0, .h = h }; | |
224 | ✗ | else if (I >= gamut.Imax) | |
225 | ✗ | return (ICh) { .I = gamut.Imax, .C = 0, .h = h }; | |
226 | else { | ||
227 | ✗ | const float maxDI = I * maxDelta; | |
228 | ✗ | ICh res = { .I = I, .C = (Cmin + Cmax) / 2, .h = h }; | |
229 | do { | ||
230 | ✗ | if (ingamut(ich2ipt(res), gamut)) { | |
231 | ✗ | Cmin = res.C; | |
232 | } else { | ||
233 | ✗ | Cmax = res.C; | |
234 | } | ||
235 | ✗ | res.C = (Cmin + Cmax) / 2; | |
236 | ✗ | } while (Cmax - Cmin > maxDI); | |
237 | |||
238 | ✗ | return res; | |
239 | } | ||
240 | } | ||
241 | |||
242 | // Finds maximally saturated in-gamut color (for given hue) | ||
243 | ✗ | static inline ICh saturate(float hue, Gamut gamut) | |
244 | { | ||
245 | static const float invphi = 0.6180339887498948f; | ||
246 | static const float invphi2 = 0.38196601125010515f; | ||
247 | |||
248 | ✗ | ICh lo = { .I = gamut.Imin, .h = hue }; | |
249 | ✗ | ICh hi = { .I = gamut.Imax, .h = hue }; | |
250 | ✗ | float de = hi.I - lo.I; | |
251 | ✗ | ICh a = { .I = lo.I + invphi2 * de }; | |
252 | ✗ | ICh b = { .I = lo.I + invphi * de }; | |
253 | ✗ | a = desat_bounded(a.I, hue, 0.0f, 0.5f, gamut); | |
254 | ✗ | b = desat_bounded(b.I, hue, 0.0f, 0.5f, gamut); | |
255 | |||
256 | ✗ | while (de > maxDelta) { | |
257 | ✗ | de *= invphi; | |
258 | ✗ | if (a.C > b.C) { | |
259 | ✗ | hi = b; | |
260 | ✗ | b = a; | |
261 | ✗ | a.I = lo.I + invphi2 * de; | |
262 | ✗ | a = desat_bounded(a.I, hue, lo.C - maxDelta, 0.5f, gamut); | |
263 | } else { | ||
264 | ✗ | lo = a; | |
265 | ✗ | a = b; | |
266 | ✗ | b.I = lo.I + invphi * de; | |
267 | ✗ | b = desat_bounded(b.I, hue, hi.C - maxDelta, 0.5f, gamut); | |
268 | } | ||
269 | } | ||
270 | |||
271 | ✗ | return a.C > b.C ? a : b; | |
272 | } | ||
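saturate() is a golden-section search over intensity for the hue's maximum in-gamut chroma (the gamut cusp). Below is a minimal, self-contained illustration of the same search pattern on a toy unimodal function; the interval updates mirror the invphi/invphi2 scheme above, but the objective is made up purely for the example.

```c
#include <math.h>
#include <stdio.h>

/* Toy unimodal objective standing in for "boundary chroma at intensity I". */
static float toy_objective(float I)
{
    return I * (3.0f - I);              /* peaks at I = 1.5 */
}

/* Golden-section search for the maximum of a unimodal function on [lo, hi],
 * using the same bracketing scheme as saturate() above. */
static float golden_max(float lo, float hi, float tol)
{
    const float invphi  = 0.6180339887498948f;   /* 1/phi   */
    const float invphi2 = 0.38196601125010515f;  /* 1/phi^2 */
    float de = hi - lo;
    float a  = lo + invphi2 * de, fa = toy_objective(a);
    float b  = lo + invphi  * de, fb = toy_objective(b);

    while (de > tol) {
        de *= invphi;
        if (fa > fb) {          /* maximum lies in [lo, b]: shrink from above */
            hi = b;  b = a;  fb = fa;
            a  = lo + invphi2 * de;  fa = toy_objective(a);
        } else {                /* maximum lies in [a, hi]: shrink from below */
            lo = a;  a = b;  fa = fb;
            b  = lo + invphi  * de;  fb = toy_objective(b);
        }
    }
    return fa > fb ? a : b;
}

int main(void)
{
    printf("argmax ~ %f (expected 1.5)\n", golden_max(0.0f, 3.0f, 1e-5f));
    return 0;
}
```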
273 | |||
274 | ✗ | static float softclip(float value, float source, float target) | |
275 | { | ||
276 | ✗ | const float j = SOFTCLIP_KNEE; | |
277 | float peak, x, a, b, scale; | ||
278 | ✗ | if (!target) | |
279 | ✗ | return 0.0f; | |
280 | |||
281 | ✗ | peak = source / target; | |
282 | ✗ | x = fminf(value / target, peak); | |
283 | ✗ | if (x <= j || peak <= 1.0) | |
284 | ✗ | return value; | |
285 | |||
286 | /* Apply simple mobius function */ | ||
287 | ✗ | a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak); | |
288 | ✗ | b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f); | |
289 | ✗ | scale = (b*b + 2.0f * b*j + j*j) / (b - a); | |
290 | |||
291 | ✗ | return scale * (x + a) / (x + b) * target; | |
292 | } | ||
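The Möbius segment applied above is the standard construction that joins the identity smoothly at the knee and maps the source peak onto the target range. Writing j = SOFTCLIP_KNEE, p = source/target and f(x) = scale·(x + a)/(x + b) for the normalized input x = value/target, the a, b and scale computed above satisfy:

```latex
f(j) = j, \qquad f'(j) = 1, \qquad f(p) = 1
```

so inputs below the knee pass through unchanged, and everything up to the source peak is compressed smoothly into the remaining headroom before the final rescale by `target`.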
293 | |||
294 | /** | ||
295 | * Something like fmixf(base, c, x) but follows an exponential curve; note | ||
296 | * that this can be used to extend 'c' outwards for x > 1 | ||
297 | */ | ||
298 | ✗ | static inline ICh mix_exp(ICh c, float x, float gamma, float base) | |
299 | { | ||
300 | ✗ | return (ICh) { | |
301 | ✗ | .I = base + (c.I - base) * powf(x, gamma), | |
302 | ✗ | .C = c.C * x, | |
303 | ✗ | .h = c.h, | |
304 | }; | ||
305 | } | ||
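Geometrically, as x runs from 1 down to 0, mix_exp() traces a curve from the original color to an achromatic point at the base intensity:

```latex
I(x) = \mathrm{base} + (I_0 - \mathrm{base})\,x^{\gamma}, \quad C(x) = C_0\,x,
\qquad \big(I(1), C(1)\big) = (I_0, C_0), \quad \big(I(0), C(0)\big) = (\mathrm{base}, 0)
```

clip_gamma() below bisects on x along this curve to find the in-gamut point closest to the original color.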
306 | |||
307 | /** | ||
308 | * Drop gamma for colors approaching black or the achromatic axis, to avoid | ||
309 | * numerical instabilities and excessive brightness boosting of grain, while | ||
310 | * strongly boosting gamma for values exceeding the target peak | ||
311 | */ | ||
312 | ✗ | static inline float scale_gamma(float gamma, ICh ich, Gamut gamut) | |
313 | { | ||
314 | ✗ | const float Imin = gamut.Imin; | |
315 | ✗ | const float Irel = fmaxf((ich.I - Imin) / (gamut.peak.I - Imin), 0.0f); | |
316 | ✗ | return gamma * powf(Irel, 3) * fminf(ich.C / gamut.peak.C, 1.0f); | |
317 | } | ||
318 | |||
319 | /* Clip a color along the exponential curve given by `gamma` */ | ||
320 | ✗ | static inline IPT clip_gamma(IPT ipt, float gamma, Gamut gamut) | |
321 | { | ||
322 | ✗ | float lo = 0.0f, hi = 1.0f, x = 0.5f; | |
323 | ✗ | const float maxDI = fmaxf(ipt.I * maxDelta, 1e-7f); | |
324 | ICh ich; | ||
325 | |||
326 | ✗ | if (ipt.I <= gamut.Imin) | |
327 | ✗ | return (IPT) { .I = gamut.Imin }; | |
328 | ✗ | if (ingamut(ipt, gamut)) | |
329 | ✗ | return ipt; | |
330 | |||
331 | ✗ | ich = ipt2ich(ipt); | |
332 | ✗ | if (!gamma) | |
333 | ✗ | return ich2ipt(desat_bounded(ich.I, ich.h, 0.0f, ich.C, gamut)); | |
334 | |||
335 | ✗ | gamma = scale_gamma(gamma, ich, gamut); | |
336 | do { | ||
337 | ✗ | ICh test = mix_exp(ich, x, gamma, gamut.peak.I); | |
338 | ✗ | if (ingamut(ich2ipt(test), gamut)) { | |
339 | ✗ | lo = x; | |
340 | } else { | ||
341 | ✗ | hi = x; | |
342 | } | ||
343 | ✗ | x = (lo + hi) / 2.0f; | |
344 | ✗ | } while (hi - lo > maxDI); | |
345 | |||
346 | ✗ | return ich2ipt(mix_exp(ich, x, gamma, gamut.peak.I)); | |
347 | } | ||
348 | |||
349 | typedef struct CmsCtx CmsCtx; | ||
350 | struct CmsCtx { | ||
351 | /* Tone mapping parameters */ | ||
352 | float Qa, Qb, Qc, Pa, Pb, src_knee, dst_knee; /* perceptual */ | ||
353 | float I_scale, I_offset; /* linear methods */ | ||
354 | |||
355 | /* Colorspace parameters */ | ||
356 | Gamut src; | ||
357 | Gamut tmp; /* after tone mapping */ | ||
358 | Gamut dst; | ||
359 | SwsMatrix3x3 adaptation; /* for absolute intent */ | ||
360 | |||
361 | /* Invocation parameters */ | ||
362 | SwsColorMap map; | ||
363 | float (*tone_map)(const CmsCtx *ctx, float I); | ||
364 | IPT (*adapt_colors)(const CmsCtx *ctx, IPT ipt); | ||
365 | v3u16_t *input; | ||
366 | v3u16_t *output; | ||
367 | |||
368 | /* Threading parameters */ | ||
369 | int slice_size; | ||
370 | int size_input; | ||
371 | int size_output_I; | ||
372 | int size_output_PT; | ||
373 | }; | ||
374 | |||
375 | /** | ||
376 | * Helper function to pick a knee point based on the HDR10+ brightness | ||
377 | * metadata and scene brightness average matching. | ||
378 | * | ||
379 | * Inspired by SMPTE ST2094-10, with some modifications | ||
380 | */ | ||
381 | ✗ | static void st2094_pick_knee(float src_max, float src_min, float src_avg, | |
382 | float dst_max, float dst_min, | ||
383 | float *out_src_knee, float *out_dst_knee) | ||
384 | { | ||
385 | ✗ | const float min_knee = PERCEPTUAL_KNEE_MIN; | |
386 | ✗ | const float max_knee = PERCEPTUAL_KNEE_MAX; | |
387 | ✗ | const float def_knee = PERCEPTUAL_KNEE_DEF; | |
388 | ✗ | const float src_knee_min = fmixf(src_min, src_max, min_knee); | |
389 | ✗ | const float src_knee_max = fmixf(src_min, src_max, max_knee); | |
390 | ✗ | const float dst_knee_min = fmixf(dst_min, dst_max, min_knee); | |
391 | ✗ | const float dst_knee_max = fmixf(dst_min, dst_max, max_knee); | |
392 | float src_knee, target, adapted, tuning, adaptation, dst_knee; | ||
393 | |||
394 | /* Choose source knee based on dynamic source scene brightness */ | ||
395 | ✗ | src_knee = src_avg ? src_avg : fmixf(src_min, src_max, def_knee); | |
396 | ✗ | src_knee = av_clipf(src_knee, src_knee_min, src_knee_max); | |
397 | |||
398 | /* Choose target adaptation point based on linearly re-scaling source knee */ | ||
399 | ✗ | target = (src_knee - src_min) / (src_max - src_min); | |
400 | ✗ | adapted = fmixf(dst_min, dst_max, target); | |
401 | |||
402 | /** | ||
403 | * Choose the destination knee by picking the perceptual adaptation point | ||
404 | * between the source knee and the desired target. This moves the knee | ||
405 | * point, on the vertical axis, closer to the 1:1 (neutral) line. | ||
406 | * | ||
407 | * Adjust the adaptation strength towards 1 based on how close the knee | ||
408 | * point is to its extreme values (min/max knee) | ||
409 | */ | ||
410 | ✗ | tuning = smoothstepf(max_knee, def_knee, target) * | |
411 | ✗ | smoothstepf(min_knee, def_knee, target); | |
412 | ✗ | adaptation = fmixf(1.0f, PERCEPTUAL_ADAPTATION, tuning); | |
413 | ✗ | dst_knee = fmixf(src_knee, adapted, adaptation); | |
414 | ✗ | dst_knee = av_clipf(dst_knee, dst_knee_min, dst_knee_max); | |
415 | |||
416 | ✗ | *out_src_knee = src_knee; | |
417 | ✗ | *out_dst_knee = dst_knee; | |
418 | ✗ | } | |
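To make the rescaling step concrete, here is a worked example with made-up PQ-domain numbers, ignoring the min/max clamps and assuming fmixf(x, y, a) = x + (y − x)·a with an adaptation strength of 0.8 purely for illustration. Take src_min = dst_min = 0, src_max = 0.75, src_avg = src_knee = 0.30 and dst_max = 0.58:

```latex
\begin{aligned}
\text{target}  &= \frac{0.30 - 0.0}{0.75 - 0.0} = 0.40, \\
\text{adapted} &= 0.0 + 0.40\,(0.58 - 0.0) = 0.232, \\
\text{dst\_knee} &= 0.30 + 0.8\,(0.232 - 0.30) \approx 0.246
\end{aligned}
```

The knee moves toward the linearly rescaled target, but not all the way, which keeps midtones closer to the 1:1 (neutral) line.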
419 | |||
420 | ✗ | static void tone_map_setup(CmsCtx *ctx, bool dynamic) | |
421 | { | ||
422 | ✗ | const float dst_min = ctx->dst.Imin; | |
423 | ✗ | const float dst_max = ctx->dst.Imax; | |
424 | ✗ | const float src_min = ctx->src.Imin; | |
425 | ✗ | const float src_max = dynamic ? ctx->src.Imax_frame : ctx->src.Imax; | |
426 | ✗ | const float src_avg = dynamic ? ctx->src.Iavg_frame : 0.0f; | |
427 | float slope, ratio, in_min, in_max, out_min, out_max, t; | ||
428 | |||
429 | ✗ | switch (ctx->map.intent) { | |
430 | ✗ | case SWS_INTENT_PERCEPTUAL: | |
431 | ✗ | st2094_pick_knee(src_max, src_min, src_avg, dst_max, dst_min, | |
432 | &ctx->src_knee, &ctx->dst_knee); | ||
433 | |||
434 | /* Solve for linear knee (Pa = 0) */ | ||
435 | ✗ | slope = (ctx->dst_knee - dst_min) / (ctx->src_knee - src_min); | |
436 | |||
437 | /** | ||
438 | * Tune the slope at the knee point slightly: raise it to a user-provided | ||
439 | * gamma exponent, multiplied by an extra tuning coefficient designed to | ||
440 | * make the slope closer to 1.0 when the difference in peaks is low, and | ||
441 | * closer to linear when the difference between peaks is high. | ||
442 | */ | ||
443 | ✗ | ratio = src_max / dst_max - 1.0f; | |
444 | ✗ | ratio = av_clipf(SLOPE_TUNING * ratio, SLOPE_OFFSET, 1.0f + SLOPE_OFFSET); | |
445 | ✗ | slope = powf(slope, (1.0f - PERCEPTUAL_CONTRAST) * ratio); | |
446 | |||
447 | /* Normalize everything relative to the pivot to make the math easier */ | ||
448 | ✗ | in_min = src_min - ctx->src_knee; | |
449 | ✗ | in_max = src_max - ctx->src_knee; | |
450 | ✗ | out_min = dst_min - ctx->dst_knee; | |
451 | ✗ | out_max = dst_max - ctx->dst_knee; | |
452 | |||
453 | /** | ||
454 | * Solve P of order 2 for: | ||
455 | * P(in_min) = out_min | ||
456 | * P'(0.0) = slope | ||
457 | * P(0.0) = 0.0 | ||
458 | */ | ||
459 | ✗ | ctx->Pa = (out_min - slope * in_min) / (in_min * in_min); | |
460 | ✗ | ctx->Pb = slope; | |
461 | |||
462 | /** | ||
463 | * Solve Q of order 3 for: | ||
464 | * Q(in_max) = out_max | ||
465 | * Q''(in_max) = 0.0 | ||
466 | * Q(0.0) = 0.0 | ||
467 | * Q'(0.0) = slope | ||
468 | */ | ||
469 | ✗ | t = 2 * in_max * in_max; | |
470 | ✗ | ctx->Qa = (slope * in_max - out_max) / (in_max * t); | |
471 | ✗ | ctx->Qb = -3 * (slope * in_max - out_max) / t; | |
472 | ✗ | ctx->Qc = slope; | |
473 | ✗ | break; | |
474 | ✗ | case SWS_INTENT_SATURATION: | |
475 | /* Linear stretch */ | ||
476 | ✗ | ctx->I_scale = (dst_max - dst_min) / (src_max - src_min); | |
477 | ✗ | ctx->I_offset = dst_min - src_min * ctx->I_scale; | |
478 | ✗ | break; | |
479 | ✗ | case SWS_INTENT_RELATIVE_COLORIMETRIC: | |
480 | /* Pure black point adaptation */ | ||
481 | ✗ | ctx->I_scale = src_max / (src_max - src_min) / | |
482 | ✗ | (dst_max / (dst_max - dst_min)); | |
483 | ✗ | ctx->I_offset = dst_min - src_min * ctx->I_scale; | |
484 | ✗ | break; | |
485 | ✗ | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: | |
486 | /* Hard clip */ | ||
487 | ✗ | ctx->I_scale = 1.0f; | |
488 | ✗ | ctx->I_offset = 0.0f; | |
489 | ✗ | break; | |
490 | } | ||
491 | ✗ | } | |
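For reference, the closed forms assigned above follow directly from the stated constraints. With P(x) = Pa·x² + Pb·x and Q(x) = Qa·x³ + Qb·x² + Qc·x (both vanish at the pivot x = 0 by construction):

```latex
\begin{aligned}
P'(0) &= \text{slope} &&\Rightarrow\; P_b = \text{slope}, \\
P(\mathit{in\_min}) &= \mathit{out\_min} &&\Rightarrow\;
  P_a = \frac{\mathit{out\_min} - \text{slope}\cdot\mathit{in\_min}}{\mathit{in\_min}^2}, \\
Q'(0) &= \text{slope} &&\Rightarrow\; Q_c = \text{slope}, \\
Q''(\mathit{in\_max}) &= 0 &&\Rightarrow\; Q_b = -3\,Q_a\,\mathit{in\_max}, \\
Q(\mathit{in\_max}) &= \mathit{out\_max} &&\Rightarrow\;
  Q_a = \frac{\text{slope}\cdot\mathit{in\_max} - \mathit{out\_max}}{2\,\mathit{in\_max}^3}
\end{aligned}
```

which matches the code above with t = 2·in_max².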
492 | |||
493 | ✗ | static av_always_inline IPT tone_map_apply(const CmsCtx *ctx, IPT ipt) | |
494 | { | ||
495 | ✗ | float I = ipt.I, desat; | |
496 | |||
497 | ✗ | if (ctx->map.intent == SWS_INTENT_PERCEPTUAL) { | |
498 | ✗ | const float Pa = ctx->Pa, Pb = ctx->Pb; | |
499 | ✗ | const float Qa = ctx->Qa, Qb = ctx->Qb, Qc = ctx->Qc; | |
500 | ✗ | I -= ctx->src_knee; | |
501 | ✗ | I = I > 0 ? ((Qa * I + Qb) * I + Qc) * I : (Pa * I + Pb) * I; | |
502 | ✗ | I += ctx->dst_knee; | |
503 | } else { | ||
504 | ✗ | I = ctx->I_scale * I + ctx->I_offset; | |
505 | } | ||
506 | |||
507 | /** | ||
508 | * Avoids raising saturation excessively when raising brightness, and | ||
509 | * also desaturates when reducing brightness greatly to account for the | ||
510 | * reduction in gamut volume. | ||
511 | */ | ||
512 | ✗ | desat = fminf(ipt.I / I, hull(I) / hull(ipt.I)); | |
513 | ✗ | return (IPT) { | |
514 | .I = I, | ||
515 | ✗ | .P = ipt.P * desat, | |
516 | ✗ | .T = ipt.T * desat, | |
517 | }; | ||
518 | } | ||
519 | |||
520 | ✗ | static IPT perceptual(const CmsCtx *ctx, IPT ipt) | |
521 | { | ||
522 | ✗ | ICh ich = ipt2ich(ipt); | |
523 | ✗ | IPT mapped = rgb2ipt(ipt2rgb(ipt, ctx->tmp.lms2content), ctx->dst.content2lms); | |
524 | RGB rgb; | ||
525 | float maxRGB; | ||
526 | |||
527 | /* Protect in gamut region */ | ||
528 | ✗ | const float maxC = fmaxf(ctx->tmp.peak.C, ctx->dst.peak.C); | |
529 | ✗ | float k = smoothstepf(PERCEPTUAL_DEADZONE, 1.0f, ich.C / maxC); | |
530 | ✗ | k *= PERCEPTUAL_STRENGTH; | |
531 | ✗ | ipt.I = fmixf(ipt.I, mapped.I, k); | |
532 | ✗ | ipt.P = fmixf(ipt.P, mapped.P, k); | |
533 | ✗ | ipt.T = fmixf(ipt.T, mapped.T, k); | |
534 | |||
535 | ✗ | rgb = ipt2rgb(ipt, ctx->dst.lms2content); | |
536 | ✗ | maxRGB = fmaxf(rgb.R, fmaxf(rgb.G, rgb.B)); | |
537 | ✗ | rgb.R = fmaxf(softclip(rgb.R, maxRGB, ctx->dst.Lw), ctx->dst.Lb); | |
538 | ✗ | rgb.G = fmaxf(softclip(rgb.G, maxRGB, ctx->dst.Lw), ctx->dst.Lb); | |
539 | ✗ | rgb.B = fmaxf(softclip(rgb.B, maxRGB, ctx->dst.Lw), ctx->dst.Lb); | |
540 | |||
541 | ✗ | return rgb2ipt(rgb, ctx->dst.content2lms); | |
542 | } | ||
543 | |||
544 | ✗ | static IPT relative(const CmsCtx *ctx, IPT ipt) | |
545 | { | ||
546 | ✗ | return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst); | |
547 | } | ||
548 | |||
549 | ✗ | static IPT absolute(const CmsCtx *ctx, IPT ipt) | |
550 | { | ||
551 | ✗ | RGB rgb = ipt2rgb(ipt, ctx->dst.lms2encoding); | |
552 | ✗ | float c[3] = { rgb.R, rgb.G, rgb.B }; | |
553 | ✗ | ff_sws_matrix3x3_apply(&ctx->adaptation, c); | |
554 | ✗ | ipt = rgb2ipt((RGB) { c[0], c[1], c[2] }, ctx->dst.encoding2lms); | |
555 | |||
556 | ✗ | return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst); | |
557 | } | ||
558 | |||
559 | ✗ | static IPT saturation(const CmsCtx * ctx, IPT ipt) | |
560 | { | ||
561 | ✗ | RGB rgb = ipt2rgb(ipt, ctx->tmp.lms2content); | |
562 | ✗ | return rgb2ipt(rgb, ctx->dst.content2lms); | |
563 | } | ||
564 | |||
565 | ✗ | static av_always_inline av_const uint16_t av_round16f(float x) | |
566 | { | ||
567 | ✗ | return av_clip_uint16(x * (UINT16_MAX - 1) + 0.5f); | |
568 | } | ||
569 | |||
570 | /* Call this whenever the hue changes inside the loop body */ | ||
571 | ✗ | static av_always_inline void update_hue_peaks(CmsCtx *ctx, float P, float T) | |
572 | { | ||
573 | ✗ | const float hue = atan2f(T, P); | |
574 | ✗ | switch (ctx->map.intent) { | |
575 | ✗ | case SWS_INTENT_PERCEPTUAL: | |
576 | ✗ | ctx->tmp.peak = saturate(hue, ctx->tmp); | |
577 | /* fall through */ | ||
578 | ✗ | case SWS_INTENT_RELATIVE_COLORIMETRIC: | |
579 | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: | ||
580 | ✗ | ctx->dst.peak = saturate(hue, ctx->dst); | |
581 | ✗ | return; | |
582 | ✗ | default: | |
583 | ✗ | return; | |
584 | } | ||
585 | } | ||
586 | |||
587 | ✗ | static void generate_slice(void *priv, int jobnr, int threadnr, int nb_jobs, | |
588 | int nb_threads) | ||
589 | { | ||
590 | ✗ | CmsCtx ctx = *(const CmsCtx *) priv; | |
591 | |||
592 | ✗ | const int slice_start = jobnr * ctx.slice_size; | |
593 | ✗ | const int slice_stride = ctx.size_input * ctx.size_input; | |
594 | ✗ | const int slice_end = FFMIN((jobnr + 1) * ctx.slice_size, ctx.size_input); | |
595 | ✗ | v3u16_t *input = &ctx.input[slice_start * slice_stride]; | |
596 | |||
597 | ✗ | const int output_slice_h = (ctx.size_output_PT + nb_jobs - 1) / nb_jobs; | |
598 | ✗ | const int output_start = jobnr * output_slice_h; | |
599 | ✗ | const int output_stride = ctx.size_output_PT * ctx.size_output_I; | |
600 | ✗ | const int output_end = FFMIN((jobnr + 1) * output_slice_h, ctx.size_output_PT); | |
601 | ✗ | v3u16_t *output = ctx.output ? &ctx.output[output_start * output_stride] : NULL; | |
602 | |||
603 | ✗ | const float I_scale = 1.0f / (ctx.src.Imax - ctx.src.Imin); | |
604 | ✗ | const float I_offset = -ctx.src.Imin * I_scale; | |
605 | ✗ | const float PT_offset = (float) (1 << 15) / (UINT16_MAX - 1); | |
606 | |||
607 | ✗ | const float input_scale = 1.0f / (ctx.size_input - 1); | |
608 | ✗ | const float output_scale_PT = 1.0f / (ctx.size_output_PT - 1); | |
609 | ✗ | const float output_scale_I = (ctx.tmp.Imax - ctx.tmp.Imin) / | |
610 | ✗ | (ctx.size_output_I - 1); | |
611 | |||
612 | ✗ | for (int Bx = slice_start; Bx < slice_end; Bx++) { | |
613 | ✗ | const float B = input_scale * Bx; | |
614 | ✗ | for (int Gx = 0; Gx < ctx.size_input; Gx++) { | |
615 | ✗ | const float G = input_scale * Gx; | |
616 | ✗ | for (int Rx = 0; Rx < ctx.size_input; Rx++) { | |
617 | ✗ | double c[3] = { input_scale * Rx, G, B }; | |
618 | RGB rgb; | ||
619 | IPT ipt; | ||
620 | |||
621 | ✗ | ctx.src.eotf(ctx.src.Lw, ctx.src.Lb, c); | |
622 | ✗ | rgb = (RGB) { c[0], c[1], c[2] }; | |
623 | ✗ | ipt = rgb2ipt(rgb, ctx.src.encoding2lms); | |
624 | |||
625 | ✗ | if (output) { | |
626 | /* Save intermediate value to 3DLUT */ | ||
627 | ✗ | *input++ = (v3u16_t) { | |
628 | ✗ | av_round16f(I_scale * ipt.I + I_offset), | |
629 | ✗ | av_round16f(ipt.P + PT_offset), | |
630 | ✗ | av_round16f(ipt.T + PT_offset), | |
631 | }; | ||
632 | } else { | ||
633 | ✗ | update_hue_peaks(&ctx, ipt.P, ipt.T); | |
634 | |||
635 | ✗ | ipt = tone_map_apply(&ctx, ipt); | |
636 | ✗ | ipt = ctx.adapt_colors(&ctx, ipt); | |
637 | ✗ | rgb = ipt2rgb(ipt, ctx.dst.lms2encoding); | |
638 | |||
639 | ✗ | c[0] = rgb.R; | |
640 | ✗ | c[1] = rgb.G; | |
641 | ✗ | c[2] = rgb.B; | |
642 | ✗ | ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c); | |
643 | ✗ | *input++ = (v3u16_t) { | |
644 | ✗ | av_round16f(c[0]), | |
645 | ✗ | av_round16f(c[1]), | |
646 | ✗ | av_round16f(c[2]), | |
647 | }; | ||
648 | } | ||
649 | } | ||
650 | } | ||
651 | } | ||
652 | |||
653 | ✗ | if (!output) | |
654 | ✗ | return; | |
655 | |||
656 | /* Generate split gamut mapping LUT */ | ||
657 | ✗ | for (int Tx = output_start; Tx < output_end; Tx++) { | |
658 | ✗ | const float T = output_scale_PT * Tx - PT_offset; | |
659 | ✗ | for (int Px = 0; Px < ctx.size_output_PT; Px++) { | |
660 | ✗ | const float P = output_scale_PT * Px - PT_offset; | |
661 | ✗ | update_hue_peaks(&ctx, P, T); | |
662 | |||
663 | ✗ | for (int Ix = 0; Ix < ctx.size_output_I; Ix++) { | |
664 | ✗ | const float I = output_scale_I * Ix + ctx.tmp.Imin; | |
665 | ✗ | IPT ipt = ctx.adapt_colors(&ctx, (IPT) { I, P, T }); | |
666 | ✗ | RGB rgb = ipt2rgb(ipt, ctx.dst.lms2encoding); | |
667 | ✗ | double c[3] = { rgb.R, rgb.G, rgb.B }; | |
668 | ✗ | ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c); | |
669 | ✗ | *output++ = (v3u16_t) { | |
670 | ✗ | av_round16f(c[0]), | |
671 | ✗ | av_round16f(c[1]), | |
672 | ✗ | av_round16f(c[2]), | |
673 | }; | ||
674 | } | ||
675 | } | ||
676 | } | ||
677 | } | ||
678 | |||
679 | ✗ | int ff_sws_color_map_generate_static(v3u16_t *lut, int size, const SwsColorMap *map) | |
680 | { | ||
681 | ✗ | return ff_sws_color_map_generate_dynamic(lut, NULL, size, 1, 1, map); | |
682 | } | ||
683 | |||
684 | ✗ | int ff_sws_color_map_generate_dynamic(v3u16_t *input, v3u16_t *output, | |
685 | int size_input, int size_I, int size_PT, | ||
686 | const SwsColorMap *map) | ||
687 | { | ||
688 | AVSliceThread *slicethread; | ||
689 | int ret, num_slices; | ||
690 | |||
691 | ✗ | CmsCtx ctx = { | |
692 | .map = *map, | ||
693 | .input = input, | ||
694 | .output = output, | ||
695 | .size_input = size_input, | ||
696 | .size_output_I = size_I, | ||
697 | .size_output_PT = size_PT, | ||
698 | ✗ | .src = gamut_from_colorspace(map->src), | |
699 | ✗ | .dst = gamut_from_colorspace(map->dst), | |
700 | }; | ||
701 | |||
702 | ✗ | switch (ctx.map.intent) { | |
703 | ✗ | case SWS_INTENT_PERCEPTUAL: ctx.adapt_colors = perceptual; break; | |
704 | ✗ | case SWS_INTENT_RELATIVE_COLORIMETRIC: ctx.adapt_colors = relative; break; | |
705 | ✗ | case SWS_INTENT_SATURATION: ctx.adapt_colors = saturation; break; | |
706 | ✗ | case SWS_INTENT_ABSOLUTE_COLORIMETRIC: ctx.adapt_colors = absolute; break; | |
707 | ✗ | default: return AVERROR(EINVAL); | |
708 | } | ||
709 | |||
710 | ✗ | if (!output) { | |
711 | /* Tone mapping is handled in a separate step when using dynamic TM */ | ||
712 | ✗ | tone_map_setup(&ctx, false); | |
713 | } | ||
714 | |||
715 | /* Intermediate color space after tone mapping */ | ||
716 | ✗ | ctx.tmp = ctx.src; | |
717 | ✗ | ctx.tmp.Lb = ctx.dst.Lb; | |
718 | ✗ | ctx.tmp.Lw = ctx.dst.Lw; | |
719 | ✗ | ctx.tmp.Imin = ctx.dst.Imin; | |
720 | ✗ | ctx.tmp.Imax = ctx.dst.Imax; | |
721 | |||
722 | ✗ | if (ctx.map.intent == SWS_INTENT_ABSOLUTE_COLORIMETRIC) { | |
723 | /** | ||
724 | * The IPT transform already implies an explicit white point adaptation | ||
725 | * from src to dst, so to get absolute colorimetric semantics we have | ||
726 | * to explicitly undo this adaptation with a corresponding inverse. | ||
727 | */ | ||
728 | ✗ | ctx.adaptation = ff_sws_get_adaptation(&ctx.map.dst.gamut, | |
729 | ctx.dst.wp, ctx.src.wp); | ||
730 | } | ||
731 | |||
732 | ✗ | ret = avpriv_slicethread_create(&slicethread, &ctx, generate_slice, NULL, 0); | |
733 | ✗ | if (ret < 0) | |
734 | ✗ | return ret; | |
735 | |||
736 | ✗ | ctx.slice_size = (ctx.size_input + ret - 1) / ret; | |
737 | ✗ | num_slices = (ctx.size_input + ctx.slice_size - 1) / ctx.slice_size; | |
738 | ✗ | avpriv_slicethread_execute(slicethread, num_slices, 0); | |
739 | ✗ | avpriv_slicethread_free(&slicethread); | |
740 | ✗ | return 0; | |
741 | } | ||
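To make the slicing arithmetic concrete with hypothetical numbers: if avpriv_slicethread_create() reports ret = 11 worker threads and size_input = 48, then

```latex
\mathit{slice\_size} = \left\lceil \tfrac{48}{11} \right\rceil = 5, \qquad
\mathit{num\_slices} = \left\lceil \tfrac{48}{5} \right\rceil = 10
```

so only 10 jobs are dispatched even though 11 threads are available, and the last job covers the remaining 3 planes (indices 45–47) via the FFMIN clamp in generate_slice().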
742 | |||
743 | ✗ | void ff_sws_tone_map_generate(v2u16_t *lut, int size, const SwsColorMap *map) | |
744 | { | ||
745 | ✗ | CmsCtx ctx = { | |
746 | .map = *map, | ||
747 | ✗ | .src = gamut_from_colorspace(map->src), | |
748 | ✗ | .dst = gamut_from_colorspace(map->dst), | |
749 | }; | ||
750 | |||
751 | ✗ | const float src_scale = (ctx.src.Imax - ctx.src.Imin) / (size - 1); | |
752 | ✗ | const float src_offset = ctx.src.Imin; | |
753 | ✗ | const float dst_scale = 1.0f / (ctx.dst.Imax - ctx.dst.Imin); | |
754 | ✗ | const float dst_offset = -ctx.dst.Imin * dst_scale; | |
755 | |||
756 | ✗ | tone_map_setup(&ctx, true); | |
757 | |||
758 | ✗ | for (int i = 0; i < size; i++) { | |
759 | ✗ | const float I = src_scale * i + src_offset; | |
760 | ✗ | IPT ipt = tone_map_apply(&ctx, (IPT) { I, 1.0f }); | |
761 | ✗ | lut[i] = (v2u16_t) { | |
762 | ✗ | av_round16f(dst_scale * ipt.I + dst_offset), | |
763 | ✗ | av_clip_uint16(ipt.P * (1 << 15) + 0.5f), | |
764 | }; | ||
765 | } | ||
766 | ✗ | } | |
767 |