| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (C) 2024 Niklas Haas | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include <assert.h> | ||
| 22 | #include <string.h> | ||
| 23 | |||
| 24 | #include "libavutil/attributes.h" | ||
| 25 | #include "libavutil/avassert.h" | ||
| 26 | #include "libavutil/mem.h" | ||
| 27 | |||
| 28 | #include "cms.h" | ||
| 29 | #include "csputils.h" | ||
| 30 | #include "lut3d.h" | ||
| 31 | |||
| 32 | ✗ | SwsLut3D *ff_sws_lut3d_alloc(void) | |
| 33 | { | ||
| 34 | ✗ | SwsLut3D *lut3d = av_malloc(sizeof(*lut3d)); | |
| 35 | ✗ | if (!lut3d) | |
| 36 | ✗ | return NULL; | |
| 37 | |||
| 38 | ✗ | lut3d->dynamic = false; | |
| 39 | ✗ | return lut3d; | |
| 40 | } | ||
| 41 | |||
| 42 | ✗ | void ff_sws_lut3d_free(SwsLut3D **plut3d) | |
| 43 | { | ||
| 44 | ✗ | av_freep(plut3d); | |
| 45 | ✗ | } | |
| 46 | |||
| 47 | ✗ | bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output) | |
| 48 | { | ||
| 49 | ✗ | return fmt == AV_PIX_FMT_RGBA64; | |
| 50 | } | ||
| 51 | |||
| 52 | ✗ | enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output) | |
| 53 | { | ||
| 54 | ✗ | return AV_PIX_FMT_RGBA64; | |
| 55 | } | ||
| 56 | |||
| 57 | /** | ||
| 58 | * v0 and v1 are 'black' and 'white' | ||
| 59 | * v2 and v3 are closest RGB/CMY vertices | ||
| 60 | * x >= y >= z are relative weights | ||
| 61 | */ | ||
| 62 | static av_always_inline | ||
| 63 | ✗ | v3u16_t barycentric(int shift, int x, int y, int z, | |
| 64 | v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3) | ||
| 65 | { | ||
| 66 | ✗ | const int a = (1 << shift) - x; | |
| 67 | ✗ | const int b = x - y; | |
| 68 | ✗ | const int c = y - z; | |
| 69 | ✗ | const int d = z; | |
| 70 | av_assert2(x >= y); | ||
| 71 | av_assert2(y >= z); | ||
| 72 | |||
| 73 | ✗ | return (v3u16_t) { | |
| 74 | ✗ | (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift, | |
| 75 | ✗ | (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift, | |
| 76 | ✗ | (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift, | |
| 77 | }; | ||
| 78 | } | ||
| 79 | |||
| 80 | static av_always_inline | ||
| 81 | ✗ | v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx, | |
| 82 | int Rf, int Gf, int Bf) | ||
| 83 | { | ||
| 84 | ✗ | const int shift = 16 - INPUT_LUT_BITS; | |
| 85 | ✗ | const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1); | |
| 86 | ✗ | const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1); | |
| 87 | ✗ | const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1); | |
| 88 | |||
| 89 | ✗ | const v3u16_t c000 = lut3d->input[Bx][Gx][Rx]; | |
| 90 | ✗ | const v3u16_t c111 = lut3d->input[Bn][Gn][Rn]; | |
| 91 | ✗ | if (Rf > Gf) { | |
| 92 | ✗ | if (Gf > Bf) { | |
| 93 | ✗ | const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; | |
| 94 | ✗ | const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; | |
| 95 | ✗ | return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111); | |
| 96 | ✗ | } else if (Rf > Bf) { | |
| 97 | ✗ | const v3u16_t c100 = lut3d->input[Bx][Gx][Rn]; | |
| 98 | ✗ | const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; | |
| 99 | ✗ | return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111); | |
| 100 | } else { | ||
| 101 | ✗ | const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; | |
| 102 | ✗ | const v3u16_t c101 = lut3d->input[Bn][Gx][Rn]; | |
| 103 | ✗ | return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111); | |
| 104 | } | ||
| 105 | } else { | ||
| 106 | ✗ | if (Bf > Gf) { | |
| 107 | ✗ | const v3u16_t c001 = lut3d->input[Bn][Gx][Rx]; | |
| 108 | ✗ | const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; | |
| 109 | ✗ | return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111); | |
| 110 | ✗ | } else if (Bf > Rf) { | |
| 111 | ✗ | const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; | |
| 112 | ✗ | const v3u16_t c011 = lut3d->input[Bn][Gn][Rx]; | |
| 113 | ✗ | return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111); | |
| 114 | } else { | ||
| 115 | ✗ | const v3u16_t c010 = lut3d->input[Bx][Gn][Rx]; | |
| 116 | ✗ | const v3u16_t c110 = lut3d->input[Bx][Gn][Rn]; | |
| 117 | ✗ | return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111); | |
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | ✗ | static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb) | |
| 123 | { | ||
| 124 | ✗ | const int shift = 16 - INPUT_LUT_BITS; | |
| 125 | ✗ | const int Rx = rgb.x >> shift; | |
| 126 | ✗ | const int Gx = rgb.y >> shift; | |
| 127 | ✗ | const int Bx = rgb.z >> shift; | |
| 128 | ✗ | const int Rf = rgb.x & ((1 << shift) - 1); | |
| 129 | ✗ | const int Gf = rgb.y & ((1 << shift) - 1); | |
| 130 | ✗ | const int Bf = rgb.z & ((1 << shift) - 1); | |
| 131 | ✗ | return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf); | |
| 132 | } | ||
| 133 | |||
| 134 | /** | ||
| 135 | * Note: These functions are scaled such that x == (1 << shift) corresponds to | ||
| 136 | * a value of 1.0. This makes them suitable for use when interpolation LUT | ||
| 137 | * entries with a fractional part that is just masked away from the index, | ||
| 138 | * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of | ||
| 139 | * just slightly *less* than 1.0. | ||
| 140 | */ | ||
| 141 | ✗ | static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift) | |
| 142 | { | ||
| 143 | ✗ | const int xi = (1 << shift) - x; | |
| 144 | ✗ | return (v2u16_t) { | |
| 145 | ✗ | (a.x * xi + b.x * x) >> shift, | |
| 146 | ✗ | (a.y * xi + b.y * x) >> shift, | |
| 147 | }; | ||
| 148 | } | ||
| 149 | |||
| 150 | ✗ | static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift) | |
| 151 | { | ||
| 152 | ✗ | const int xi = (1 << shift) - x; | |
| 153 | ✗ | return (v3u16_t) { | |
| 154 | ✗ | (a.x * xi + b.x * x) >> shift, | |
| 155 | ✗ | (a.y * xi + b.y * x) >> shift, | |
| 156 | ✗ | (a.z * xi + b.z * x) >> shift, | |
| 157 | }; | ||
| 158 | } | ||
| 159 | |||
| 160 | ✗ | static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt) | |
| 161 | { | ||
| 162 | ✗ | const int Ishift = 16 - OUTPUT_LUT_BITS_I; | |
| 163 | ✗ | const int Cshift = 16 - OUTPUT_LUT_BITS_PT; | |
| 164 | ✗ | const int Ix = ipt.x >> Ishift; | |
| 165 | ✗ | const int Px = ipt.y >> Cshift; | |
| 166 | ✗ | const int Tx = ipt.z >> Cshift; | |
| 167 | ✗ | const int If = ipt.x & ((1 << Ishift) - 1); | |
| 168 | ✗ | const int Pf = ipt.y & ((1 << Cshift) - 1); | |
| 169 | ✗ | const int Tf = ipt.z & ((1 << Cshift) - 1); | |
| 170 | ✗ | const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I - 1); | |
| 171 | ✗ | const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1); | |
| 172 | ✗ | const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1); | |
| 173 | |||
| 174 | /* Trilinear interpolation */ | ||
| 175 | ✗ | const v3u16_t c000 = lut3d->output[Tx][Px][Ix]; | |
| 176 | ✗ | const v3u16_t c001 = lut3d->output[Tx][Px][In]; | |
| 177 | ✗ | const v3u16_t c010 = lut3d->output[Tx][Pn][Ix]; | |
| 178 | ✗ | const v3u16_t c011 = lut3d->output[Tx][Pn][In]; | |
| 179 | ✗ | const v3u16_t c100 = lut3d->output[Tn][Px][Ix]; | |
| 180 | ✗ | const v3u16_t c101 = lut3d->output[Tn][Px][In]; | |
| 181 | ✗ | const v3u16_t c110 = lut3d->output[Tn][Pn][Ix]; | |
| 182 | ✗ | const v3u16_t c111 = lut3d->output[Tn][Pn][In]; | |
| 183 | ✗ | const v3u16_t c00 = lerp3u16(c000, c100, Tf, Cshift); | |
| 184 | ✗ | const v3u16_t c10 = lerp3u16(c010, c110, Tf, Cshift); | |
| 185 | ✗ | const v3u16_t c01 = lerp3u16(c001, c101, Tf, Cshift); | |
| 186 | ✗ | const v3u16_t c11 = lerp3u16(c011, c111, Tf, Cshift); | |
| 187 | ✗ | const v3u16_t c0 = lerp3u16(c00, c10, Pf, Cshift); | |
| 188 | ✗ | const v3u16_t c1 = lerp3u16(c01, c11, Pf, Cshift); | |
| 189 | ✗ | const v3u16_t c = lerp3u16(c0, c1, If, Ishift); | |
| 190 | ✗ | return c; | |
| 191 | } | ||
| 192 | |||
| 193 | ✗ | static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt) | |
| 194 | { | ||
| 195 | ✗ | const int shift = 16 - TONE_LUT_BITS; | |
| 196 | ✗ | const int Ix = ipt.x >> shift; | |
| 197 | ✗ | const int If = ipt.x & ((1 << shift) - 1); | |
| 198 | ✗ | const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1); | |
| 199 | |||
| 200 | ✗ | const v2u16_t w0 = lut3d->tone_map[Ix]; | |
| 201 | ✗ | const v2u16_t w1 = lut3d->tone_map[In]; | |
| 202 | ✗ | const v2u16_t w = lerp2u16(w0, w1, If, shift); | |
| 203 | ✗ | const int base = (1 << 15) - w.y; | |
| 204 | |||
| 205 | ✗ | ipt.x = w.x; | |
| 206 | ✗ | ipt.y = base + (ipt.y * w.y >> 15); | |
| 207 | ✗ | ipt.z = base + (ipt.z * w.y >> 15); | |
| 208 | ✗ | return ipt; | |
| 209 | } | ||
| 210 | |||
| 211 | ✗ | int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in, | |
| 212 | enum AVPixelFormat fmt_out, const SwsColorMap *map) | ||
| 213 | { | ||
| 214 | int ret; | ||
| 215 | |||
| 216 | ✗ | if (!ff_sws_lut3d_test_fmt(fmt_in, 0) || !ff_sws_lut3d_test_fmt(fmt_out, 1)) | |
| 217 | ✗ | return AVERROR(EINVAL); | |
| 218 | |||
| 219 | ✗ | lut3d->dynamic = map->src.frame_peak.num > 0; | |
| 220 | ✗ | lut3d->map = *map; | |
| 221 | |||
| 222 | ✗ | if (lut3d->dynamic) { | |
| 223 | ✗ | ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0], | |
| 224 | &lut3d->output[0][0][0], | ||
| 225 | INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I, | ||
| 226 | OUTPUT_LUT_SIZE_PT, map); | ||
| 227 | ✗ | if (ret < 0) | |
| 228 | ✗ | return ret; | |
| 229 | |||
| 230 | /* Make sure initial state is valid */ | ||
| 231 | ✗ | ff_sws_lut3d_update(lut3d, &map->src); | |
| 232 | ✗ | return 0; | |
| 233 | } else { | ||
| 234 | ✗ | return ff_sws_color_map_generate_static(&lut3d->input[0][0][0], | |
| 235 | INPUT_LUT_SIZE, map); | ||
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | ✗ | void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src) | |
| 240 | { | ||
| 241 | ✗ | if (!new_src || !lut3d->dynamic) | |
| 242 | ✗ | return; | |
| 243 | |||
| 244 | ✗ | lut3d->map.src.frame_peak = new_src->frame_peak; | |
| 245 | ✗ | lut3d->map.src.frame_avg = new_src->frame_avg; | |
| 246 | |||
| 247 | ✗ | ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map); | |
| 248 | } | ||
| 249 | |||
| 250 | ✗ | void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride, | |
| 251 | uint8_t *out, int out_stride, int w, int h) | ||
| 252 | { | ||
| 253 | ✗ | while (h--) { | |
| 254 | ✗ | const uint16_t *in16 = (const uint16_t *) in; | |
| 255 | ✗ | uint16_t *out16 = (uint16_t *) out; | |
| 256 | |||
| 257 | ✗ | for (int x = 0; x < w; x++) { | |
| 258 | ✗ | v3u16_t c = { in16[0], in16[1], in16[2] }; | |
| 259 | ✗ | c = lookup_input16(lut3d, c); | |
| 260 | |||
| 261 | ✗ | if (lut3d->dynamic) { | |
| 262 | ✗ | c = apply_tone_map(lut3d, c); | |
| 263 | ✗ | c = lookup_output(lut3d, c); | |
| 264 | } | ||
| 265 | |||
| 266 | ✗ | out16[0] = c.x; | |
| 267 | ✗ | out16[1] = c.y; | |
| 268 | ✗ | out16[2] = c.z; | |
| 269 | ✗ | out16[3] = in16[3]; | |
| 270 | ✗ | in16 += 4; | |
| 271 | ✗ | out16 += 4; | |
| 272 | } | ||
| 273 | |||
| 274 | ✗ | in += in_stride; | |
| 275 | ✗ | out += out_stride; | |
| 276 | } | ||
| 277 | ✗ | } | |
| 278 |