| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * AOM film grain synthesis | ||
| 3 | * Copyright (c) 2023 Niklas Haas <ffmpeg@haasn.xyz> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | /* | ||
| 23 | * Copyright © 2018, Niklas Haas | ||
| 24 | * Copyright © 2018, VideoLAN and dav1d authors | ||
| 25 | * Copyright © 2018, Two Orioles, LLC | ||
| 26 | * All rights reserved. | ||
| 27 | * | ||
| 28 | * Redistribution and use in source and binary forms, with or without | ||
| 29 | * modification, are permitted provided that the following conditions are met: | ||
| 30 | * | ||
| 31 | * 1. Redistributions of source code must retain the above copyright notice, this | ||
| 32 | * list of conditions and the following disclaimer. | ||
| 33 | * | ||
| 34 | * 2. Redistributions in binary form must reproduce the above copyright notice, | ||
| 35 | * this list of conditions and the following disclaimer in the documentation | ||
| 36 | * and/or other materials provided with the distribution. | ||
| 37 | * | ||
| 38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
| 39 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| 40 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| 41 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
| 42 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| 43 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| 44 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
| 45 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| 47 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 48 | */ | ||
| 49 | |||
| 50 | #include "bit_depth_template.c" | ||
| 51 | |||
/*
 * Per-bit-depth configuration. The #undef run clears any previous
 * definitions before the macros are (re)defined for the current BIT_DEPTH.
 */
#undef entry
#undef bitdepth
#undef bitdepth_max
#undef HBD_DECL
#undef HBD_CALL
#undef SCALING_SIZE

#if BIT_DEPTH > 8
// High bit depth: the bit depth is a runtime value, passed as a trailing
// `bitdepth` argument that HBD_DECL / HBD_CALL splice into parameter and
// argument lists.
# define entry int16_t                          // grain LUT element type
# define bitdepth_max ((1 << bitdepth) - 1)     // largest representable sample value
# define HBD_DECL , const int bitdepth
# define HBD_CALL , bitdepth
# define SCALING_SIZE 4096                      // scaling LUT capacity (1 << 12)
#else
// 8-bit: `bitdepth` is the constant 8, so no extra argument is needed and
// HBD_DECL / HBD_CALL expand to nothing.
# define entry int8_t                           // grain LUT element type
# define bitdepth 8
# define bitdepth_max UINT8_MAX
# define HBD_DECL
# define HBD_CALL
# define SCALING_SIZE 256                       // scaling LUT capacity (1 << 8)
#endif
| 73 | |||
| 74 | ✗ | static void FUNC(generate_grain_y_c)(entry buf[][GRAIN_WIDTH], | |
| 75 | const AVFilmGrainParams *const params | ||
| 76 | HBD_DECL) | ||
| 77 | { | ||
| 78 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 79 | ✗ | const int bitdepth_min_8 = bitdepth - 8; | |
| 80 | ✗ | unsigned seed = params->seed; | |
| 81 | ✗ | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; | |
| 82 | ✗ | const int grain_ctr = 128 << bitdepth_min_8; | |
| 83 | ✗ | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; | |
| 84 | |||
| 85 | ✗ | const int ar_pad = 3; | |
| 86 | ✗ | const int ar_lag = data->ar_coeff_lag; | |
| 87 | |||
| 88 | ✗ | for (int y = 0; y < GRAIN_HEIGHT; y++) { | |
| 89 | ✗ | for (int x = 0; x < GRAIN_WIDTH; x++) { | |
| 90 | ✗ | const int value = get_random_number(11, &seed); | |
| 91 | ✗ | buf[y][x] = round2(gaussian_sequence[ value ], shift); | |
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | ✗ | for (int y = ar_pad; y < GRAIN_HEIGHT; y++) { | |
| 96 | ✗ | for (int x = ar_pad; x < GRAIN_WIDTH - ar_pad; x++) { | |
| 97 | ✗ | const int8_t *coeff = data->ar_coeffs_y; | |
| 98 | ✗ | int sum = 0, grain; | |
| 99 | ✗ | for (int dy = -ar_lag; dy <= 0; dy++) { | |
| 100 | ✗ | for (int dx = -ar_lag; dx <= ar_lag; dx++) { | |
| 101 | ✗ | if (!dx && !dy) | |
| 102 | ✗ | break; | |
| 103 | ✗ | sum += *(coeff++) * buf[y + dy][x + dx]; | |
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | ✗ | grain = buf[y][x] + round2(sum, data->ar_coeff_shift); | |
| 108 | ✗ | buf[y][x] = av_clip(grain, grain_min, grain_max); | |
| 109 | } | ||
| 110 | } | ||
| 111 | ✗ | } | |
| 112 | |||
| 113 | static void | ||
| 114 | ✗ | FUNC(generate_grain_uv_c)(entry buf[][GRAIN_WIDTH], | |
| 115 | const entry buf_y[][GRAIN_WIDTH], | ||
| 116 | const AVFilmGrainParams *const params, const intptr_t uv, | ||
| 117 | const int subx, const int suby HBD_DECL) | ||
| 118 | { | ||
| 119 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 120 | ✗ | const int bitdepth_min_8 = bitdepth - 8; | |
| 121 | ✗ | unsigned seed = params->seed ^ (uv ? 0x49d8 : 0xb524); | |
| 122 | ✗ | const int shift = 4 - bitdepth_min_8 + data->grain_scale_shift; | |
| 123 | ✗ | const int grain_ctr = 128 << bitdepth_min_8; | |
| 124 | ✗ | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; | |
| 125 | |||
| 126 | ✗ | const int chromaW = subx ? SUB_GRAIN_WIDTH : GRAIN_WIDTH; | |
| 127 | ✗ | const int chromaH = suby ? SUB_GRAIN_HEIGHT : GRAIN_HEIGHT; | |
| 128 | |||
| 129 | ✗ | const int ar_pad = 3; | |
| 130 | ✗ | const int ar_lag = data->ar_coeff_lag; | |
| 131 | |||
| 132 | ✗ | for (int y = 0; y < chromaH; y++) { | |
| 133 | ✗ | for (int x = 0; x < chromaW; x++) { | |
| 134 | ✗ | const int value = get_random_number(11, &seed); | |
| 135 | ✗ | buf[y][x] = round2(gaussian_sequence[ value ], shift); | |
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | ✗ | for (int y = ar_pad; y < chromaH; y++) { | |
| 140 | ✗ | for (int x = ar_pad; x < chromaW - ar_pad; x++) { | |
| 141 | ✗ | const int8_t *coeff = data->ar_coeffs_uv[uv]; | |
| 142 | ✗ | int sum = 0, grain; | |
| 143 | ✗ | for (int dy = -ar_lag; dy <= 0; dy++) { | |
| 144 | ✗ | for (int dx = -ar_lag; dx <= ar_lag; dx++) { | |
| 145 | // For the final (current) pixel, we need to add in the | ||
| 146 | // contribution from the luma grain texture | ||
| 147 | ✗ | if (!dx && !dy) { | |
| 148 | ✗ | const int lumaX = ((x - ar_pad) << subx) + ar_pad; | |
| 149 | ✗ | const int lumaY = ((y - ar_pad) << suby) + ar_pad; | |
| 150 | ✗ | int luma = 0; | |
| 151 | ✗ | if (!data->num_y_points) | |
| 152 | ✗ | break; | |
| 153 | ✗ | for (int i = 0; i <= suby; i++) { | |
| 154 | ✗ | for (int j = 0; j <= subx; j++) { | |
| 155 | ✗ | luma += buf_y[lumaY + i][lumaX + j]; | |
| 156 | } | ||
| 157 | } | ||
| 158 | ✗ | luma = round2(luma, subx + suby); | |
| 159 | ✗ | sum += luma * (*coeff); | |
| 160 | ✗ | break; | |
| 161 | } | ||
| 162 | |||
| 163 | ✗ | sum += *(coeff++) * buf[y + dy][x + dx]; | |
| 164 | } | ||
| 165 | } | ||
| 166 | |||
| 167 | ✗ | grain = buf[y][x] + round2(sum, data->ar_coeff_shift); | |
| 168 | ✗ | buf[y][x] = av_clip(grain, grain_min, grain_max); | |
| 169 | } | ||
| 170 | } | ||
| 171 | ✗ | } | |
| 172 | |||
| 173 | // samples from the correct block of a grain LUT, while taking into account the | ||
| 174 | // offsets provided by the offsets cache | ||
| 175 | ✗ | static inline entry FUNC(sample_lut)(const entry grain_lut[][GRAIN_WIDTH], | |
| 176 | const int offsets[2][2], | ||
| 177 | const int subx, const int suby, | ||
| 178 | const int bx, const int by, | ||
| 179 | const int x, const int y) | ||
| 180 | { | ||
| 181 | ✗ | const int randval = offsets[bx][by]; | |
| 182 | ✗ | const int offx = 3 + (2 >> subx) * (3 + (randval >> 4)); | |
| 183 | ✗ | const int offy = 3 + (2 >> suby) * (3 + (randval & 0xF)); | |
| 184 | ✗ | return grain_lut[offy + y + (FG_BLOCK_SIZE >> suby) * by] | |
| 185 | ✗ | [offx + x + (FG_BLOCK_SIZE >> subx) * bx]; | |
| 186 | } | ||
| 187 | |||
| 188 | ✗ | static void FUNC(fgy_32x32xn_c)(pixel *const dst_row, const pixel *const src_row, | |
| 189 | const ptrdiff_t stride, | ||
| 190 | const AVFilmGrainParams *const params, const size_t pw, | ||
| 191 | const uint8_t scaling[SCALING_SIZE], | ||
| 192 | const entry grain_lut[][GRAIN_WIDTH], | ||
| 193 | const int bh, const int row_num HBD_DECL) | ||
| 194 | { | ||
| 195 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 196 | ✗ | const int rows = 1 + (data->overlap_flag && row_num > 0); | |
| 197 | ✗ | const int bitdepth_min_8 = bitdepth - 8; | |
| 198 | ✗ | const int grain_ctr = 128 << bitdepth_min_8; | |
| 199 | ✗ | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; | |
| 200 | unsigned seed[2]; | ||
| 201 | int offsets[2 /* col offset */][2 /* row offset */]; | ||
| 202 | |||
| 203 | int min_value, max_value; | ||
| 204 | ✗ | if (data->limit_output_range) { | |
| 205 | ✗ | min_value = 16 << bitdepth_min_8; | |
| 206 | ✗ | max_value = 235 << bitdepth_min_8; | |
| 207 | } else { | ||
| 208 | ✗ | min_value = 0; | |
| 209 | ✗ | max_value = bitdepth_max; | |
| 210 | } | ||
| 211 | |||
| 212 | // seed[0] contains the current row, seed[1] contains the previous | ||
| 213 | ✗ | for (int i = 0; i < rows; i++) { | |
| 214 | ✗ | seed[i] = params->seed; | |
| 215 | ✗ | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; | |
| 216 | ✗ | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); | |
| 217 | } | ||
| 218 | |||
| 219 | av_assert1(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); | ||
| 220 | |||
| 221 | // process this row in FG_BLOCK_SIZE^2 blocks | ||
| 222 | ✗ | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE) { | |
| 223 | ✗ | const int bw = FFMIN(FG_BLOCK_SIZE, (int) pw - bx); | |
| 224 | const pixel *src; | ||
| 225 | pixel *dst; | ||
| 226 | int noise; | ||
| 227 | |||
| 228 | // x/y block offsets to compensate for overlapped regions | ||
| 229 | ✗ | const int ystart = data->overlap_flag && row_num ? FFMIN(2, bh) : 0; | |
| 230 | ✗ | const int xstart = data->overlap_flag && bx ? FFMIN(2, bw) : 0; | |
| 231 | |||
| 232 | static const int w[2][2] = { { 27, 17 }, { 17, 27 } }; | ||
| 233 | |||
| 234 | ✗ | if (data->overlap_flag && bx) { | |
| 235 | // shift previous offsets left | ||
| 236 | ✗ | for (int i = 0; i < rows; i++) | |
| 237 | ✗ | offsets[1][i] = offsets[0][i]; | |
| 238 | } | ||
| 239 | |||
| 240 | // update current offsets | ||
| 241 | ✗ | for (int i = 0; i < rows; i++) | |
| 242 | ✗ | offsets[0][i] = get_random_number(8, &seed[i]); | |
| 243 | |||
| 244 | #define add_noise_y(x, y, grain) \ | ||
| 245 | src = (const pixel*)((const char*)src_row + (y) * stride) + (x) + bx; \ | ||
| 246 | dst = (pixel*)((char*)dst_row + (y) * stride) + (x) + bx; \ | ||
| 247 | noise = round2(scaling[ *src ] * (grain), data->scaling_shift); \ | ||
| 248 | *dst = av_clip(*src + noise, min_value, max_value); | ||
| 249 | |||
| 250 | ✗ | for (int y = ystart; y < bh; y++) { | |
| 251 | // Non-overlapped image region (straightforward) | ||
| 252 | ✗ | for (int x = xstart; x < bw; x++) { | |
| 253 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 0, x, y); | |
| 254 | ✗ | add_noise_y(x, y, grain); | |
| 255 | } | ||
| 256 | |||
| 257 | // Special case for overlapped column | ||
| 258 | ✗ | for (int x = 0; x < xstart; x++) { | |
| 259 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 0, x, y); | |
| 260 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 1, 0, x, y); | |
| 261 | ✗ | grain = round2(old * w[x][0] + grain * w[x][1], 5); | |
| 262 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 263 | ✗ | add_noise_y(x, y, grain); | |
| 264 | } | ||
| 265 | } | ||
| 266 | |||
| 267 | ✗ | for (int y = 0; y < ystart; y++) { | |
| 268 | // Special case for overlapped row (sans corner) | ||
| 269 | ✗ | for (int x = xstart; x < bw; x++) { | |
| 270 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 0, x, y); | |
| 271 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 1, x, y); | |
| 272 | ✗ | grain = round2(old * w[y][0] + grain * w[y][1], 5); | |
| 273 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 274 | ✗ | add_noise_y(x, y, grain); | |
| 275 | } | ||
| 276 | |||
| 277 | // Special case for doubly-overlapped corner | ||
| 278 | ✗ | for (int x = 0; x < xstart; x++) { | |
| 279 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 0, x, y); | |
| 280 | ✗ | int top = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 0, 1, x, y); | |
| 281 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 1, 1, x, y); | |
| 282 | |||
| 283 | // Blend the top pixel with the top left block | ||
| 284 | ✗ | top = round2(old * w[x][0] + top * w[x][1], 5); | |
| 285 | ✗ | top = av_clip(top, grain_min, grain_max); | |
| 286 | |||
| 287 | // Blend the current pixel with the left block | ||
| 288 | ✗ | old = FUNC(sample_lut)(grain_lut, offsets, 0, 0, 1, 0, x, y); | |
| 289 | ✗ | grain = round2(old * w[x][0] + grain * w[x][1], 5); | |
| 290 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 291 | |||
| 292 | // Mix the row rows together and apply grain | ||
| 293 | ✗ | grain = round2(top * w[y][0] + grain * w[y][1], 5); | |
| 294 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 295 | ✗ | add_noise_y(x, y, grain); | |
| 296 | } | ||
| 297 | } | ||
| 298 | } | ||
| 299 | ✗ | } | |
| 300 | |||
| 301 | static void | ||
| 302 | ✗ | FUNC(fguv_32x32xn_c)(pixel *const dst_row, const pixel *const src_row, | |
| 303 | const ptrdiff_t stride, const AVFilmGrainParams *const params, | ||
| 304 | const size_t pw, const uint8_t scaling[SCALING_SIZE], | ||
| 305 | const entry grain_lut[][GRAIN_WIDTH], const int bh, | ||
| 306 | const int row_num, const pixel *const luma_row, | ||
| 307 | const ptrdiff_t luma_stride, const int uv, const int is_id, | ||
| 308 | const int sx, const int sy HBD_DECL) | ||
| 309 | { | ||
| 310 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 311 | ✗ | const int rows = 1 + (data->overlap_flag && row_num > 0); | |
| 312 | ✗ | const int bitdepth_min_8 = bitdepth - 8; | |
| 313 | ✗ | const int grain_ctr = 128 << bitdepth_min_8; | |
| 314 | ✗ | const int grain_min = -grain_ctr, grain_max = grain_ctr - 1; | |
| 315 | unsigned seed[2]; | ||
| 316 | int offsets[2 /* col offset */][2 /* row offset */]; | ||
| 317 | |||
| 318 | int min_value, max_value; | ||
| 319 | ✗ | if (data->limit_output_range) { | |
| 320 | ✗ | min_value = 16 << bitdepth_min_8; | |
| 321 | ✗ | max_value = (is_id ? 235 : 240) << bitdepth_min_8; | |
| 322 | } else { | ||
| 323 | ✗ | min_value = 0; | |
| 324 | ✗ | max_value = bitdepth_max; | |
| 325 | } | ||
| 326 | |||
| 327 | // seed[0] contains the current row, seed[1] contains the previous | ||
| 328 | ✗ | for (int i = 0; i < rows; i++) { | |
| 329 | ✗ | seed[i] = params->seed; | |
| 330 | ✗ | seed[i] ^= (((row_num - i) * 37 + 178) & 0xFF) << 8; | |
| 331 | ✗ | seed[i] ^= (((row_num - i) * 173 + 105) & 0xFF); | |
| 332 | } | ||
| 333 | |||
| 334 | av_assert1(stride % (FG_BLOCK_SIZE * sizeof(pixel)) == 0); | ||
| 335 | |||
| 336 | // process this row in FG_BLOCK_SIZE^2 blocks (subsampled) | ||
| 337 | ✗ | for (unsigned bx = 0; bx < pw; bx += FG_BLOCK_SIZE >> sx) { | |
| 338 | ✗ | const int bw = FFMIN(FG_BLOCK_SIZE >> sx, (int)(pw - bx)); | |
| 339 | int val, lx, ly, noise; | ||
| 340 | const pixel *src, *luma; | ||
| 341 | pixel *dst, avg; | ||
| 342 | |||
| 343 | // x/y block offsets to compensate for overlapped regions | ||
| 344 | ✗ | const int ystart = data->overlap_flag && row_num ? FFMIN(2 >> sy, bh) : 0; | |
| 345 | ✗ | const int xstart = data->overlap_flag && bx ? FFMIN(2 >> sx, bw) : 0; | |
| 346 | |||
| 347 | static const int w[2 /* sub */][2 /* off */][2] = { | ||
| 348 | { { 27, 17 }, { 17, 27 } }, | ||
| 349 | { { 23, 22 } }, | ||
| 350 | }; | ||
| 351 | |||
| 352 | ✗ | if (data->overlap_flag && bx) { | |
| 353 | // shift previous offsets left | ||
| 354 | ✗ | for (int i = 0; i < rows; i++) | |
| 355 | ✗ | offsets[1][i] = offsets[0][i]; | |
| 356 | } | ||
| 357 | |||
| 358 | // update current offsets | ||
| 359 | ✗ | for (int i = 0; i < rows; i++) | |
| 360 | ✗ | offsets[0][i] = get_random_number(8, &seed[i]); | |
| 361 | |||
| 362 | #define add_noise_uv(x, y, grain) \ | ||
| 363 | lx = (bx + x) << sx; \ | ||
| 364 | ly = y << sy; \ | ||
| 365 | luma = (const pixel*)((const char*)luma_row + ly * luma_stride) + lx;\ | ||
| 366 | avg = luma[0]; \ | ||
| 367 | if (sx) \ | ||
| 368 | avg = (avg + luma[1] + 1) >> 1; \ | ||
| 369 | src = (const pixel*)((const char *)src_row + (y) * stride) + bx + (x);\ | ||
| 370 | dst = (pixel *) ((char *) dst_row + (y) * stride) + bx + (x); \ | ||
| 371 | val = avg; \ | ||
| 372 | if (!data->chroma_scaling_from_luma) { \ | ||
| 373 | const int combined = avg * data->uv_mult_luma[uv] + \ | ||
| 374 | *src * data->uv_mult[uv]; \ | ||
| 375 | val = av_clip( (combined >> 6) + \ | ||
| 376 | (data->uv_offset[uv] * (1 << bitdepth_min_8)), \ | ||
| 377 | 0, bitdepth_max ); \ | ||
| 378 | } \ | ||
| 379 | noise = round2(scaling[ val ] * (grain), data->scaling_shift); \ | ||
| 380 | *dst = av_clip(*src + noise, min_value, max_value); | ||
| 381 | |||
| 382 | ✗ | for (int y = ystart; y < bh; y++) { | |
| 383 | // Non-overlapped image region (straightforward) | ||
| 384 | ✗ | for (int x = xstart; x < bw; x++) { | |
| 385 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 0, x, y); | |
| 386 | ✗ | add_noise_uv(x, y, grain); | |
| 387 | } | ||
| 388 | |||
| 389 | // Special case for overlapped column | ||
| 390 | ✗ | for (int x = 0; x < xstart; x++) { | |
| 391 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 0, x, y); | |
| 392 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 1, 0, x, y); | |
| 393 | ✗ | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); | |
| 394 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 395 | ✗ | add_noise_uv(x, y, grain); | |
| 396 | } | ||
| 397 | } | ||
| 398 | |||
| 399 | ✗ | for (int y = 0; y < ystart; y++) { | |
| 400 | // Special case for overlapped row (sans corner) | ||
| 401 | ✗ | for (int x = xstart; x < bw; x++) { | |
| 402 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 0, x, y); | |
| 403 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 1, x, y); | |
| 404 | ✗ | grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5); | |
| 405 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 406 | ✗ | add_noise_uv(x, y, grain); | |
| 407 | } | ||
| 408 | |||
| 409 | // Special case for doubly-overlapped corner | ||
| 410 | ✗ | for (int x = 0; x < xstart; x++) { | |
| 411 | ✗ | int top = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 1, x, y); | |
| 412 | ✗ | int old = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 1, 1, x, y); | |
| 413 | ✗ | int grain = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 0, 0, x, y); | |
| 414 | |||
| 415 | // Blend the top pixel with the top left block | ||
| 416 | ✗ | top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5); | |
| 417 | ✗ | top = av_clip(top, grain_min, grain_max); | |
| 418 | |||
| 419 | // Blend the current pixel with the left block | ||
| 420 | ✗ | old = FUNC(sample_lut)(grain_lut, offsets, sx, sy, 1, 0, x, y); | |
| 421 | ✗ | grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); | |
| 422 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 423 | |||
| 424 | // Mix the row rows together and apply to image | ||
| 425 | ✗ | grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5); | |
| 426 | ✗ | grain = av_clip(grain, grain_min, grain_max); | |
| 427 | ✗ | add_noise_uv(x, y, grain); | |
| 428 | } | ||
| 429 | } | ||
| 430 | } | ||
| 431 | ✗ | } | |
| 432 | |||
| 433 | ✗ | static void FUNC(generate_scaling)(const uint8_t points[][2], const int num, | |
| 434 | uint8_t scaling[SCALING_SIZE] HBD_DECL) | ||
| 435 | { | ||
| 436 | ✗ | const int shift_x = bitdepth - 8; | |
| 437 | ✗ | const int scaling_size = 1 << bitdepth; | |
| 438 | ✗ | const int max_value = points[num - 1][0] << shift_x; | |
| 439 | ✗ | av_assert0(scaling_size <= SCALING_SIZE); | |
| 440 | |||
| 441 | ✗ | if (num == 0) { | |
| 442 | ✗ | memset(scaling, 0, scaling_size); | |
| 443 | ✗ | return; | |
| 444 | } | ||
| 445 | |||
| 446 | // Fill up the preceding entries with the initial value | ||
| 447 | ✗ | memset(scaling, points[0][1], points[0][0] << shift_x); | |
| 448 | |||
| 449 | // Linearly interpolate the values in the middle | ||
| 450 | ✗ | for (int i = 0; i < num - 1; i++) { | |
| 451 | ✗ | const int bx = points[i][0]; | |
| 452 | ✗ | const int by = points[i][1]; | |
| 453 | ✗ | const int ex = points[i+1][0]; | |
| 454 | ✗ | const int ey = points[i+1][1]; | |
| 455 | ✗ | const int dx = ex - bx; | |
| 456 | ✗ | const int dy = ey - by; | |
| 457 | ✗ | const int delta = dy * ((0x10000 + (dx >> 1)) / dx); | |
| 458 | av_assert1(dx > 0); | ||
| 459 | ✗ | for (int x = 0, d = 0x8000; x < dx; x++) { | |
| 460 | ✗ | scaling[(bx + x) << shift_x] = by + (d >> 16); | |
| 461 | ✗ | d += delta; | |
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | // Fill up the remaining entries with the final value | ||
| 466 | ✗ | memset(&scaling[max_value], points[num - 1][1], scaling_size - max_value); | |
| 467 | |||
| 468 | #if BIT_DEPTH != 8 | ||
| 469 | ✗ | for (int i = 0; i < num - 1; i++) { | |
| 470 | ✗ | const int pad = 1 << shift_x, rnd = pad >> 1; | |
| 471 | ✗ | const int bx = points[i][0] << shift_x; | |
| 472 | ✗ | const int ex = points[i+1][0] << shift_x; | |
| 473 | ✗ | const int dx = ex - bx; | |
| 474 | ✗ | for (int x = 0; x < dx; x += pad) { | |
| 475 | ✗ | const int range = scaling[bx + x + pad] - scaling[bx + x]; | |
| 476 | ✗ | for (int n = 1, r = rnd; n < pad; n++) { | |
| 477 | ✗ | r += range; | |
| 478 | ✗ | scaling[bx + x + n] = scaling[bx + x] + (r >> shift_x); | |
| 479 | } | ||
| 480 | } | ||
| 481 | } | ||
| 482 | #endif | ||
| 483 | } | ||
| 484 | |||
| 485 | static av_always_inline void | ||
| 486 | ✗ | FUNC(apply_grain_row)(AVFrame *out, const AVFrame *in, | |
| 487 | const int ss_x, const int ss_y, | ||
| 488 | const uint8_t scaling[3][SCALING_SIZE], | ||
| 489 | const entry grain_lut[3][GRAIN_HEIGHT+1][GRAIN_WIDTH], | ||
| 490 | const AVFilmGrainParams *params, | ||
| 491 | const int row HBD_DECL) | ||
| 492 | { | ||
| 493 | // Synthesize grain for the affected planes | ||
| 494 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 495 | ✗ | const int cpw = (out->width + ss_x) >> ss_x; | |
| 496 | ✗ | const int is_id = out->colorspace == AVCOL_SPC_RGB; | |
| 497 | ✗ | const int bh = (FFMIN(out->height - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE) + ss_y) >> ss_y; | |
| 498 | ✗ | const ptrdiff_t uv_off = row * FG_BLOCK_SIZE * out->linesize[1] >> ss_y; | |
| 499 | ✗ | pixel *const luma_src = (pixel *) | |
| 500 | ✗ | ((char *) in->data[0] + row * FG_BLOCK_SIZE * in->linesize[0]); | |
| 501 | |||
| 502 | ✗ | if (data->num_y_points) { | |
| 503 | ✗ | const int bh = FFMIN(out->height - row * FG_BLOCK_SIZE, FG_BLOCK_SIZE); | |
| 504 | ✗ | const ptrdiff_t off = row * FG_BLOCK_SIZE * out->linesize[0]; | |
| 505 | ✗ | FUNC(fgy_32x32xn_c)((pixel *) ((char *) out->data[0] + off), luma_src, | |
| 506 | ✗ | out->linesize[0], params, out->width, scaling[0], | |
| 507 | grain_lut[0], bh, row HBD_CALL); | ||
| 508 | } | ||
| 509 | |||
| 510 | ✗ | if (!data->num_uv_points[0] && !data->num_uv_points[1] && | |
| 511 | ✗ | !data->chroma_scaling_from_luma) | |
| 512 | { | ||
| 513 | ✗ | return; | |
| 514 | } | ||
| 515 | |||
| 516 | // extend padding pixels | ||
| 517 | ✗ | if (out->width & ss_x) { | |
| 518 | ✗ | pixel *ptr = luma_src; | |
| 519 | ✗ | for (int y = 0; y < bh; y++) { | |
| 520 | ✗ | ptr[out->width] = ptr[out->width - 1]; | |
| 521 | ✗ | ptr = (pixel *) ((char *) ptr + (in->linesize[0] << ss_y)); | |
| 522 | } | ||
| 523 | } | ||
| 524 | |||
| 525 | ✗ | if (data->chroma_scaling_from_luma) { | |
| 526 | ✗ | for (int pl = 0; pl < 2; pl++) | |
| 527 | ✗ | FUNC(fguv_32x32xn_c)((pixel *) ((char *) out->data[1 + pl] + uv_off), | |
| 528 | ✗ | (const pixel *) ((const char *) in->data[1 + pl] + uv_off), | |
| 529 | ✗ | in->linesize[1], params, cpw, scaling[0], | |
| 530 | ✗ | grain_lut[1 + pl], bh, row, luma_src, | |
| 531 | ✗ | in->linesize[0], pl, is_id, ss_x, ss_y HBD_CALL); | |
| 532 | } else { | ||
| 533 | ✗ | for (int pl = 0; pl < 2; pl++) { | |
| 534 | ✗ | if (data->num_uv_points[pl]) { | |
| 535 | ✗ | FUNC(fguv_32x32xn_c)((pixel *) ((char *) out->data[1 + pl] + uv_off), | |
| 536 | ✗ | (const pixel *) ((const char *) in->data[1 + pl] + uv_off), | |
| 537 | ✗ | in->linesize[1], params, cpw, scaling[1 + pl], | |
| 538 | ✗ | grain_lut[1 + pl], bh, row, luma_src, | |
| 539 | ✗ | in->linesize[0], pl, is_id, ss_x, ss_y HBD_CALL); | |
| 540 | } | ||
| 541 | } | ||
| 542 | } | ||
| 543 | } | ||
| 544 | |||
| 545 | ✗ | static int FUNC(apply_film_grain)(AVFrame *out_frame, const AVFrame *in_frame, | |
| 546 | const AVFilmGrainParams *params HBD_DECL) | ||
| 547 | { | ||
| 548 | entry grain_lut[3][GRAIN_HEIGHT + 1][GRAIN_WIDTH]; | ||
| 549 | uint8_t scaling[3][SCALING_SIZE]; | ||
| 550 | |||
| 551 | ✗ | const AVFilmGrainAOMParams *const data = ¶ms->codec.aom; | |
| 552 | ✗ | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(out_frame->format); | |
| 553 | ✗ | const int rows = AV_CEIL_RSHIFT(out_frame->height, 5); /* log2(FG_BLOCK_SIZE) */ | |
| 554 | ✗ | const int subx = desc->log2_chroma_w, suby = desc->log2_chroma_h; | |
| 555 | |||
| 556 | // Generate grain LUTs as needed | ||
| 557 | ✗ | FUNC(generate_grain_y_c)(grain_lut[0], params HBD_CALL); | |
| 558 | ✗ | if (data->num_uv_points[0] || data->chroma_scaling_from_luma) | |
| 559 | ✗ | FUNC(generate_grain_uv_c)(grain_lut[1], grain_lut[0], params, 0, subx, suby HBD_CALL); | |
| 560 | ✗ | if (data->num_uv_points[1] || data->chroma_scaling_from_luma) | |
| 561 | ✗ | FUNC(generate_grain_uv_c)(grain_lut[2], grain_lut[0], params, 1, subx, suby HBD_CALL); | |
| 562 | |||
| 563 | // Generate scaling LUTs as needed | ||
| 564 | ✗ | if (data->num_y_points || data->chroma_scaling_from_luma) | |
| 565 | ✗ | FUNC(generate_scaling)(data->y_points, data->num_y_points, scaling[0] HBD_CALL); | |
| 566 | ✗ | if (data->num_uv_points[0]) | |
| 567 | ✗ | FUNC(generate_scaling)(data->uv_points[0], data->num_uv_points[0], scaling[1] HBD_CALL); | |
| 568 | ✗ | if (data->num_uv_points[1]) | |
| 569 | ✗ | FUNC(generate_scaling)(data->uv_points[1], data->num_uv_points[1], scaling[2] HBD_CALL); | |
| 570 | |||
| 571 | ✗ | for (int row = 0; row < rows; row++) { | |
| 572 | ✗ | FUNC(apply_grain_row)(out_frame, in_frame, subx, suby, scaling, grain_lut, | |
| 573 | params, row HBD_CALL); | ||
| 574 | } | ||
| 575 | |||
| 576 | ✗ | return 0; | |
| 577 | } | ||
| 578 |