| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (c) 2021 Mark Reid <mindmark@gmail.com> | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/attributes.h" | ||
| 22 | #include "libavutil/cpu.h" | ||
| 23 | #include "libavutil/x86/cpu.h" | ||
| 24 | #include "libavfilter/lut3d.h" | ||
| 25 | |||
/*
 * DEFINE_INTERP_FUNC(name, format, opt)
 *
 * Expands to two things:
 *  - a prototype for ff_interp_<name>_<format>_<opt>(), which is only
 *    declared here and must be provided by another object file
 *    (presumably the hand-written x86 assembly -- confirm against the
 *    build);
 *  - a static job callback interp_<name>_<format>_<opt>() taking
 *    (ctx, arg, jobnr, nb_jobs), which forwards one horizontal slice of
 *    the frame to that function and always returns 0.
 *
 * The callback reads the LUT3DContext from ctx->priv and the in/out frames
 * from the ThreadData passed as arg. Job number jobnr out of nb_jobs
 * processes rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs).
 */
#define DEFINE_INTERP_FUNC(name, format, opt)                                   \
void ff_interp_##name##_##format##_##opt(LUT3DContext *lut3d,                   \
                                         Lut3DPreLut *prelut,                   \
                                         AVFrame *src, AVFrame *dst,            \
                                         int slice_start, int slice_end,        \
                                         int has_alpha);                        \
static int interp_##name##_##format##_##opt(AVFilterContext *ctx, void *arg,    \
                                            int jobnr, int nb_jobs)             \
{                                                                               \
    LUT3DContext *lut3d = ctx->priv;                                            \
    ThreadData *td = arg;                                                       \
    AVFrame *in  = td->in;                                                      \
    AVFrame *out = td->out;                                                     \
    Lut3DPreLut *prelut = NULL;                                                 \
    int has_alpha = 0;                                                          \
    int slice_start, slice_end;                                                 \
                                                                                \
    /* The pre-LUT is passed on only when it actually holds entries. */         \
    if (lut3d->prelut.size > 0)                                                 \
        prelut = &lut3d->prelut;                                                \
                                                                                \
    /* Flag alpha only when the input has a fourth plane and the output */      \
    /* is a distinct frame (not in-place processing).                   */      \
    if (in->linesize[3] && out != in)                                           \
        has_alpha = 1;                                                          \
                                                                                \
    slice_start = (in->height *  jobnr     ) / nb_jobs;                         \
    slice_end   = (in->height * (jobnr + 1)) / nb_jobs;                         \
                                                                                \
    ff_interp_##name##_##format##_##opt(lut3d, prelut, in, out,                 \
                                        slice_start, slice_end, has_alpha);     \
    return 0;                                                                   \
}
| 41 | |||
/*
 * Instantiate the tetrahedral interpolation wrappers for every instruction
 * set the build provides external (assembled) code for. Each instruction
 * set gets a planar 16-bit (p16) and a planar float (pf32) variant.
 * All of them are restricted to x86-64 builds.
 */
#if ARCH_X86_64

#if HAVE_SSE2_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, p16,  sse2)
DEFINE_INTERP_FUNC(tetrahedral, pf32, sse2)
#endif /* HAVE_SSE2_EXTERNAL */

#if HAVE_AVX_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, p16,  avx)
DEFINE_INTERP_FUNC(tetrahedral, pf32, avx)
#endif /* HAVE_AVX_EXTERNAL */

#if HAVE_AVX2_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, p16,  avx2)
DEFINE_INTERP_FUNC(tetrahedral, pf32, avx2)
#endif /* HAVE_AVX2_EXTERNAL */

#endif /* ARCH_X86_64 */
| 56 | |||
| 57 | |||
| 58 | ✗ | av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc) | |
| 59 | { | ||
| 60 | ✗ | int cpu_flags = av_get_cpu_flags(); | |
| 61 | ✗ | int planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR; | |
| 62 | ✗ | int isfloat = desc->flags & AV_PIX_FMT_FLAG_FLOAT; | |
| 63 | ✗ | int depth = desc->comp[0].depth; | |
| 64 | |||
| 65 | #if ARCH_X86_64 | ||
| 66 | ✗ | if (EXTERNAL_AVX2_FAST(cpu_flags) && EXTERNAL_FMA3(cpu_flags) && s->interpolation == INTERPOLATE_TETRAHEDRAL && planar) { | |
| 67 | #if HAVE_AVX2_EXTERNAL | ||
| 68 | ✗ | if (isfloat && planar) { | |
| 69 | ✗ | s->interp = interp_tetrahedral_pf32_avx2; | |
| 70 | ✗ | } else if (depth == 16) { | |
| 71 | ✗ | s->interp = interp_tetrahedral_p16_avx2; | |
| 72 | } | ||
| 73 | #endif | ||
| 74 | ✗ | } else if (EXTERNAL_AVX_FAST(cpu_flags) && s->interpolation == INTERPOLATE_TETRAHEDRAL && planar) { | |
| 75 | #if HAVE_AVX_EXTERNAL | ||
| 76 | ✗ | if (isfloat) { | |
| 77 | ✗ | s->interp = interp_tetrahedral_pf32_avx; | |
| 78 | ✗ | } else if (depth == 16) { | |
| 79 | ✗ | s->interp = interp_tetrahedral_p16_avx; | |
| 80 | } | ||
| 81 | #endif | ||
| 82 | ✗ | } else if (EXTERNAL_SSE2(cpu_flags) && s->interpolation == INTERPOLATE_TETRAHEDRAL && planar) { | |
| 83 | #if HAVE_SSE2_EXTERNAL | ||
| 84 | ✗ | if (isfloat) { | |
| 85 | ✗ | s->interp = interp_tetrahedral_pf32_sse2; | |
| 86 | ✗ | } else if (depth == 16) { | |
| 87 | ✗ | s->interp = interp_tetrahedral_p16_sse2; | |
| 88 | } | ||
| 89 | #endif | ||
| 90 | } | ||
| 91 | #endif | ||
| 92 | ✗ | } | |
| 93 |