Line |
Branch |
Exec |
Source |
1 |
|
|
/* |
2 |
|
|
* Copyright (c) 2021 Mark Reid <mindmark@gmail.com> |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#include "libavutil/attributes.h" |
22 |
|
|
#include "libavutil/cpu.h" |
23 |
|
|
#include "libavutil/x86/cpu.h" |
24 |
|
|
#include "libavfilter/lut3d.h" |
25 |
|
|
|
26 |
|
|
/*
 * DEFINE_INTERP_FUNC(name, format, opt)
 *
 * Emits two things for one (interpolation, pixel layout, instruction set)
 * combination:
 *   - a prototype for the external worker ff_interp_<name>_<format>_<opt>()
 *     (its definition is not part of this file);
 *   - a static job callback interp_<name>_<format>_<opt>() with the
 *     (ctx, arg, jobnr, nb_jobs) signature that forwards one horizontal
 *     band of the frame to that worker.
 *
 * The callback:
 *   - passes a pre-LUT pointer only when lut3d->prelut.size is positive,
 *     NULL otherwise;
 *   - sets has_alpha when the input has a non-zero linesize[3] and the
 *     output frame is a different frame than the input;
 *   - covers rows [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs);
 *   - always returns 0.
 */
#define DEFINE_INTERP_FUNC(name, format, opt)                                                  \
void ff_interp_##name##_##format##_##opt(LUT3DContext *lut3d, Lut3DPreLut *prelut,             \
                                         AVFrame *src, AVFrame *dst,                           \
                                         int slice_start, int slice_end, int has_alpha);       \
static int interp_##name##_##format##_##opt(AVFilterContext *ctx, void *arg,                   \
                                            int jobnr, int nb_jobs)                            \
{                                                                                              \
    LUT3DContext *lut3d     = ctx->priv;                                                       \
    ThreadData   *td        = arg;                                                             \
    AVFrame      *frame_in  = td->in;                                                          \
    AVFrame      *frame_out = td->out;                                                         \
    Lut3DPreLut  *prelut    = NULL;                                                            \
    int row_begin = (frame_in->height *  jobnr     ) / nb_jobs;                                \
    int row_end   = (frame_in->height * (jobnr + 1)) / nb_jobs;                                \
    int has_alpha = frame_out != frame_in && frame_in->linesize[3] != 0;                       \
                                                                                               \
    if (lut3d->prelut.size > 0)                                                                \
        prelut = &lut3d->prelut;                                                               \
                                                                                               \
    ff_interp_##name##_##format##_##opt(lut3d, prelut, frame_in, frame_out,                    \
                                        row_begin, row_end, has_alpha);                        \
    return 0;                                                                                  \
}
41 |
|
|
|
42 |
|
|
/*
 * Instantiate the tetrahedral wrappers. Every instantiation is limited to
 * x86-64 builds and to the instruction sets the build provides external
 * implementations for.
 */
#if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, pf32, avx2)
DEFINE_INTERP_FUNC(tetrahedral, p16,  avx2)
#endif /* ARCH_X86_64 && HAVE_AVX2_EXTERNAL */

#if ARCH_X86_64 && HAVE_AVX_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, pf32, avx)
DEFINE_INTERP_FUNC(tetrahedral, p16,  avx)
#endif /* ARCH_X86_64 && HAVE_AVX_EXTERNAL */

#if ARCH_X86_64 && HAVE_SSE2_EXTERNAL
DEFINE_INTERP_FUNC(tetrahedral, pf32, sse2)
DEFINE_INTERP_FUNC(tetrahedral, p16,  sse2)
#endif /* ARCH_X86_64 && HAVE_SSE2_EXTERNAL */
56 |
|
|
|
57 |
|
|
|
58 |
|
✗ |
/**
 * Install an x86 SIMD implementation of the interpolation callback, if one
 * applies.
 *
 * s->interp is only written on x86-64 builds, when the context uses
 * tetrahedral interpolation and the pixel format is planar. In every other
 * case it is left as the caller set it.
 *
 * Instruction-set tiers are tried in the order AVX2 (which additionally
 * requires FMA3), AVX, SSE2. Only the first tier whose CPU-flag test passes
 * is considered: if the format is neither float nor 16 bits deep, nothing
 * is assigned and no lower tier is tried.
 *
 * @param s    filter context; s->interpolation is read, s->interp may be set
 * @param desc descriptor of the pixel format being processed; its flags and
 *             the depth of component 0 are read
 */
av_cold void ff_lut3d_init_x86(LUT3DContext *s, const AVPixFmtDescriptor *desc)
{
    int cpu_flags = av_get_cpu_flags();
    int is_planar = !!(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
    int is_float  = !!(desc->flags & AV_PIX_FMT_FLAG_FLOAT);
    int bit_depth = desc->comp[0].depth;

#if ARCH_X86_64
    /* Only planar tetrahedral interpolation has SIMD versions here. */
    if (s->interpolation != INTERPOLATE_TETRAHEDRAL || !is_planar)
        return;

    if (EXTERNAL_AVX2_FAST(cpu_flags) && EXTERNAL_FMA3(cpu_flags)) {
#if HAVE_AVX2_EXTERNAL
        if (is_float)
            s->interp = interp_tetrahedral_pf32_avx2;
        else if (bit_depth == 16)
            s->interp = interp_tetrahedral_p16_avx2;
#endif
    } else if (EXTERNAL_AVX_FAST(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
        if (is_float)
            s->interp = interp_tetrahedral_pf32_avx;
        else if (bit_depth == 16)
            s->interp = interp_tetrahedral_p16_avx;
#endif
    } else if (EXTERNAL_SSE2(cpu_flags)) {
#if HAVE_SSE2_EXTERNAL
        if (is_float)
            s->interp = interp_tetrahedral_pf32_sse2;
        else if (bit_depth == 16)
            s->interp = interp_tetrahedral_p16_sse2;
#endif
    }
#endif /* ARCH_X86_64 */
}
93 |
|
|
|