Line |
Branch |
Exec |
Source |
1 |
|
|
/** |
2 |
|
|
* Copyright (C) 2025 Niklas Haas |
3 |
|
|
* |
4 |
|
|
* This file is part of FFmpeg. |
5 |
|
|
* |
6 |
|
|
* FFmpeg is free software; you can redistribute it and/or |
7 |
|
|
* modify it under the terms of the GNU Lesser General Public |
8 |
|
|
* License as published by the Free Software Foundation; either |
9 |
|
|
* version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
* |
11 |
|
|
* FFmpeg is distributed in the hope that it will be useful, |
12 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 |
|
|
* Lesser General Public License for more details. |
15 |
|
|
* |
16 |
|
|
* You should have received a copy of the GNU Lesser General Public |
17 |
|
|
* License along with FFmpeg; if not, write to the Free Software |
18 |
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 |
|
|
*/ |
20 |
|
|
|
21 |
|
|
#ifndef SWSCALE_OPS_CHAIN_H |
22 |
|
|
#define SWSCALE_OPS_CHAIN_H |
23 |
|
|
|
24 |
|
|
#include "libavutil/cpu.h" |
25 |
|
|
|
26 |
|
|
#include "ops_internal.h" |
27 |
|
|
|
28 |
|
|
/** |
29 |
|
|
* Helpers for SIMD implementations based on chained kernels, using a |
30 |
|
|
* continuation passing style to link them together. |
31 |
|
|
* |
32 |
|
|
* The basic idea here is to "link" together a series of different operation |
33 |
|
|
* kernels by constructing a list of kernel addresses into an SwsOpChain. Each |
34 |
|
|
* kernel will load the address of the next kernel (the "continuation") from |
35 |
|
|
* this struct, and jump directly into it; using an internal function signature |
36 |
|
|
* that is an implementation detail of the specific backend. |
37 |
|
|
*/ |
38 |
|
|
|
39 |
|
|
/** |
40 |
|
|
* Private data for each kernel. |
41 |
|
|
*/ |
42 |
|
|
typedef union SwsOpPriv { |
43 |
|
|
DECLARE_ALIGNED_16(char, data)[16]; |
44 |
|
|
|
45 |
|
|
/* Common types */ |
46 |
|
|
void *ptr; |
47 |
|
|
uint8_t u8[16]; |
48 |
|
|
uint16_t u16[8]; |
49 |
|
|
uint32_t u32[4]; |
50 |
|
|
float f32[4]; |
51 |
|
|
} SwsOpPriv; |
52 |
|
|
|
53 |
|
|
static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); |
54 |
|
|
|
55 |
|
|
/* Setup helpers */ |
56 |
|
|
int ff_sws_setup_u(const SwsOp *op, SwsOpPriv *out); |
57 |
|
|
int ff_sws_setup_u8(const SwsOp *op, SwsOpPriv *out); |
58 |
|
|
int ff_sws_setup_q(const SwsOp *op, SwsOpPriv *out); |
59 |
|
|
int ff_sws_setup_q4(const SwsOp *op, SwsOpPriv *out); |
60 |
|
|
|
61 |
|
|
/** |
62 |
|
|
* Per-kernel execution context. |
63 |
|
|
* |
64 |
|
|
* Note: This struct is hard-coded in assembly, so do not change the layout. |
65 |
|
|
*/ |
66 |
|
|
typedef void (*SwsFuncPtr)(void); |
67 |
|
|
typedef struct SwsOpImpl { |
68 |
|
|
SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */ |
69 |
|
|
SwsOpPriv priv; /* [offset = 16] Private data for this operation. */ |
70 |
|
|
} SwsOpImpl; |
71 |
|
|
|
72 |
|
|
static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch"); |
73 |
|
|
static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch"); |
74 |
|
|
|
75 |
|
|
/** |
76 |
|
|
* Compiled "chain" of operations, which can be dispatched efficiently. |
77 |
|
|
* Effectively just a list of function pointers, alongside a small amount of |
78 |
|
|
* private data for each operation. |
79 |
|
|
*/ |
80 |
|
|
typedef struct SwsOpChain { |
81 |
|
|
#define SWS_MAX_OPS 16 |
82 |
|
|
SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */ |
83 |
|
|
void (*free[SWS_MAX_OPS + 1])(void *); |
84 |
|
|
int num_impl; |
85 |
|
|
int cpu_flags; /* set of all used CPU flags */ |
86 |
|
|
} SwsOpChain; |
87 |
|
|
|
88 |
|
|
SwsOpChain *ff_sws_op_chain_alloc(void); |
89 |
|
|
void ff_sws_op_chain_free_cb(void *chain); |
90 |
|
1647 |
/**
 * Typed convenience wrapper: hands `chain` to ff_sws_op_chain_free_cb(),
 * which takes the same pointer as an untyped `void *`.
 */
static inline void ff_sws_op_chain_free(SwsOpChain *chain)
{
    void *opaque = chain;

    ff_sws_op_chain_free_cb(opaque);
}
94 |
|
|
|
95 |
|
|
/* Returns 0 on success, or a negative error code. */ |
96 |
|
|
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, |
97 |
|
|
void (*free)(void *), const SwsOpPriv *priv); |
98 |
|
|
|
99 |
|
|
typedef struct SwsOpEntry { |
100 |
|
|
/* Kernel metadata; reduced size subset of SwsOp */ |
101 |
|
|
SwsOpType op; |
102 |
|
|
SwsPixelType type; |
103 |
|
|
bool flexible; /* if true, only the type and op are matched */ |
104 |
|
|
bool unused[4]; /* for kernels which operate on a subset of components */ |
105 |
|
|
|
106 |
|
|
union { /* extra data defining the operation, unless `flexible` is true */ |
107 |
|
|
SwsReadWriteOp rw; |
108 |
|
|
SwsPackOp pack; |
109 |
|
|
SwsSwizzleOp swizzle; |
110 |
|
|
SwsConvertOp convert; |
111 |
|
|
uint32_t linear_mask; /* subset of SwsLinearOp */ |
112 |
|
|
int dither_size; /* subset of SwsDitherOp */ |
113 |
|
|
int clear_value; /* clear value for integer clears */ |
114 |
|
|
}; |
115 |
|
|
|
116 |
|
|
/* Kernel implementation */ |
117 |
|
|
SwsFuncPtr func; |
118 |
|
|
int (*setup)(const SwsOp *op, SwsOpPriv *out); /* optional */ |
119 |
|
|
void (*free)(void *priv); |
120 |
|
|
} SwsOpEntry; |
121 |
|
|
|
122 |
|
|
typedef struct SwsOpTable { |
123 |
|
|
unsigned cpu_flags; /* required CPU flags for this table */ |
124 |
|
|
int block_size; /* fixed block size of this table */ |
125 |
|
|
const SwsOpEntry *entries[]; /* terminated by NULL */ |
126 |
|
|
} SwsOpTable; |
127 |
|
|
|
128 |
|
|
/** |
129 |
|
|
* "Compile" a single op by looking it up in a list of fixed size op tables. |
130 |
|
|
* See `op_match` in `ops.c` for details on how the matching works. |
131 |
|
|
* |
132 |
|
|
* Returns 0, AVERROR(EAGAIN), or a negative error code. |
133 |
|
|
*/ |
134 |
|
|
int ff_sws_op_compile_tables(const SwsOpTable *const tables[], int num_tables, |
135 |
|
|
SwsOpList *ops, const int block_size, |
136 |
|
|
SwsOpChain *chain); |
137 |
|
|
|
138 |
|
|
#endif |
139 |
|
|
|