| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * Copyright (C) 2025 Niklas Haas | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #ifndef SWSCALE_OPS_CHAIN_H | ||
| 22 | #define SWSCALE_OPS_CHAIN_H | ||
| 23 | |||
| 24 | #include "libavutil/cpu.h" | ||
| 25 | #include "libavutil/mem.h" | ||
| 26 | |||
| 27 | #include "ops_internal.h" | ||
| 28 | |||
| 29 | /** | ||
| 30 | * Helpers for SIMD implementations based on chained kernels, using a | ||
| 31 | * continuation passing style to link them together. | ||
| 32 | * | ||
| 33 | * The basic idea here is to "link" together a series of different operation | ||
| 34 | * kernels by storing a list of kernel addresses in an SwsOpChain. Each | ||
| 35 | * kernel will load the address of the next kernel (the "continuation") from | ||
| 36 | * this struct and jump directly into it, using an internal function signature | ||
| 37 | * that is an implementation detail of the specific backend. | ||
| 38 | */ | ||
| 39 | |||
| 40 | typedef struct SwsOpTable SwsOpTable; /* forward declaration; the struct body is defined further down in this header */ | ||
| 41 | |||
| 42 | /** | ||
| 43 | * Private data for each kernel: a single 16-byte slot (size enforced by the static_assert below) that can be viewed through any of the members. | ||
| 44 | */ | ||
| 45 | typedef union SwsOpPriv { | ||
| 46 | DECLARE_ALIGNED_16(char, data)[16]; /* raw byte view, declared through DECLARE_ALIGNED_16 */ | ||
| 47 | |||
| 48 | /* Common types */ | ||
| 49 | void *ptr; /* the member released by ff_op_priv_free() and ff_op_priv_unref() */ | ||
| 50 | uint8_t u8[16]; | ||
| 51 | int8_t i8[16]; | ||
| 52 | uint16_t u16[8]; | ||
| 53 | int16_t i16[8]; | ||
| 54 | uint32_t u32[4]; | ||
| 55 | int32_t i32[4]; | ||
| 56 | float f32[4]; | ||
| 57 | uint64_t u64[2]; | ||
| 58 | int64_t i64[2]; | ||
| 59 | uintptr_t uptr[2]; | ||
| 60 | intptr_t iptr[2]; | ||
| 61 | } SwsOpPriv; | ||
| 62 | |||
| 63 | static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch"); /* SwsOpImpl's asserted 32-byte layout builds on this size */ | ||
| 64 | |||
| 65 | /** | ||
| 66 | * Per-kernel execution context (SwsOpImpl), preceded by the function pointer type it stores. | ||
| 67 | * | ||
| 68 | * Note: SwsOpImpl is hard-coded in assembly, so do not change its layout; the static_asserts below pin its size and the offset of `priv`. | ||
| 69 | */ | ||
| 70 | typedef void (*SwsFuncPtr)(void); /* generic kernel address; the real call signature is internal to each backend */ | ||
| 71 | typedef struct SwsOpImpl { | ||
| 72 | SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */ | ||
| 73 | SwsOpPriv priv; /* [offset = 16] Private data for this operation. */ | ||
| 74 | } SwsOpImpl; | ||
| 75 | |||
| 76 | static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch"); /* total size expected by the assembly */ | ||
| 77 | static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch"); /* matches the [offset = 16] noted on `priv` */ | ||
| 78 | |||
| 79 | /** | ||
| 80 | * Compiled "chain" of operations, which can be dispatched efficiently. | ||
| 81 | * Effectively just a list of function pointers, alongside a small amount of | ||
| 82 | * private data for each operation. | ||
| 83 | */ | ||
| 84 | typedef struct SwsOpChain { | ||
| 85 | #define SWS_MAX_OPS 16 /* capacity in operations, not counting the extra entrypoint slot */ | ||
| 86 | SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */ | ||
| 87 | void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *); /* same length as impl[]; presumably free[i] releases impl[i].priv -- TODO confirm in ops_chain.c */ | ||
| 88 | int num_impl; /* presumably the number of impl[] slots in use -- TODO confirm in ops_chain.c */ | ||
| 89 | int cpu_flags; /* set of all used CPU flags */ | ||
| 90 | int over_read; /* chain over-reads input by this many bytes */ | ||
| 91 | int over_write; /* chain over-writes output by this many bytes */ | ||
| 92 | } SwsOpChain; | ||
| 93 | |||
| 94 | SwsOpChain *ff_sws_op_chain_alloc(void); /* NOTE(review): failure behavior (NULL return?) is not visible in this header -- confirm in ops_chain.c */ | ||
| 95 | void ff_sws_op_chain_free_cb(void *chain); /* untyped (void *) counterpart of ff_sws_op_chain_free() */ | ||
| 96 | 5514 | static inline void ff_sws_op_chain_free(SwsOpChain *chain) /* typed convenience wrapper around ff_sws_op_chain_free_cb() */ | |
| 97 | { | ||
| 98 | 5514 | ff_sws_op_chain_free_cb(chain); /* all of the work is done by the void * variant */ | |
| 99 | 5514 | } | |
| 100 | |||
| 101 | /* Append the kernel `func` to `chain`, together with its private data `priv` and the matching `free` callback. Returns 0 on success, or a negative error code. */ | ||
| 102 | int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, | ||
| 103 | void (*free)(SwsOpPriv *), const SwsOpPriv *priv); /* NOTE(review): `priv` is const, so it is presumably copied into the chain -- confirm in ops_chain.c */ | ||
| 104 | |||
| 105 | typedef struct SwsImplParams { /* read-only input handed to SwsOpEntry.setup and SwsOpEntry.check */ | ||
| 106 | const SwsOpTable *table; /* presumably the table that contains the entry being evaluated -- TODO confirm */ | ||
| 107 | const SwsOp *op; /* presumably the operation being compiled -- TODO confirm */ | ||
| 108 | SwsContext *ctx; | ||
| 109 | } SwsImplParams; | ||
| 110 | |||
| 111 | typedef struct SwsImplResult { /* output of SwsOpEntry.setup, written through its `out` argument */ | ||
| 112 | SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */ | ||
| 113 | SwsOpPriv priv; /* private data for this implementation instance */ | ||
| 114 | void (*free)(SwsOpPriv *priv); /* free function for `priv` */ | ||
| 115 | int over_read; /* implementation over-reads input by this many bytes */ | ||
| 116 | int over_write; /* implementation over-writes output by this many bytes */ | ||
| 117 | } SwsImplResult; | ||
| 118 | |||
| 119 | typedef struct SwsOpEntry { /* one kernel listed in an SwsOpTable: match criteria plus implementation hooks */ | ||
| 120 | /* Kernel metadata; reduced size subset of SwsOp */ | ||
| 121 | SwsOpType op; | ||
| 122 | SwsPixelType type; | ||
| 123 | SwsCompMask mask; /* mask of active components (after operation) */ | ||
| 124 | bool flexible; /* if true, only the type and op are matched */ | ||
| 125 | |||
| 126 | union { /* extra data defining the operation, unless `flexible` is true */ | ||
| 127 | SwsReadWriteOp rw; | ||
| 128 | SwsPackOp pack; | ||
| 129 | SwsSwizzleOp swizzle; | ||
| 130 | SwsConvertOp convert; | ||
| 131 | SwsClearOp clear; | ||
| 132 | uint32_t linear_mask; /* subset of SwsLinearOp */ | ||
| 133 | int dither_size; /* subset of SwsDitherOp */ | ||
| 134 | AVRational scale; /* scale factor for SWS_OP_SCALE */ | ||
| 135 | }; | ||
| 136 | |||
| 137 | /* Kernel implementation */ | ||
| 138 | SwsFuncPtr func; /* kernel address; SwsImplResult.func overrides it when non-NULL */ | ||
| 139 | int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional; fills `out` -- return convention presumably 0 or a negative error code, TODO confirm */ | ||
| 140 | bool (*check)(const SwsImplParams *params); /* optional, return true if supported */ | ||
| 141 | } SwsOpEntry; | ||
| 142 | |||
| 143 | /* Setup helpers for common/trivial operation types; each has the same signature as the SwsOpEntry.setup callback */ | ||
| 144 | int ff_sws_setup_shift(const SwsImplParams *params, SwsImplResult *out); | ||
| 145 | int ff_sws_setup_scale(const SwsImplParams *params, SwsImplResult *out); | ||
| 146 | int ff_sws_setup_clamp(const SwsImplParams *params, SwsImplResult *out); | ||
| 147 | int ff_sws_setup_clear(const SwsImplParams *params, SwsImplResult *out); | ||
| 148 | |||
| 149 | 19847 | static inline void ff_op_priv_free(SwsOpPriv *priv) /* has the void (*)(SwsOpPriv *) shape used by the `free` callbacks in this header */ | |
| 150 | { | ||
| 151 | 19847 | av_freep(&priv->ptr); /* frees priv->ptr and resets it to NULL */ | |
| 152 | 19847 | } | |
| 153 | |||
| 154 | 3136 | static inline void ff_op_priv_unref(SwsOpPriv *priv) /* like ff_op_priv_free(), but for a priv->ptr that is a refstruct reference */ | |
| 155 | { | ||
| 156 | 3136 | av_refstruct_unref(&priv->ptr); /* NOTE(review): declared in libavutil/refstruct.h, which this header does not include directly -- presumably pulled in via ops_internal.h, confirm */ | |
| 157 | 3136 | } | |
| 158 | |||
| 159 | struct SwsOpTable { /* a set of kernels sharing one CPU requirement and one block size */ | ||
| 160 | unsigned cpu_flags; /* required CPU flags for this table; NOTE(review): unsigned here, while SwsOpChain.cpu_flags is int */ | ||
| 161 | int block_size; /* fixed block size of this table */ | ||
| 162 | const SwsOpEntry *entries[]; /* terminated by NULL */ | ||
| 163 | }; | ||
| 164 | |||
| 165 | /** | ||
| 166 | * "Compile" a single op by looking it up in a list of fixed-size op tables. | ||
| 167 | * See `op_match` in `ops_chain.c` for details on how the matching works. | ||
| 168 | * Presumably the op is ops[ops_index] and the matched kernel is added to `chain` -- TODO confirm in ops_chain.c. | ||
| 169 | * Returns 0 or a negative error code. | ||
| 170 | */ | ||
| 171 | int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], | ||
| 172 | int num_tables, SwsOpList *ops, int ops_index, | ||
| 173 | const int block_size, SwsOpChain *chain); | ||
| 174 | |||
| 175 | #endif | ||
| 176 |