| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru> | ||
| 3 | * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at> | ||
| 4 | * | ||
| 5 | * This file is part of FFmpeg. | ||
| 6 | * | ||
| 7 | * FFmpeg is free software; you can redistribute it and/or | ||
| 8 | * modify it under the terms of the GNU Lesser General Public | ||
| 9 | * License as published by the Free Software Foundation; either | ||
| 10 | * version 2.1 of the License, or (at your option) any later version. | ||
| 11 | * | ||
| 12 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * Lesser General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU Lesser General Public | ||
| 18 | * License along with FFmpeg; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include "libavutil/attributes.h" | ||
| 23 | #include "libavutil/avassert.h" | ||
| 24 | #include "libavutil/cpu.h" | ||
| 25 | #include "libavutil/x86/asm.h" | ||
| 26 | #include "libavutil/x86/cpu.h" | ||
| 27 | #include "libavcodec/mpegvideo.h" | ||
| 28 | #include "libavcodec/mpegvideodata.h" | ||
| 29 | #include "libavcodec/mpegvideo_unquantize.h" | ||
| 30 | |||
| 31 | #if HAVE_SSE2_INLINE | ||
| 32 | |||
| 33 | #define SPLATW(reg) "punpcklwd %%" #reg ", %%" #reg "\n\t" \ | ||
| 34 | "pshufd $0, %%" #reg ", %%" #reg "\n\t" /* asm text snippet: broadcast the lowest 16-bit word of `reg` to all eight word lanes (punpcklwd duplicates it into the low dword, pshufd $0 replicates that dword) */ | ||
| 35 | |||
| 36 | #if HAVE_SSSE3_INLINE | ||
| 37 | |||
| 38 | 194461 | /* H.263 intra dequantizer (SSSE3 inline asm): rewrites the coefficients in place as block[i]*qmul + sgn(block[i])*qadd, then stores a separately computed DC value into block[0]. */ static void dct_unquantize_h263_intra_ssse3(const MPVContext *s, | |
| 39 | int16_t *block, int n, int qscale) | ||
| 40 | { | ||
| 41 | 194461 | x86_reg qmul = (unsigned)qscale << 1; /* multiplier 2*qscale; the asm later reuses this operand as a pointer register */ | |
| 42 | int level, qadd; | ||
| 43 | |||
| 44 | av_assert2(s->block_last_index[n]>=0 || s->h263_aic); | ||
| 45 | |||
| 46 |
2/2✓ Branch 0 taken 194460 times.
✓ Branch 1 taken 1 times.
|
194461 | if (!s->h263_aic) { /* without h263_aic: scale the DC term and use an odd qadd; with it: DC is kept and qadd is 0 */ |
| 47 |
2/2✓ Branch 0 taken 129640 times.
✓ Branch 1 taken 64820 times.
|
194460 | if (n < 4) /* n < 4 selects y_dc_scale, any other n selects c_dc_scale */ |
| 48 | 129640 | level = block[0] * s->y_dc_scale; | |
| 49 | else | ||
| 50 | 64820 | level = block[0] * s->c_dc_scale; | |
| 51 | 194460 | qadd = (qscale - 1) | 1; | |
| 52 | }else{ | ||
| 53 | 1 | qadd = 0; | |
| 54 | 1 | level= block[0]; | |
| 55 | } | ||
| 56 |
2/2✓ Branch 0 taken 194460 times.
✓ Branch 1 taken 1 times.
|
194461 | x86_reg offset = s->ac_pred ? 63 << 1 : s->intra_scantable.raster_end[s->block_last_index[n]] << 1; /* byte offset (index*2) of the last coefficient to process: index 63 when ac_pred is set, otherwise raster_end of the block's last index */ |
| 57 | |||
| 58 | 194461 | __asm__ volatile( /* operands: %0 = offset, %1 = qmul, %2 = block, %3 = qadd */ | |
| 59 | "movd %k1, %%xmm0 \n\t" //qmul (taken from the 32-bit view of operand 1) | ||
| 60 | "lea (%2, %0), %1 \n\t" /* operand 1 is reused: it now holds block + offset bytes */ | ||
| 61 | "neg %0 \n\t" /* offset becomes a negative byte index that counts up */ | ||
| 62 | "movd %3, %%xmm1 \n\t" //qadd | ||
| 63 | SPLATW(xmm0) | ||
| 64 | SPLATW(xmm1) | ||
| 65 | |||
| 66 | ".p2align 4 \n\t" | ||
| 67 | "1: \n\t" | ||
| 68 | "movdqa (%1, %0), %%xmm2 \n\t" /* load 16 coefficients (2 x 16 bytes) with aligned loads */ | ||
| 69 | "movdqa 16(%1, %0), %%xmm3 \n\t" | ||
| 70 | |||
| 71 | "movdqa %%xmm1, %%xmm4 \n\t" /* working copies of the broadcast qadd */ | ||
| 72 | "movdqa %%xmm1, %%xmm5 \n\t" | ||
| 73 | |||
| 74 | "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd | ||
| 75 | "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd | ||
| 76 | |||
| 77 | "pmullw %%xmm0, %%xmm2 \n\t" /* block[i]*qmul (low 16 bits of each product) */ | ||
| 78 | "pmullw %%xmm0, %%xmm3 \n\t" | ||
| 79 | |||
| 80 | "paddw %%xmm4, %%xmm2 \n\t" /* block[i]*qmul + sgn(block[i])*qadd */ | ||
| 81 | "paddw %%xmm5, %%xmm3 \n\t" | ||
| 82 | |||
| 83 | "movdqa %%xmm2, (%1, %0) \n\t" /* store the results back in place */ | ||
| 84 | "movdqa %%xmm3, 16(%1, %0) \n\t" | ||
| 85 | |||
| 86 | "add $32, %0 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 87 | "jng 1b \n\t" /* repeat while the updated offset is <= 0 */ | ||
| 88 | : "+r"(offset), "+r"(qmul) | ||
| 89 | : "r" (block), "rm" (qadd) | ||
| 90 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory" | ||
| 91 | ); | ||
| 92 | 194461 | block[0]= level; /* the loop also rescaled element 0; overwrite it with the DC value computed above */ | |
| 93 | 194461 | } | |
| 94 | |||
| 95 | |||
| 96 | 49920 | /* H.263 inter dequantizer (SSSE3 inline asm): rewrites the coefficients in place as block[i]*qmul + sgn(block[i])*qadd; unlike the intra variant, block[0] gets no special treatment. */ static void dct_unquantize_h263_inter_ssse3(const MPVContext *s, | |
| 97 | int16_t *block, int n, int qscale) | ||
| 98 | { | ||
| 99 | 49920 | int qmul = qscale << 1; /* multiplier 2*qscale */ | |
| 100 | 49920 | int qadd = (qscale - 1) | 1; | |
| 101 | |||
| 102 | av_assert2(s->block_last_index[n]>=0 || s->h263_aic); /* NOTE(review): this admits a negative block_last_index[n] when h263_aic is set, yet the index is used unconditionally below; confirm callers never pass a negative index here */ | ||
| 103 | |||
| 104 | 49920 | x86_reg offset = s->inter_scantable.raster_end[s->block_last_index[n]] << 1; /* byte offset (index*2) of the last coefficient to process */ | |
| 105 | |||
| 106 | 49920 | __asm__ volatile( /* operands: %0 = block, %1 = offset, %2 = qmul, %3 = qadd */ | |
| 107 | "movd %2, %%xmm0 \n\t" //qmul | ||
| 108 | "movd %3, %%xmm1 \n\t" //qadd | ||
| 109 | "add %1, %0 \n\t" /* block pointer now points at block + offset bytes */ | ||
| 110 | "neg %1 \n\t" /* offset becomes a negative byte index that counts up */ | ||
| 111 | SPLATW(xmm0) | ||
| 112 | SPLATW(xmm1) | ||
| 113 | |||
| 114 | ".p2align 4 \n\t" | ||
| 115 | "1: \n\t" | ||
| 116 | "movdqa (%0, %1), %%xmm2 \n\t" /* load 16 coefficients (2 x 16 bytes) with aligned loads */ | ||
| 117 | "movdqa 16(%0, %1), %%xmm3 \n\t" | ||
| 118 | |||
| 119 | "movdqa %%xmm1, %%xmm4 \n\t" /* working copies of the broadcast qadd */ | ||
| 120 | "movdqa %%xmm1, %%xmm5 \n\t" | ||
| 121 | |||
| 122 | "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd | ||
| 123 | "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd | ||
| 124 | |||
| 125 | "pmullw %%xmm0, %%xmm2 \n\t" /* block[i]*qmul (low 16 bits of each product) */ | ||
| 126 | "pmullw %%xmm0, %%xmm3 \n\t" | ||
| 127 | |||
| 128 | "paddw %%xmm4, %%xmm2 \n\t" /* block[i]*qmul + sgn(block[i])*qadd */ | ||
| 129 | "paddw %%xmm5, %%xmm3 \n\t" | ||
| 130 | |||
| 131 | "movdqa %%xmm2, (%0, %1) \n\t" /* store the results back in place */ | ||
| 132 | "movdqa %%xmm3, 16(%0, %1) \n\t" | ||
| 133 | |||
| 134 | "add $32, %1 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 135 | "jng 1b \n\t" /* repeat while the updated offset is <= 0 */ | ||
| 136 | : "+r" (block), "+r" (offset) | ||
| 137 | : "rm"(qmul), "rm" (qadd) | ||
| 138 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory" | ||
| 139 | ); | ||
| 140 | 49920 | } | |
| 141 | |||
| 142 | 1 | /* MPEG-1 intra dequantizer (SSSE3 inline asm): each coefficient becomes sign(block[i]) * ((((abs(block[i]) * qscale * intra_matrix[i]) >> 3) - 1) with bit 0 set), computed in 16-bit lanes; block[0] is then replaced by the scaled DC value. */ static void dct_unquantize_mpeg1_intra_ssse3(const MPVContext *s, | |
| 143 | int16_t *block, int n, int qscale) | ||
| 144 | { | ||
| 145 | x86_reg nCoeffs; | ||
| 146 | const uint16_t *quant_matrix; | ||
| 147 | int block0; | ||
| 148 | |||
| 149 | av_assert2(s->block_last_index[n]>=0); | ||
| 150 | |||
| 151 | 1 | nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; /* number of coefficients to process (last raster index + 1) */ | |
| 152 | |||
| 153 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (n < 4) /* n < 4 selects y_dc_scale, any other n selects c_dc_scale */ |
| 154 | 1 | block0 = block[0] * s->y_dc_scale; | |
| 155 | else | ||
| 156 | ✗ | block0 = block[0] * s->c_dc_scale; | |
| 157 | /* XXX: only MPEG-1 */ | ||
| 158 | 1 | quant_matrix = s->intra_matrix; | |
| 159 | 1 | x86_reg offset = -2 * nCoeffs; /* negative byte index; both pointers passed to the asm are pre-advanced by nCoeffs elements */ | |
| 160 | 1 | __asm__ volatile( /* operands: %0 = offset, %1 = block+nCoeffs, %2 = quant_matrix+nCoeffs, %3 = qscale */ | |
| 161 | "movd %3, %%xmm6 \n\t" /* qscale, broadcast to all word lanes by SPLATW below */ | ||
| 162 | "pcmpeqw %%xmm7, %%xmm7 \n\t" /* xmm7 = all ones ... */ | ||
| 163 | "psrlw $15, %%xmm7 \n\t" /* ... shifted down so every word lane holds 1 */ | ||
| 164 | SPLATW(xmm6) | ||
| 165 | ".p2align 4 \n\t" | ||
| 166 | "1: \n\t" | ||
| 167 | "movdqa (%2, %0), %%xmm4 \n\t" /* 16 quant matrix entries */ | ||
| 168 | "movdqa 16(%2, %0), %%xmm5 \n\t" | ||
| 169 | "movdqa (%1, %0), %%xmm0 \n\t" /* 16 coefficients */ | ||
| 170 | "movdqa 16(%1, %0), %%xmm1 \n\t" | ||
| 171 | "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i] | ||
| 172 | "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i] | ||
| 173 | "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i]) | ||
| 174 | "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i]) | ||
| 175 | "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*q | ||
| 176 | "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*q | ||
| 177 | "psraw $3, %%xmm2 \n\t" /* arithmetic shift right by 3 */ | ||
| 178 | "psraw $3, %%xmm3 \n\t" | ||
| 179 | "psubw %%xmm7, %%xmm2 \n\t" /* subtract 1 ... */ | ||
| 180 | "psubw %%xmm7, %%xmm3 \n\t" | ||
| 181 | "por %%xmm7, %%xmm2 \n\t" /* ... then set bit 0, so the magnitude is odd */ | ||
| 182 | "por %%xmm7, %%xmm3 \n\t" | ||
| 183 | "psignw %%xmm0, %%xmm2 \n\t" /* apply the sign of the original coefficient */ | ||
| 184 | "psignw %%xmm1, %%xmm3 \n\t" | ||
| 185 | "movdqa %%xmm2, (%1, %0) \n\t" /* store the results back in place */ | ||
| 186 | "movdqa %%xmm3, 16(%1, %0) \n\t" | ||
| 187 | |||
| 188 | "add $32, %0 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 189 | "js 1b \n\t" /* repeat while the updated offset is still negative */ | ||
| 190 | : "+r" (offset) | ||
| 191 | 1 | : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale) | |
| 192 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",) | ||
| 193 | "memory" | ||
| 194 | ); | ||
| 195 | 1 | block[0]= block0; /* the loop also rewrote element 0; restore the DC value computed above */ | |
| 196 | 1 | } | |
| 197 | |||
| 198 | 1 | /* MPEG-1 inter dequantizer (SSSE3 inline asm): each coefficient becomes sign(block[i]) * (((((2*abs(block[i]) + 1) * qscale * inter_matrix[i]) >> 4) - 1) with bit 0 set), computed in 16-bit lanes. */ static void dct_unquantize_mpeg1_inter_ssse3(const MPVContext *s, | |
| 199 | int16_t *block, int n, int qscale) | ||
| 200 | { | ||
| 201 | x86_reg nCoeffs; | ||
| 202 | const uint16_t *quant_matrix; | ||
| 203 | |||
| 204 | av_assert2(s->block_last_index[n]>=0); | ||
| 205 | |||
| 206 | 1 | nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; /* number of coefficients to process; NOTE(review): reads intra_scantable on the inter path, presumably both scantables agree on raster_end; confirm */ | |
| 207 | |||
| 208 | 1 | quant_matrix = s->inter_matrix; | |
| 209 | 1 | x86_reg offset = -2 * nCoeffs; /* negative byte index; both pointers passed to the asm are pre-advanced by nCoeffs elements */ | |
| 210 | 1 | __asm__ volatile( /* operands: %0 = offset, %1 = block+nCoeffs, %2 = quant_matrix+nCoeffs, %3 = qscale */ | |
| 211 | "movd %3, %%xmm6 \n\t" /* qscale, broadcast to all word lanes by SPLATW below */ | ||
| 212 | "pcmpeqw %%xmm7, %%xmm7 \n\t" /* xmm7 = all ones ... */ | ||
| 213 | "psrlw $15, %%xmm7 \n\t" /* ... shifted down so every word lane holds 1 */ | ||
| 214 | SPLATW(xmm6) | ||
| 215 | ".p2align 4 \n\t" | ||
| 216 | "1: \n\t" | ||
| 217 | "movdqa (%2, %0), %%xmm4 \n\t" /* 16 quant matrix entries */ | ||
| 218 | "movdqa 16(%2, %0), %%xmm5 \n\t" | ||
| 219 | "movdqa (%1, %0), %%xmm0 \n\t" /* 16 coefficients */ | ||
| 220 | "movdqa 16(%1, %0), %%xmm1 \n\t" | ||
| 221 | "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i] | ||
| 222 | "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i] | ||
| 223 | "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i]) | ||
| 224 | "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i]) | ||
| 225 | "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2 | ||
| 226 | "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2 | ||
| 227 | "paddw %%xmm7, %%xmm2 \n\t" // abs(block[i])*2 + 1 | ||
| 228 | "paddw %%xmm7, %%xmm3 \n\t" // abs(block[i])*2 + 1 | ||
| 229 | "pmullw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q | ||
| 230 | "pmullw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q | ||
| 231 | "psraw $4, %%xmm2 \n\t" /* arithmetic shift right by 4 */ | ||
| 232 | "psraw $4, %%xmm3 \n\t" | ||
| 233 | "psubw %%xmm7, %%xmm2 \n\t" /* subtract 1 ... */ | ||
| 234 | "psubw %%xmm7, %%xmm3 \n\t" | ||
| 235 | "por %%xmm7, %%xmm2 \n\t" /* ... then set bit 0, so the magnitude is odd */ | ||
| 236 | "por %%xmm7, %%xmm3 \n\t" | ||
| 237 | "psignw %%xmm0, %%xmm2 \n\t" /* apply the sign of the original coefficient */ | ||
| 238 | "psignw %%xmm1, %%xmm3 \n\t" | ||
| 239 | "movdqa %%xmm2, (%1, %0) \n\t" /* store the results back in place */ | ||
| 240 | "movdqa %%xmm3, 16(%1, %0) \n\t" | ||
| 241 | |||
| 242 | "add $32, %0 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 243 | "js 1b \n\t" /* repeat while the updated offset is still negative */ | ||
| 244 | : "+r" (offset) | ||
| 245 | 1 | : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale) | |
| 246 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",) | ||
| 247 | "memory" | ||
| 248 | ); | ||
| 249 | 1 | } | |
| 250 | |||
| 251 | #endif /* HAVE_SSSE3_INLINE */ | ||
| 252 | |||
| 253 | ✗ | /* MPEG-2 intra dequantizer (SSE2 inline asm): each coefficient becomes (block[i] * qscale * intra_matrix[i]) / 16 rounded towards zero, computed in 16-bit lanes; block[0] is then replaced by the scaled DC value. No mismatch control is applied (see the note at the end). */ static void dct_unquantize_mpeg2_intra_sse2(const MPVContext *s, | |
| 254 | int16_t *block, int n, int qscale) | ||
| 255 | { | ||
| 256 | x86_reg nCoeffs; | ||
| 257 | const uint16_t *quant_matrix; | ||
| 258 | int block0; | ||
| 259 | |||
| 260 | av_assert2(s->block_last_index[n]>=0); | ||
| 261 | |||
| 262 | ✗ | if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale]; /* table lookup when q_scale_type is set ... */ | |
| 263 | ✗ | else qscale <<= 1; /* ... otherwise 2*qscale */ | |
| 264 | |||
| 265 | ✗ | nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; /* raster index of the last coefficient (no +1 here; the loop below uses an inclusive end test) */ | |
| 266 | |||
| 267 | ✗ | if (n < 4) /* n < 4 selects y_dc_scale, any other n selects c_dc_scale */ | |
| 268 | ✗ | block0 = block[0] * s->y_dc_scale; | |
| 269 | else | ||
| 270 | ✗ | block0 = block[0] * s->c_dc_scale; | |
| 271 | ✗ | quant_matrix = s->intra_matrix; | |
| 272 | ✗ | x86_reg offset = -2 * nCoeffs; /* negative byte index; both pointers passed to the asm are pre-advanced by nCoeffs elements */ | |
| 273 | ✗ | __asm__ volatile( /* operands: %0 = offset, %1 = block+nCoeffs, %2 = quant_matrix+nCoeffs, %3 = qscale */ | |
| 274 | "movd %3, %%xmm6 \n\t" /* qscale, broadcast to all word lanes by SPLATW below */ | ||
| 275 | SPLATW(xmm6) | ||
| 276 | ".p2align 4 \n\t" | ||
| 277 | "1: \n\t" | ||
| 278 | "movdqa (%1, %0), %%xmm0 \n\t" /* 16 coefficients */ | ||
| 279 | "movdqa 16(%1, %0), %%xmm1 \n\t" | ||
| 280 | "movdqa (%2, %0), %%xmm4 \n\t" /* 16 quant matrix entries */ | ||
| 281 | "movdqa 16(%2, %0), %%xmm5 \n\t" | ||
| 282 | "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i] | ||
| 283 | "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i] | ||
| 284 | "movdqa %%xmm0, %%xmm2 \n\t" /* copies of the coefficients, used to derive the rounding bias */ | ||
| 285 | "movdqa %%xmm1, %%xmm3 \n\t" | ||
| 286 | "psrlw $12, %%xmm2 \n\t" // block[i] < 0 ? 0xf : 0 | ||
| 287 | "psrlw $12, %%xmm3 \n\t" // (block[i] is in the -2048..2047 range) | ||
| 288 | "pmullw %%xmm4, %%xmm0 \n\t" // block[i]*q | ||
| 289 | "pmullw %%xmm5, %%xmm1 \n\t" // block[i]*q | ||
| 290 | "paddw %%xmm2, %%xmm0 \n\t" // bias negative block[i] | ||
| 291 | "paddw %%xmm3, %%xmm1 \n\t" // so that a right-shift | ||
| 292 | "psraw $4, %%xmm0 \n\t" // is equivalent to divide | ||
| 293 | "psraw $4, %%xmm1 \n\t" // with rounding towards zero | ||
| 294 | "movdqa %%xmm0, (%1, %0) \n\t" /* store the results back in place */ | ||
| 295 | "movdqa %%xmm1, 16(%1, %0) \n\t" | ||
| 296 | |||
| 297 | "add $32, %0 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 298 | "jng 1b \n\t" /* repeat while the updated offset is <= 0 */ | ||
| 299 | : "+r" (offset) | ||
| 300 | ✗ | : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale) | |
| 301 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6",) | ||
| 302 | "memory" | ||
| 303 | ); | ||
| 304 | ✗ | block[0]= block0; /* the loop also rewrote element 0; restore the DC value computed above */ | |
| 305 | //Note, we do not do mismatch control for intra as errors cannot accumulate | ||
| 306 | ✗ | } | |
| 307 | |||
| 308 | #if HAVE_SSSE3_INLINE | ||
| 309 | |||
| 310 | 5942 | /* MPEG-2 inter dequantizer (SSSE3 inline asm): each coefficient becomes sign(block[i]) * (((2*abs(block[i]) + 1) * qscale2 * inter_matrix[i]) >> 5) in 16-bit lanes; afterwards the XOR of all written values (seeded with a 1 bit) decides whether the lowest bit of block[63] is toggled. */ static void dct_unquantize_mpeg2_inter_ssse3(const MPVContext *s, | |
| 311 | int16_t *block, int n, int qscale) | ||
| 312 | { | ||
| 313 | av_assert2(s->block_last_index[n]>=0); | ||
| 314 | |||
| 315 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5942 times.
|
5942 | x86_reg qscale2 = s->q_scale_type ? ff_mpeg2_non_linear_qscale[qscale] : (unsigned)qscale << 1; /* table lookup when q_scale_type is set, otherwise 2*qscale; the asm later reuses this operand as a pointer register */ |
| 316 | 5942 | x86_reg offset = s->intra_scantable.raster_end[s->block_last_index[n]] << 1; /* byte offset (index*2) of the last coefficient; NOTE(review): reads intra_scantable on the inter path, presumably both scantables agree on raster_end; confirm */ | |
| 317 | 5942 | const void *quant_matrix = (const char*)s->inter_matrix + offset; /* pre-advanced by offset bytes so it can share the negative index with the block pointer */ | |
| 318 | |||
| 319 | |||
| 320 | 5942 | __asm__ volatile( /* operands: %0 = offset, %1 = qscale2, %2 = block, %3 = quant_matrix (already advanced) */ | |
| 321 | "movd %k1, %%xmm6 \n\t" /* qscale2, broadcast to all word lanes by SPLATW below */ | ||
| 322 | "lea (%2, %0), %1 \n\t" /* operand 1 is reused: it now holds block + offset bytes */ | ||
| 323 | "neg %0 \n\t" /* offset becomes a negative byte index that counts up */ | ||
| 324 | SPLATW(xmm6) | ||
| 325 | "pcmpeqw %%xmm7, %%xmm7 \n\t" /* xmm7 = all ones ... */ | ||
| 326 | "psrldq $14, %%xmm7 \n\t" /* ... byte-shifted so only the lowest word stays 0xFFFF: seed of the XOR accumulator */ | ||
| 327 | ".p2align 4 \n\t" | ||
| 328 | "1: \n\t" | ||
| 329 | "movdqa (%3, %0), %%xmm4 \n\t" /* 16 quant matrix entries */ | ||
| 330 | "movdqa 16(%3, %0), %%xmm5 \n\t" | ||
| 331 | "movdqa (%1, %0), %%xmm0 \n\t" /* 16 coefficients */ | ||
| 332 | "movdqa 16(%1, %0), %%xmm1 \n\t" | ||
| 333 | "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i] | ||
| 334 | "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i] | ||
| 335 | "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i]) | ||
| 336 | "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i]) | ||
| 337 | "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2 | ||
| 338 | "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2 | ||
| 339 | "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*2*q | ||
| 340 | "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*2*q | ||
| 341 | "paddw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q | ||
| 342 | "paddw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q | ||
| 343 | "psrlw $5, %%xmm2 \n\t" /* logical shift right by 5 */ | ||
| 344 | "psrlw $5, %%xmm3 \n\t" | ||
| 345 | "psignw %%xmm0, %%xmm2 \n\t" /* apply the sign of the original coefficient */ | ||
| 346 | "psignw %%xmm1, %%xmm3 \n\t" | ||
| 347 | "movdqa %%xmm2, (%1, %0) \n\t" /* store the results back in place */ | ||
| 348 | "movdqa %%xmm3, 16(%1, %0) \n\t" | ||
| 349 | "pxor %%xmm2, %%xmm7 \n\t" /* XOR every written value into the accumulator */ | ||
| 350 | "pxor %%xmm3, %%xmm7 \n\t" | ||
| 351 | |||
| 352 | "add $32, %0 \n\t" /* advance by 32 bytes = 16 coefficients */ | ||
| 353 | "jng 1b \n\t" /* repeat while the updated offset is <= 0 */ | ||
| 354 | "movd 124(%2), %%xmm0 \n\t" /* load the dword at byte offset 124 of block, i.e. elements 62 and 63 */ | ||
| 355 | "movhlps %%xmm7, %%xmm6 \n\t" /* fold the accumulator: upper 64 bits onto lower 64 bits ... */ | ||
| 356 | "pxor %%xmm6, %%xmm7 \n\t" | ||
| 357 | "pshufd $1, %%xmm7, %%xmm6 \n\t" /* ... dword 1 onto dword 0 ... */ | ||
| 358 | "pxor %%xmm6, %%xmm7 \n\t" | ||
| 359 | "pshuflw $1, %%xmm7, %%xmm6 \n\t" /* ... word 1 onto word 0: word 0 is now the XOR of all eight lanes */ | ||
| 360 | "pxor %%xmm6, %%xmm7 \n\t" | ||
| 361 | "pslld $31, %%xmm7 \n\t" /* isolate bit 0 of each dword (moved to bit 31) ... */ | ||
| 362 | "psrld $15, %%xmm7 \n\t" /* ... and bring it down to bit 16, the lowest bit of the upper 16-bit half */ | ||
| 363 | "pxor %%xmm7, %%xmm0 \n\t" /* toggle that bit in the loaded dword: flips the lowest bit of block[63] when the folded bit is 1 */ | ||
| 364 | "movd %%xmm0, 124(%2) \n\t" /* write elements 62 and 63 back */ | ||
| 365 | |||
| 366 | : "+r"(offset), "+r" (qscale2) | ||
| 367 | : "r" (block), "r"(quant_matrix) | ||
| 368 | : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",) | ||
| 369 | "memory" | ||
| 370 | ); | ||
| 371 | 5942 | } | |
| 372 | |||
| 373 | #endif /* HAVE_SSSE3_INLINE */ | ||
| 374 | #endif /* HAVE_SSE2_INLINE */ | ||
| 375 | |||
| 376 | 696 | /* Installs the x86 inline-asm dequantizers into `s` according to the runtime CPU flags: the SSE2 MPEG-2 intra routine only when `bitexact` is zero, and the five SSSE3 routines whenever SSSE3 is reported. Compiles to an empty function when HAVE_SSE2_INLINE is off. */ av_cold void ff_mpv_unquantize_init_x86(MPVUnquantDSPContext *s, int bitexact) | |
| 377 | { | ||
| 378 | #if HAVE_SSE2_INLINE | ||
| 379 | 696 | int cpu_flags = av_get_cpu_flags(); | |
| 380 | |||
| 381 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 600 times.
|
696 | if (INLINE_SSE2(cpu_flags)) { |
| 382 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 15 times.
|
96 | if (!bitexact) /* the SSE2 MPEG-2 intra routine is skipped when bit-exact output is requested */ |
| 383 | 81 | s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_sse2; | |
| 384 | } | ||
| 385 | #if HAVE_SSSE3_INLINE | ||
| 386 |
2/2✓ Branch 0 taken 94 times.
✓ Branch 1 taken 602 times.
|
696 | if (INLINE_SSSE3(cpu_flags)) { /* the SSSE3 routines are installed regardless of bitexact */ |
| 387 | 94 | s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_ssse3; | |
| 388 | 94 | s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_ssse3; | |
| 389 | 94 | s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_ssse3; | |
| 390 | 94 | s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_ssse3; | |
| 391 | 94 | s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_ssse3; | |
| 392 | } | ||
| 393 | #endif /* HAVE_SSSE3_INLINE */ | ||
| 394 | #endif /* HAVE_SSE2_INLINE */ | ||
| 395 | 696 | } | |
| 396 |