LCOV - code coverage report
Current view: top level - libavcodec/x86 - mpegvideo.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 40 72 55.6 %
Date: 2017-12-17 23:02:56 Functions: 4 7 57.1 %

          Line data    Source code
       1             : /*
       2             :  * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
       3             :  * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
       4             :  *
       5             :  * This file is part of FFmpeg.
       6             :  *
       7             :  * FFmpeg is free software; you can redistribute it and/or
       8             :  * modify it under the terms of the GNU Lesser General Public
       9             :  * License as published by the Free Software Foundation; either
      10             :  * version 2.1 of the License, or (at your option) any later version.
      11             :  *
      12             :  * FFmpeg is distributed in the hope that it will be useful,
      13             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15             :  * Lesser General Public License for more details.
      16             :  *
      17             :  * You should have received a copy of the GNU Lesser General Public
      18             :  * License along with FFmpeg; if not, write to the Free Software
      19             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      20             :  */
      21             : 
      22             : #include "libavutil/attributes.h"
      23             : #include "libavutil/cpu.h"
      24             : #include "libavutil/x86/asm.h"
      25             : #include "libavutil/x86/cpu.h"
      26             : #include "libavcodec/avcodec.h"
      27             : #include "libavcodec/mpegvideo.h"
      28             : #include "libavcodec/mpegvideodata.h"
      29             : 
      30             : #if HAVE_MMX_INLINE
      31             : 
       32      204660 : static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, // H.263 intra dequantizer (MMX inline asm); rewrites block[] in place
       33             :                                   int16_t *block, int n, int qscale) // n < 4 selects y_dc_scale, otherwise c_dc_scale
       34             : {
       35             :     x86_reg level, qmul, qadd, nCoeffs;
       36             :     /* Per coefficient (16-bit wrapping arithmetic): positive -> block[i]*qmul + qadd, negative -> block[i]*qmul - qadd, zero stays 0. */
       37      204660 :     qmul = qscale << 1;
       38             : 
       39             :     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
       40             : 
       41      204660 :     if (!s->h263_aic) {
       42      204660 :         if (n < 4)
       43      136440 :             level = block[0] * s->y_dc_scale; // DC is scaled separately and written back after the loop
       44             :         else
       45       68220 :             level = block[0] * s->c_dc_scale;
       46      204660 :         qadd = (qscale - 1) | 1; // always odd
       47             :     }else{
       48           0 :         qadd = 0; // h263_aic set: no additive term, DC kept as is
       49           0 :         level= block[0];
       50             :     }
       51      204660 :     if(s->ac_pred)
       52           0 :         nCoeffs=63; // process through coefficient 63
       53             :     else
       54      204660 :         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
       55             : 
       56      204660 : __asm__ volatile(
       57             :                 "movd %1, %%mm6                 \n\t" // mm6 = qmul
       58             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qmul
       59             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
       60             :                 "movd %2, %%mm5                 \n\t" // mm5 = qadd
       61             :                 "pxor %%mm7, %%mm7              \n\t" // mm7 = 0
       62             :                 "packssdw %%mm5, %%mm5          \n\t" // replicate qadd
       63             :                 "packssdw %%mm5, %%mm5          \n\t" // into all four words
       64             :                 "psubw %%mm5, %%mm7             \n\t" // mm7 = -qadd in every word
       65             :                 "pxor %%mm4, %%mm4              \n\t" // mm4 = 0 (comparison operand)
       66             :                 ".p2align 4                     \n\t"
       67             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
       68             :                 "movq (%0, %3), %%mm0           \n\t" // %0 + %3 starts at &block[0]
       69             :                 "movq 8(%0, %3), %%mm1          \n\t"
       70             : 
       71             :                 "pmullw %%mm6, %%mm0            \n\t" // block[i] * qmul (low 16 bits)
       72             :                 "pmullw %%mm6, %%mm1            \n\t"
       73             : 
       74             :                 "movq (%0, %3), %%mm2           \n\t" // reload the original coefficients
       75             :                 "movq 8(%0, %3), %%mm3          \n\t"
       76             : 
       77             :                 "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] > 0 ? -1 : 0
       78             :                 "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] > 0 ? -1 : 0
       79             : 
       80             :                 "pxor %%mm2, %%mm0              \n\t" // complement the product where block[i] > 0
       81             :                 "pxor %%mm3, %%mm1              \n\t"
       82             : 
       83             :                 "paddw %%mm7, %%mm0             \n\t" // add -qadd
       84             :                 "paddw %%mm7, %%mm1             \n\t"
       85             : 
       86             :                 "pxor %%mm0, %%mm2              \n\t" // complement back: product + qadd if positive, product - qadd otherwise
       87             :                 "pxor %%mm1, %%mm3              \n\t"
       88             : 
       89             :                 "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0 (intermediate equals -qadd only then)
       90             :                 "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
       91             : 
       92             :                 "pandn %%mm2, %%mm0             \n\t" // force zero coefficients back to 0
       93             :                 "pandn %%mm3, %%mm1             \n\t"
       94             : 
       95             :                 "movq %%mm0, (%0, %3)           \n\t" // store in place
       96             :                 "movq %%mm1, 8(%0, %3)          \n\t"
       97             : 
       98             :                 "add $16, %3                    \n\t" // next 8 coefficients; NOTE(review): %3 is declared input-only but is modified here
       99             :                 "jng 1b                         \n\t" // repeat while the byte offset is <= 0
      100      204660 :                 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs)) // %0 = end pointer, %1 = qmul, %2 = qadd, %3 = negative byte offset
      101             :                 : "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      102             :         );
      103      204660 :         block[0]= level; // the loop also rewrote block[0]; install the DC value computed above
      104      204660 : }
     105             : 
     106             : 
      107      168703 : static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, // H.263 inter dequantizer (MMX inline asm); rewrites block[] in place
      108             :                                   int16_t *block, int n, int qscale) // n indexes s->block_last_index[]
      109             : {
      110             :     x86_reg qmul, qadd, nCoeffs;
      111             :     /* Per coefficient (16-bit wrapping arithmetic): positive -> block[i]*qmul + qadd, negative -> block[i]*qmul - qadd, zero stays 0. */
      112      168703 :     qmul = qscale << 1;
      113      168703 :     qadd = (qscale - 1) | 1; // always odd
      114             : 
      115             :     av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
      116             : 
      117      168703 :     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
      118             : 
      119      168703 : __asm__ volatile(
      120             :                 "movd %1, %%mm6                 \n\t" // mm6 = qmul
      121             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qmul
      122             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
      123             :                 "movd %2, %%mm5                 \n\t" // mm5 = qadd
      124             :                 "pxor %%mm7, %%mm7              \n\t" // mm7 = 0
      125             :                 "packssdw %%mm5, %%mm5          \n\t" // replicate qadd
      126             :                 "packssdw %%mm5, %%mm5          \n\t" // into all four words
      127             :                 "psubw %%mm5, %%mm7             \n\t" // mm7 = -qadd in every word
      128             :                 "pxor %%mm4, %%mm4              \n\t" // mm4 = 0 (comparison operand)
      129             :                 ".p2align 4                     \n\t"
      130             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
      131             :                 "movq (%0, %3), %%mm0           \n\t" // %0 + %3 starts at &block[0]
      132             :                 "movq 8(%0, %3), %%mm1          \n\t"
      133             : 
      134             :                 "pmullw %%mm6, %%mm0            \n\t" // block[i] * qmul (low 16 bits)
      135             :                 "pmullw %%mm6, %%mm1            \n\t"
      136             : 
      137             :                 "movq (%0, %3), %%mm2           \n\t" // reload the original coefficients
      138             :                 "movq 8(%0, %3), %%mm3          \n\t"
      139             : 
      140             :                 "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] > 0 ? -1 : 0
      141             :                 "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] > 0 ? -1 : 0
      142             : 
      143             :                 "pxor %%mm2, %%mm0              \n\t" // complement the product where block[i] > 0
      144             :                 "pxor %%mm3, %%mm1              \n\t"
      145             : 
      146             :                 "paddw %%mm7, %%mm0             \n\t" // add -qadd
      147             :                 "paddw %%mm7, %%mm1             \n\t"
      148             : 
      149             :                 "pxor %%mm0, %%mm2              \n\t" // complement back: product + qadd if positive, product - qadd otherwise
      150             :                 "pxor %%mm1, %%mm3              \n\t"
      151             : 
      152             :                 "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0 (intermediate equals -qadd only then)
      153             :                 "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
      154             : 
      155             :                 "pandn %%mm2, %%mm0             \n\t" // force zero coefficients back to 0
      156             :                 "pandn %%mm3, %%mm1             \n\t"
      157             : 
      158             :                 "movq %%mm0, (%0, %3)           \n\t" // store in place
      159             :                 "movq %%mm1, 8(%0, %3)          \n\t"
      160             : 
      161             :                 "add $16, %3                    \n\t" // next 8 coefficients; NOTE(review): %3 is declared input-only but is modified here
      162             :                 "jng 1b                         \n\t" // repeat while the byte offset is <= 0
      163      168703 :                 ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs)) // %0 = end pointer, %1 = qmul, %2 = qadd, %3 = negative byte offset
      164             :                 : "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      165             :         );
      166      168703 : }
     167             : 
      168           0 : static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, // MPEG-1 intra dequantizer (MMX inline asm); rewrites block[] in place
      169             :                                      int16_t *block, int n, int qscale) // n < 4 selects y_dc_scale, otherwise c_dc_scale
      170             : {
      171             :     x86_reg nCoeffs;
      172             :     const uint16_t *quant_matrix;
      173             :     int block0; // separately scaled DC value, written back after the loop
      174             : 
      175             :     av_assert2(s->block_last_index[n]>=0);
      176             :     /* Per non-zero coefficient: magnitude = (((abs(block[i]) * qscale * quant_matrix[i]) >> 3) - 1) | 1, sign preserved; zeros stay 0. */
      177           0 :     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
      178             : 
      179           0 :     if (n < 4)
      180           0 :         block0 = block[0] * s->y_dc_scale;
      181             :     else
      182           0 :         block0 = block[0] * s->c_dc_scale;
      183             :     /* XXX: only MPEG-1 */
      184           0 :     quant_matrix = s->intra_matrix;
      185           0 : __asm__ volatile(
      186             :                 "pcmpeqw %%mm7, %%mm7           \n\t" // mm7 = all ones
      187             :                 "psrlw $15, %%mm7               \n\t" // mm7 = 1 in every word
      188             :                 "movd %2, %%mm6                 \n\t" // mm6 = qscale
      189             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qscale
      190             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
      191             :                 "mov %3, %%"FF_REG_a"           \n\t" // FF_REG_a = -2*nCoeffs, byte offset from the end pointers
      192             :                 ".p2align 4                     \n\t"
      193             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
      194             :                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" // coefficients
      195             :                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
      196             :                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" // matching quant_matrix entries
      197             :                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
      198             :                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
      199             :                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
      200             :                 "pxor %%mm2, %%mm2              \n\t" // clear for the sign compare
      201             :                 "pxor %%mm3, %%mm3              \n\t"
      202             :                 "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
      203             :                 "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
      204             :                 "pxor %%mm2, %%mm0              \n\t" // (x ^ mask) ...
      205             :                 "pxor %%mm3, %%mm1              \n\t"
      206             :                 "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
      207             :                 "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
      208             :                 "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
      209             :                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
      210             :                 "pxor %%mm4, %%mm4              \n\t" // clear for the zero compare
      211             :                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
      212             :                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
      213             :                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
      214             :                 "psraw $3, %%mm0                \n\t" // (abs(block[i])*q) >> 3
      215             :                 "psraw $3, %%mm1                \n\t"
      216             :                 "psubw %%mm7, %%mm0             \n\t" // - 1
      217             :                 "psubw %%mm7, %%mm1             \n\t"
      218             :                 "por %%mm7, %%mm0               \n\t" // | 1: magnitude is forced odd
      219             :                 "por %%mm7, %%mm1               \n\t"
      220             :                 "pxor %%mm2, %%mm0              \n\t" // reapply the sign:
      221             :                 "pxor %%mm3, %%mm1              \n\t"
      222             :                 "psubw %%mm2, %%mm0             \n\t" // (x ^ mask) - mask
      223             :                 "psubw %%mm3, %%mm1             \n\t"
      224             :                 "pandn %%mm0, %%mm4             \n\t" // keep 0 where block[i] was 0
      225             :                 "pandn %%mm1, %%mm5             \n\t"
      226             :                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" // store in place
      227             :                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
      228             : 
      229             :                 "add $16, %%"FF_REG_a"          \n\t" // next 8 coefficients
      230             :                 "js 1b                          \n\t" // repeat while the byte offset is negative
      231           0 :                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) // %0/%1 = end pointers, %2 = qscale, %3 = start offset
      232             :                 : "%"FF_REG_a, "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      233             :         );
      234           0 :     block[0]= block0; // the loop also rewrote block[0]; install the DC value computed above
      235           0 : }
     236             : 
      237           0 : static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, // MPEG-1 inter dequantizer (MMX inline asm); rewrites block[] in place
      238             :                                      int16_t *block, int n, int qscale) // n indexes s->block_last_index[]
      239             : {
      240             :     x86_reg nCoeffs;
      241             :     const uint16_t *quant_matrix;
      242             :     /* Per non-zero coefficient: magnitude = ((((abs(block[i])*2 + 1) * qscale * quant_matrix[i]) >> 4) - 1) | 1, sign preserved; zeros stay 0. */
      243             :     av_assert2(s->block_last_index[n]>=0);
      244             :     /* NOTE(review): the bound below is read from intra_scantable although this is the inter path - confirm this is intended. */
      245           0 :     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
      246             : 
      247           0 :         quant_matrix = s->inter_matrix;
      248           0 : __asm__ volatile(
      249             :                 "pcmpeqw %%mm7, %%mm7           \n\t" // mm7 = all ones
      250             :                 "psrlw $15, %%mm7               \n\t" // mm7 = 1 in every word
      251             :                 "movd %2, %%mm6                 \n\t" // mm6 = qscale
      252             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qscale
      253             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
      254             :                 "mov %3, %%"FF_REG_a"           \n\t" // FF_REG_a = -2*nCoeffs, byte offset from the end pointers
      255             :                 ".p2align 4                     \n\t"
      256             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
      257             :                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" // coefficients
      258             :                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
      259             :                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" // matching quant_matrix entries
      260             :                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
      261             :                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
      262             :                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
      263             :                 "pxor %%mm2, %%mm2              \n\t" // clear for the sign compare
      264             :                 "pxor %%mm3, %%mm3              \n\t"
      265             :                 "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
      266             :                 "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
      267             :                 "pxor %%mm2, %%mm0              \n\t" // (x ^ mask) ...
      268             :                 "pxor %%mm3, %%mm1              \n\t"
      269             :                 "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
      270             :                 "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
      271             :                 "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
      272             :                 "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
      273             :                 "paddw %%mm7, %%mm0             \n\t" // abs(block[i])*2 + 1
      274             :                 "paddw %%mm7, %%mm1             \n\t" // abs(block[i])*2 + 1
      275             :                 "pmullw %%mm4, %%mm0            \n\t" // (abs(block[i])*2 + 1)*q
      276             :                 "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
      277             :                 "pxor %%mm4, %%mm4              \n\t" // clear for the zero compare
      278             :                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
      279             :                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
      280             :                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
      281             :                 "psraw $4, %%mm0                \n\t" // ((abs(block[i])*2 + 1)*q) >> 4
      282             :                 "psraw $4, %%mm1                \n\t"
      283             :                 "psubw %%mm7, %%mm0             \n\t" // - 1
      284             :                 "psubw %%mm7, %%mm1             \n\t"
      285             :                 "por %%mm7, %%mm0               \n\t" // | 1: magnitude is forced odd
      286             :                 "por %%mm7, %%mm1               \n\t"
      287             :                 "pxor %%mm2, %%mm0              \n\t" // reapply the sign:
      288             :                 "pxor %%mm3, %%mm1              \n\t"
      289             :                 "psubw %%mm2, %%mm0             \n\t" // (x ^ mask) - mask
      290             :                 "psubw %%mm3, %%mm1             \n\t"
      291             :                 "pandn %%mm0, %%mm4             \n\t" // keep 0 where block[i] was 0
      292             :                 "pandn %%mm1, %%mm5             \n\t"
      293             :                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" // store in place
      294             :                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
      295             : 
      296             :                 "add $16, %%"FF_REG_a"          \n\t" // next 8 coefficients
      297             :                 "js 1b                          \n\t" // repeat while the byte offset is negative
      298           0 :                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) // %0/%1 = end pointers, %2 = qscale, %3 = start offset
      299             :                 : "%"FF_REG_a, "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      300             :         );
      301           0 : }
     302             : 
      303           0 : static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, // MPEG-2 intra dequantizer (MMX inline asm); rewrites block[] in place
      304             :                                      int16_t *block, int n, int qscale) // n < 4 selects y_dc_scale, otherwise c_dc_scale
      305             : {
      306             :     x86_reg nCoeffs;
      307             :     const uint16_t *quant_matrix;
      308             :     int block0; // separately scaled DC value, written back after the loop
      309             :     /* Per non-zero coefficient: magnitude = (abs(block[i]) * qscale * quant_matrix[i]) >> 4, sign preserved; zeros stay 0. */
      310             :     av_assert2(s->block_last_index[n]>=0);
      311             :     /* qscale is remapped first: table lookup when q_scale_type is set, otherwise doubled. */
      312           0 :     if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
      313           0 :     else                 qscale <<= 1;
      314             : 
      315           0 :     if(s->alternate_scan) nCoeffs= 63; //FIXME
      316           0 :     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
      317             : 
      318           0 :     if (n < 4)
      319           0 :         block0 = block[0] * s->y_dc_scale;
      320             :     else
      321           0 :         block0 = block[0] * s->c_dc_scale;
      322           0 :     quant_matrix = s->intra_matrix;
      323           0 : __asm__ volatile(
      324             :                 "pcmpeqw %%mm7, %%mm7           \n\t" // mm7 = all ones
      325             :                 "psrlw $15, %%mm7               \n\t" // mm7 = 1 in every word (not read again in this block)
      326             :                 "movd %2, %%mm6                 \n\t" // mm6 = qscale
      327             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qscale
      328             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
      329             :                 "mov %3, %%"FF_REG_a"           \n\t" // FF_REG_a = -2*nCoeffs, byte offset from the end pointers
      330             :                 ".p2align 4                     \n\t"
      331             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
      332             :                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" // coefficients
      333             :                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
      334             :                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" // matching quant_matrix entries
      335             :                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
      336             :                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
      337             :                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
      338             :                 "pxor %%mm2, %%mm2              \n\t" // clear for the sign compare
      339             :                 "pxor %%mm3, %%mm3              \n\t"
      340             :                 "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
      341             :                 "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
      342             :                 "pxor %%mm2, %%mm0              \n\t" // (x ^ mask) ...
      343             :                 "pxor %%mm3, %%mm1              \n\t"
      344             :                 "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
      345             :                 "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
      346             :                 "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
      347             :                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
      348             :                 "pxor %%mm4, %%mm4              \n\t" // clear for the zero compare
      349             :                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
      350             :                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
      351             :                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
      352             :                 "psraw $4, %%mm0                \n\t" // (abs(block[i])*q) >> 4
      353             :                 "psraw $4, %%mm1                \n\t"
      354             :                 "pxor %%mm2, %%mm0              \n\t" // reapply the sign:
      355             :                 "pxor %%mm3, %%mm1              \n\t"
      356             :                 "psubw %%mm2, %%mm0             \n\t" // (x ^ mask) - mask
      357             :                 "psubw %%mm3, %%mm1             \n\t"
      358             :                 "pandn %%mm0, %%mm4             \n\t" // keep 0 where block[i] was 0
      359             :                 "pandn %%mm1, %%mm5             \n\t"
      360             :                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" // store in place
      361             :                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
      362             : 
      363             :                 "add $16, %%"FF_REG_a"          \n\t" // next 8 coefficients
      364             :                 "jng 1b                         \n\t" // repeat while the byte offset is <= 0
      365           0 :                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) // %0/%1 = end pointers, %2 = qscale, %3 = start offset
      366             :                 : "%"FF_REG_a, "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      367             :         );
      368           0 :     block[0]= block0; // the loop also rewrote block[0]; install the DC value computed above
      369             :         //Note, we do not do mismatch control for intra as errors cannot accumulate
      370           0 : }
     371             : 
      372        5941 : static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, // MPEG-2 inter dequantizer (MMX inline asm); rewrites block[] in place
      373             :                                      int16_t *block, int n, int qscale) // n indexes s->block_last_index[]
      374             : {
      375             :     x86_reg nCoeffs;
      376             :     const uint16_t *quant_matrix;
      377             :     /* Per non-zero coefficient: magnitude = ((abs(block[i])*2 + 1) * qscale * quant_matrix[i]) >> 5, sign preserved; then the low bit of block[63] is toggled from an XOR over all outputs. */
      378             :     av_assert2(s->block_last_index[n]>=0);
      379             :     /* qscale is remapped first: table lookup when q_scale_type is set, otherwise doubled. */
      380        5941 :     if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
      381        5941 :     else                 qscale <<= 1;
      382             : 
      383        5941 :     if(s->alternate_scan) nCoeffs= 63; //FIXME
      384        5941 :     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; // NOTE(review): intra_scantable on the inter path - confirm intended
      385             : 
      386        5941 :         quant_matrix = s->inter_matrix;
      387        5941 : __asm__ volatile(
      388             :                 "pcmpeqw %%mm7, %%mm7           \n\t" // mm7 = all ones
      389             :                 "psrlq $48, %%mm7               \n\t" // mm7 = 0x000000000000FFFF, seed of the XOR accumulator
      390             :                 "movd %2, %%mm6                 \n\t" // mm6 = qscale
      391             :                 "packssdw %%mm6, %%mm6          \n\t" // pack twice: replicate qscale
      392             :                 "packssdw %%mm6, %%mm6          \n\t" // into all four 16-bit words
      393             :                 "mov %3, %%"FF_REG_a"           \n\t" // FF_REG_a = -2*nCoeffs, byte offset from the end pointers
      394             :                 ".p2align 4                     \n\t"
      395             :                 "1:                             \n\t" // loop: 8 coefficients (16 bytes) per pass
      396             :                 "movq (%0, %%"FF_REG_a"), %%mm0 \n\t" // coefficients
      397             :                 "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
      398             :                 "movq (%1, %%"FF_REG_a"), %%mm4 \n\t" // matching quant_matrix entries
      399             :                 "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
      400             :                 "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
      401             :                 "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
      402             :                 "pxor %%mm2, %%mm2              \n\t" // clear for the sign compare
      403             :                 "pxor %%mm3, %%mm3              \n\t"
      404             :                 "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
      405             :                 "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
      406             :                 "pxor %%mm2, %%mm0              \n\t" // (x ^ mask) ...
      407             :                 "pxor %%mm3, %%mm1              \n\t"
      408             :                 "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
      409             :                 "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
      410             :                 "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
      411             :                 "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
      412             :                 "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*2*q
      413             :                 "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*2*q
      414             :                 "paddw %%mm4, %%mm0             \n\t" // (abs(block[i])*2 + 1)*q
      415             :                 "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
      416             :                 "pxor %%mm4, %%mm4              \n\t" // clear for the zero compare
      417             :                 "pxor %%mm5, %%mm5              \n\t" // FIXME slow
      418             :                 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
      419             :                 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
      420             :                 "psrlw $5, %%mm0                \n\t" // logical shift: ((abs(block[i])*2 + 1)*q) >> 5
      421             :                 "psrlw $5, %%mm1                \n\t"
      422             :                 "pxor %%mm2, %%mm0              \n\t" // reapply the sign:
      423             :                 "pxor %%mm3, %%mm1              \n\t"
      424             :                 "psubw %%mm2, %%mm0             \n\t" // (x ^ mask) - mask
      425             :                 "psubw %%mm3, %%mm1             \n\t"
      426             :                 "pandn %%mm0, %%mm4             \n\t" // keep 0 where block[i] was 0
      427             :                 "pandn %%mm1, %%mm5             \n\t"
      428             :                 "pxor %%mm4, %%mm7              \n\t" // XOR the eight results into the accumulator
      429             :                 "pxor %%mm5, %%mm7              \n\t"
      430             :                 "movq %%mm4, (%0, %%"FF_REG_a") \n\t" // store in place
      431             :                 "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
      432             : 
      433             :                 "add $16, %%"FF_REG_a"          \n\t" // next 8 coefficients
      434             :                 "jng 1b                         \n\t" // repeat while the byte offset is <= 0
      435             :                 "movd 124(%0, %3), %%mm0        \n\t" // %0 + %3 == block, so this loads block[62] and block[63]
      436             :                 "movq %%mm7, %%mm6              \n\t" // fold the accumulator: XOR high dword into low dword
      437             :                 "psrlq $32, %%mm7               \n\t"
      438             :                 "pxor %%mm6, %%mm7              \n\t"
      439             :                 "movq %%mm7, %%mm6              \n\t" // then XOR word 1 into word 0: low word = XOR of all four lanes
      440             :                 "psrlq $16, %%mm7               \n\t"
      441             :                 "pxor %%mm6, %%mm7              \n\t"
      442             :                 "pslld $31, %%mm7               \n\t" // isolate bit 0 of the folded word at bit 31
      443             :                 "psrlq $15, %%mm7               \n\t" // move it to bit 16, the low bit of block[63]
      444             :                 "pxor %%mm7, %%mm0              \n\t" // toggle that bit of block[63]
      445             :                 "movd %%mm0, 124(%0, %3)        \n\t" // write block[62..63] back
      446             : 
      447        5941 :                 ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs) // %0/%1 = end pointers, %2 = qscale, %3 = start offset (kept in a register for the 124(%0, %3) accesses)
      448             :                 : "%"FF_REG_a, "memory" // NOTE(review): mm0-mm7 are written but not listed as clobbers
      449             :         );
      450        5941 : }
     451             : 
     452             : #endif /* HAVE_MMX_INLINE */
     453             : 
      454         737 : av_cold void ff_mpv_common_init_x86(MpegEncContext *s) // installs the MMX dequantizers into s when inline MMX is usable
      455             : {
      456             : #if HAVE_MMX_INLINE
      457         737 :     int cpu_flags = av_get_cpu_flags();
      458             : 
      459         737 :     if (INLINE_MMX(cpu_flags)) { // otherwise every function pointer in s is left untouched
      460         119 :         s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
      461         119 :         s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
      462         119 :         s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
      463         119 :         s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx;
      464         119 :         if (!(s->avctx->flags & AV_CODEC_FLAG_BITEXACT)) // the MPEG-2 intra MMX version is skipped when bit-exact output is requested
      465         116 :             s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
      466         119 :         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
      467             :     }
      468             : #endif /* HAVE_MMX_INLINE */
      469         737 : }

Generated by: LCOV version 1.13