FFmpeg coverage


Directory: ../../../ffmpeg/
File: src/libavcodec/x86/mpegvideo.c
Date: 2026-01-16 07:34:38
             Exec   Total   Coverage
Lines:         54      68      79.4%
Functions:      6       7      85.7%
Branches:      14      20      70.0%

Line Branch Exec Source
1 /*
2 * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
3 * H.263, MPEG-1, MPEG-2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "libavutil/attributes.h"
23 #include "libavutil/avassert.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavutil/x86/cpu.h"
27 #include "libavcodec/mpegvideo.h"
28 #include "libavcodec/mpegvideodata.h"
29 #include "libavcodec/mpegvideo_unquantize.h"
30
31 #if HAVE_SSE2_INLINE
32
33 #define SPLATW(reg) "punpcklwd %%" #reg ", %%" #reg "\n\t" \
34 "pshufd $0, %%" #reg ", %%" #reg "\n\t"
35
36 #if HAVE_SSSE3_INLINE
37
38 194461 static void dct_unquantize_h263_intra_ssse3(const MPVContext *s,
39 int16_t *block, int n, int qscale)
40 {
41 194461 x86_reg qmul = (unsigned)qscale << 1;
42 int level, qadd;
43
44 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
45
46 2/2 194461 if (!s->h263_aic) {
       ✓ Branch 0 taken 194460 times.  ✓ Branch 1 taken 1 times.
47 2/2 194460 if (n < 4)
       ✓ Branch 0 taken 129640 times.  ✓ Branch 1 taken 64820 times.
48 129640 level = block[0] * s->y_dc_scale;
49 else
50 64820 level = block[0] * s->c_dc_scale;
51 194460 qadd = (qscale - 1) | 1;
52 }else{
53 1 qadd = 0;
54 1 level= block[0];
55 }
56 2/2 194461 x86_reg offset = s->ac_pred ? 63 << 1 : s->intra_scantable.raster_end[s->block_last_index[n]] << 1;
       ✓ Branch 0 taken 194460 times.  ✓ Branch 1 taken 1 times.
57
58 194461 __asm__ volatile(
59 "movd %k1, %%xmm0 \n\t" //qmul
60 "lea (%2, %0), %1 \n\t"
61 "neg %0 \n\t"
62 "movd %3, %%xmm1 \n\t" //qadd
63 SPLATW(xmm0)
64 SPLATW(xmm1)
65
66 ".p2align 4 \n\t"
67 "1: \n\t"
68 "movdqa (%1, %0), %%xmm2 \n\t"
69 "movdqa 16(%1, %0), %%xmm3 \n\t"
70
71 "movdqa %%xmm1, %%xmm4 \n\t"
72 "movdqa %%xmm1, %%xmm5 \n\t"
73
74 "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd
75 "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd
76
77 "pmullw %%xmm0, %%xmm2 \n\t"
78 "pmullw %%xmm0, %%xmm3 \n\t"
79
80 "paddw %%xmm4, %%xmm2 \n\t"
81 "paddw %%xmm5, %%xmm3 \n\t"
82
83 "movdqa %%xmm2, (%1, %0) \n\t"
84 "movdqa %%xmm3, 16(%1, %0) \n\t"
85
86 "add $32, %0 \n\t"
87 "jng 1b \n\t"
88 : "+r"(offset), "+r"(qmul)
89 : "r" (block), "rm" (qadd)
90 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
91 );
92 194461 block[0]= level;
93 194461 }
94
95
96 49920 static void dct_unquantize_h263_inter_ssse3(const MPVContext *s,
97 int16_t *block, int n, int qscale)
98 {
99 49920 int qmul = qscale << 1;
100 49920 int qadd = (qscale - 1) | 1;
101
102 av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
103
104 49920 x86_reg offset = s->inter_scantable.raster_end[s->block_last_index[n]] << 1;
105
106 49920 __asm__ volatile(
107 "movd %2, %%xmm0 \n\t" //qmul
108 "movd %3, %%xmm1 \n\t" //qadd
109 "add %1, %0 \n\t"
110 "neg %1 \n\t"
111 SPLATW(xmm0)
112 SPLATW(xmm1)
113
114 ".p2align 4 \n\t"
115 "1: \n\t"
116 "movdqa (%0, %1), %%xmm2 \n\t"
117 "movdqa 16(%0, %1), %%xmm3 \n\t"
118
119 "movdqa %%xmm1, %%xmm4 \n\t"
120 "movdqa %%xmm1, %%xmm5 \n\t"
121
122 "psignw %%xmm2, %%xmm4 \n\t" // sgn(block[i])*qadd
123 "psignw %%xmm3, %%xmm5 \n\t" // sgn(block[i])*qadd
124
125 "pmullw %%xmm0, %%xmm2 \n\t"
126 "pmullw %%xmm0, %%xmm3 \n\t"
127
128 "paddw %%xmm4, %%xmm2 \n\t"
129 "paddw %%xmm5, %%xmm3 \n\t"
130
131 "movdqa %%xmm2, (%0, %1) \n\t"
132 "movdqa %%xmm3, 16(%0, %1) \n\t"
133
134 "add $32, %1 \n\t"
135 "jng 1b \n\t"
136 : "+r" (block), "+r" (offset)
137 : "rm"(qmul), "rm" (qadd)
138 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5",) "memory"
139 );
140 49920 }
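
Both H.263 kernels above vectorize the same per-coefficient update: multiply by qmul = 2*qscale, then bias away from zero by qadd = (qscale - 1) | 1, with psignw producing sgn(block[i])*qadd so that zero coefficients stay zero; the intra variant additionally recomputes the DC term in C on either side of the asm. A scalar model of the loop, as a sketch only (the authoritative C references live in libavcodec/mpegvideo_unquantize.c; 16-bit pmullw/paddw wraparound is ignored here):

#include <stdint.h>

/* Scalar sketch of the SSSE3 H.263 dequantize loops. */
static void h263_dequant_sketch(int16_t *block, int last, int qmul, int qadd)
{
    for (int i = 0; i <= last; i++) {
        int level = block[i];
        if (level > 0)
            level = level * qmul + qadd;   /* pmullw, then paddw of +qadd */
        else if (level < 0)
            level = level * qmul - qadd;   /* psignw flipped the bias sign */
        block[i] = (int16_t)level;         /* zero is left at zero */
    }
}
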
141
142 1 static void dct_unquantize_mpeg1_intra_ssse3(const MPVContext *s,
143 int16_t *block, int n, int qscale)
144 {
145 x86_reg nCoeffs;
146 const uint16_t *quant_matrix;
147 int block0;
148
149 av_assert2(s->block_last_index[n]>=0);
150
151 1 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
152
153 1/2 1 if (n < 4)
        ✓ Branch 0 taken 1 times.  ✗ Branch 1 not taken.
154 1 block0 = block[0] * s->y_dc_scale;
155 else
156 block0 = block[0] * s->c_dc_scale;
157 /* XXX: only MPEG-1 */
158 1 quant_matrix = s->intra_matrix;
159 1 x86_reg offset = -2 * nCoeffs;
160 1 __asm__ volatile(
161 "movd %3, %%xmm6 \n\t"
162 "pcmpeqw %%xmm7, %%xmm7 \n\t"
163 "psrlw $15, %%xmm7 \n\t"
164 SPLATW(xmm6)
165 ".p2align 4 \n\t"
166 "1: \n\t"
167 "movdqa (%2, %0), %%xmm4 \n\t"
168 "movdqa 16(%2, %0), %%xmm5 \n\t"
169 "movdqa (%1, %0), %%xmm0 \n\t"
170 "movdqa 16(%1, %0), %%xmm1 \n\t"
171 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
172 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
173 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
174 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
175 "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*q
176 "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*q
177 "psraw $3, %%xmm2 \n\t"
178 "psraw $3, %%xmm3 \n\t"
179 "psubw %%xmm7, %%xmm2 \n\t"
180 "psubw %%xmm7, %%xmm3 \n\t"
181 "por %%xmm7, %%xmm2 \n\t"
182 "por %%xmm7, %%xmm3 \n\t"
183 "psignw %%xmm0, %%xmm2 \n\t"
184 "psignw %%xmm1, %%xmm3 \n\t"
185 "movdqa %%xmm2, (%1, %0) \n\t"
186 "movdqa %%xmm3, 16(%1, %0) \n\t"
187
188 "add $32, %0 \n\t"
189 "js 1b \n\t"
190 : "+r" (offset)
191 1 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
192 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
193 "memory"
194 );
195 1 block[0]= block0;
196 1 }
197
198 1 static void dct_unquantize_mpeg1_inter_ssse3(const MPVContext *s,
199 int16_t *block, int n, int qscale)
200 {
201 x86_reg nCoeffs;
202 const uint16_t *quant_matrix;
203
204 av_assert2(s->block_last_index[n]>=0);
205
206 1 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
207
208 1 quant_matrix = s->inter_matrix;
209 1 x86_reg offset = -2 * nCoeffs;
210 1 __asm__ volatile(
211 "movd %3, %%xmm6 \n\t"
212 "pcmpeqw %%xmm7, %%xmm7 \n\t"
213 "psrlw $15, %%xmm7 \n\t"
214 SPLATW(xmm6)
215 ".p2align 4 \n\t"
216 "1: \n\t"
217 "movdqa (%2, %0), %%xmm4 \n\t"
218 "movdqa 16(%2, %0), %%xmm5 \n\t"
219 "movdqa (%1, %0), %%xmm0 \n\t"
220 "movdqa 16(%1, %0), %%xmm1 \n\t"
221 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
222 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
223 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
224 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
225 "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2
226 "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2
227 "paddw %%xmm7, %%xmm2 \n\t" // abs(block[i])*2 + 1
228 "paddw %%xmm7, %%xmm3 \n\t" // abs(block[i])*2 + 1
229 "pmullw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q
230 "pmullw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q
231 "psraw $4, %%xmm2 \n\t"
232 "psraw $4, %%xmm3 \n\t"
233 "psubw %%xmm7, %%xmm2 \n\t"
234 "psubw %%xmm7, %%xmm3 \n\t"
235 "por %%xmm7, %%xmm2 \n\t"
236 "por %%xmm7, %%xmm3 \n\t"
237 "psignw %%xmm0, %%xmm2 \n\t"
238 "psignw %%xmm1, %%xmm3 \n\t"
239 "movdqa %%xmm2, (%1, %0) \n\t"
240 "movdqa %%xmm3, 16(%1, %0) \n\t"
241
242 "add $32, %0 \n\t"
243 "js 1b \n\t"
244 : "+r" (offset)
245 1 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
246 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
247 "memory"
248 );
249 1 }
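
The two MPEG-1 kernels above differ only in the reconstruction formula (intra: (|x|*q) >> 3; inter: ((2*|x| + 1)*q) >> 4, where q = qscale*quant_matrix[i] comes from the pmullw %%xmm6 products); both then apply the (x - 1) | 1 odd-ification carried out by the psubw/por pair against the per-word 1s in xmm7, and both use psignw to restore the sign while keeping zeros at zero. A scalar sketch of one coefficient, ignoring 16-bit truncation:

#include <stdint.h>
#include <stdlib.h>

static int16_t mpeg1_dequant_sketch(int16_t coeff, int q, int inter)
{
    int a = abs(coeff);
    if (inter)
        a = ((2 * a + 1) * q) >> 4;            /* paddw; paddw +1; pmullw; psraw $4 */
    else
        a = (a * q) >> 3;                      /* pmullw; psraw $3 */
    a = (a - 1) | 1;                           /* psubw 1; por 1: force odd */
    return coeff > 0 ? a : coeff < 0 ? -a : 0; /* psignw */
}
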
250
251 #endif /* HAVE_SSSE3_INLINE */
252
253 static void dct_unquantize_mpeg2_intra_sse2(const MPVContext *s,
254 int16_t *block, int n, int qscale)
255 {
256 x86_reg nCoeffs;
257 const uint16_t *quant_matrix;
258 int block0;
259
260 av_assert2(s->block_last_index[n]>=0);
261
262 if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
263 else qscale <<= 1;
264
265 nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
266
267 if (n < 4)
268 block0 = block[0] * s->y_dc_scale;
269 else
270 block0 = block[0] * s->c_dc_scale;
271 quant_matrix = s->intra_matrix;
272 x86_reg offset = -2 * nCoeffs;
273 __asm__ volatile(
274 "movd %3, %%xmm6 \n\t"
275 SPLATW(xmm6)
276 ".p2align 4 \n\t"
277 "1: \n\t"
278 "movdqa (%1, %0), %%xmm0 \n\t"
279 "movdqa 16(%1, %0), %%xmm1 \n\t"
280 "movdqa (%2, %0), %%xmm4 \n\t"
281 "movdqa 16(%2, %0), %%xmm5 \n\t"
282 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
283 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
284 "movdqa %%xmm0, %%xmm2 \n\t"
285 "movdqa %%xmm1, %%xmm3 \n\t"
286 "psrlw $12, %%xmm2 \n\t" // block[i] < 0 ? 0xf : 0
287 "psrlw $12, %%xmm3 \n\t" // (block[i] is in the -2048..2047 range)
288 "pmullw %%xmm4, %%xmm0 \n\t" // block[i]*q
289 "pmullw %%xmm5, %%xmm1 \n\t" // block[i]*q
290 "paddw %%xmm2, %%xmm0 \n\t" // bias negative block[i]
291 "paddw %%xmm3, %%xmm1 \n\t" // so that a right-shift
292 "psraw $4, %%xmm0 \n\t" // is equivalent to divide
293 "psraw $4, %%xmm1 \n\t" // with rounding towards zero
294 "movdqa %%xmm0, (%1, %0) \n\t"
295 "movdqa %%xmm1, 16(%1, %0) \n\t"
296
297 "add $32, %0 \n\t"
298 "jng 1b \n\t"
299 : "+r" (offset)
300 : "r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale)
301 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6",)
302 "memory"
303 );
304 block[0]= block0;
305 //Note, we do not do mismatch control for intra as errors cannot accumulate
306 }
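
The psrlw $12 / paddw / psraw $4 tail above is a branch-free round-toward-zero division by 16: with coefficients confined to -2048..2047, a logical right shift by 12 yields 15 exactly when block[i] is negative and 0 otherwise, and adding that bias before the arithmetic shift turns flooring into truncation. One lane in scalar form, as a sketch:

#include <stdint.h>

/* q stands for qscale * quant_matrix[i]; every step keeps 16 bits, as the
 * packed-word instructions do. */
static int16_t mpeg2_intra_sketch(int16_t coeff, uint16_t q)
{
    int16_t prod = (int16_t)(coeff * q);   /* pmullw: low 16 bits of product */
    int16_t bias = (uint16_t)coeff >> 12;  /* psrlw $12: 15 if coeff < 0 */
    int16_t sum  = (int16_t)(prod + bias); /* paddw */
    return (int16_t)(sum >> 4);            /* psraw $4: truncate toward zero */
}
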
307
308 #if HAVE_SSSE3_INLINE
309
310 5942 static void dct_unquantize_mpeg2_inter_ssse3(const MPVContext *s,
311 int16_t *block, int n, int qscale)
312 {
313 av_assert2(s->block_last_index[n]>=0);
314
315 1/2 5942 x86_reg qscale2 = s->q_scale_type ? ff_mpeg2_non_linear_qscale[qscale] : (unsigned)qscale << 1;
        ✗ Branch 0 not taken.  ✓ Branch 1 taken 5942 times.
316 5942 x86_reg offset = s->intra_scantable.raster_end[s->block_last_index[n]] << 1;
317 5942 const void *quant_matrix = (const char*)s->inter_matrix + offset;
318
319
320 5942 __asm__ volatile(
321 "movd %k1, %%xmm6 \n\t"
322 "lea (%2, %0), %1 \n\t"
323 "neg %0 \n\t"
324 SPLATW(xmm6)
325 "pcmpeqw %%xmm7, %%xmm7 \n\t"
326 "psrldq $14, %%xmm7 \n\t"
327 ".p2align 4 \n\t"
328 "1: \n\t"
329 "movdqa (%3, %0), %%xmm4 \n\t"
330 "movdqa 16(%3, %0), %%xmm5 \n\t"
331 "movdqa (%1, %0), %%xmm0 \n\t"
332 "movdqa 16(%1, %0), %%xmm1 \n\t"
333 "pmullw %%xmm6, %%xmm4 \n\t" // q=qscale*quant_matrix[i]
334 "pmullw %%xmm6, %%xmm5 \n\t" // q=qscale*quant_matrix[i]
335 "pabsw %%xmm0, %%xmm2 \n\t" // abs(block[i])
336 "pabsw %%xmm1, %%xmm3 \n\t" // abs(block[i])
337 "paddw %%xmm2, %%xmm2 \n\t" // abs(block[i])*2
338 "paddw %%xmm3, %%xmm3 \n\t" // abs(block[i])*2
339 "pmullw %%xmm4, %%xmm2 \n\t" // abs(block[i])*2*q
340 "pmullw %%xmm5, %%xmm3 \n\t" // abs(block[i])*2*q
341 "paddw %%xmm4, %%xmm2 \n\t" // (abs(block[i])*2 + 1)*q
342 "paddw %%xmm5, %%xmm3 \n\t" // (abs(block[i])*2 + 1)*q
343 "psrlw $5, %%xmm2 \n\t"
344 "psrlw $5, %%xmm3 \n\t"
345 "psignw %%xmm0, %%xmm2 \n\t"
346 "psignw %%xmm1, %%xmm3 \n\t"
347 "movdqa %%xmm2, (%1, %0) \n\t"
348 "movdqa %%xmm3, 16(%1, %0) \n\t"
349 "pxor %%xmm2, %%xmm7 \n\t"
350 "pxor %%xmm3, %%xmm7 \n\t"
351
352 "add $32, %0 \n\t"
353 "jng 1b \n\t"
354 "movd 124(%2), %%xmm0 \n\t"
355 "movhlps %%xmm7, %%xmm6 \n\t"
356 "pxor %%xmm6, %%xmm7 \n\t"
357 "pshufd $1, %%xmm7, %%xmm6 \n\t"
358 "pxor %%xmm6, %%xmm7 \n\t"
359 "pshuflw $1, %%xmm7, %%xmm6 \n\t"
360 "pxor %%xmm6, %%xmm7 \n\t"
361 "pslld $31, %%xmm7 \n\t"
362 "psrld $15, %%xmm7 \n\t"
363 "pxor %%xmm7, %%xmm0 \n\t"
364 "movd %%xmm0, 124(%2) \n\t"
365
366 : "+r"(offset), "+r" (qscale2)
367 : "r" (block), "r"(quant_matrix)
368 : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",)
369 "memory"
370 );
371 5942 }
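
Past the dequantize loop, this kernel also performs the MPEG-2 mismatch control that the intra SSE2 version deliberately skips. xmm7 is seeded with 0xFFFF in its low word (pcmpeqw/psrldq $14), every stored result is XORed in, and the movhlps/pshufd/pshuflw folds reduce it to one word whose bit 0 is the parity of the coefficient sum (bit 0 of a sum equals the XOR of its addends' bit 0s). pslld $31 / psrld $15 then lands that bit on bit 16 of a dword, i.e. the LSB of block[63], which the closing movd/pxor/movd toggles in place. In scalar form, mirroring the sum = -1 seed of the C reference in libavcodec/mpegvideo_unquantize.c (a sketch):

#include <stdint.h>

static void mpeg2_mismatch_sketch(int16_t block[64], int last)
{
    int sum = -1;                 /* the 0xFFFF word seeded into xmm7 */
    for (int i = 0; i <= last; i++)
        sum += block[i];          /* dequantized coefficients */
    block[63] ^= sum & 1;         /* flip the LSB iff the sum is even */
}
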
372
373 #endif /* HAVE_SSSE3_INLINE */
374 #endif /* HAVE_SSE2_INLINE */
375
376 696 av_cold void ff_mpv_unquantize_init_x86(MPVUnquantDSPContext *s, int bitexact)
377 {
378 #if HAVE_SSE2_INLINE
379 696 int cpu_flags = av_get_cpu_flags();
380
381 2/2 696 if (INLINE_SSE2(cpu_flags)) {
        ✓ Branch 0 taken 96 times.  ✓ Branch 1 taken 600 times.
382 2/2 96 if (!bitexact)
        ✓ Branch 0 taken 81 times.  ✓ Branch 1 taken 15 times.
383 81 s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_sse2;
384 }
385 #if HAVE_SSSE3_INLINE
386 2/2 696 if (INLINE_SSSE3(cpu_flags)) {
        ✓ Branch 0 taken 94 times.  ✓ Branch 1 taken 602 times.
387 94 s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_ssse3;
388 94 s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_ssse3;
389 94 s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_ssse3;
390 94 s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_ssse3;
391 94 s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_ssse3;
392 }
393 #endif /* HAVE_SSSE3_INLINE */
394 #endif /* HAVE_SSE2_INLINE */
395 696 }
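
ff_mpv_unquantize_init_x86() only overrides the caller's function pointers where the CPU flags permit the inline SSE2/SSSE3 code, and the MPEG-2 intra SSE2 pointer is additionally gated on !bitexact, presumably because the bit-exact reference path includes the intra mismatch control that this kernel's closing comment says it omits. A hypothetical wrapper showing the dispatch, as a sketch only (the zero-initialization stands in for the C defaults a real decoder installs first; the wrapper name is illustrative):

void unquantize_h263_intra_block(const MPVContext *s, int16_t block[64],
                                 int n, int qscale, int bitexact)
{
    MPVUnquantDSPContext dsp = { 0 };
    ff_mpv_unquantize_init_x86(&dsp, bitexact);
    if (dsp.dct_unquantize_h263_intra)            /* set only with SSSE3 */
        dsp.dct_unquantize_h263_intra(s, block, n, qscale);
}
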
396