GCC Code Coverage Report
Directory: ../../../ffmpeg/ Exec Total Coverage
File: src/libavcodec/x86/rnd_template.c Lines: 4 8 50.0 %
Date: 2020-10-23 17:01:47 Branches: 0 0 - %

Line Branch Exec Source
1
/*
2
 * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7
 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
8
 * and improved by Zdenek Kabelac <kabi@users.sf.net>
9
 *
10
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or
13
 * modify it under the terms of the GNU Lesser General Public
14
 * License as published by the Free Software Foundation; either
15
 * version 2.1 of the License, or (at your option) any later version.
16
 *
17
 * FFmpeg is distributed in the hope that it will be useful,
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
 * Lesser General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU Lesser General Public
23
 * License along with FFmpeg; if not, write to the Free Software
24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25
 */
26
27
#include <stddef.h>
28
#include <stdint.h>
29
30
#include "inline_asm.h"
31
32
// put_pixels: stores h rows of 8 bytes to block; given mm7 == 0, each byte is (a + b + c + d + rnd) >> 2 over a 2x2 source neighbourhood
33
116494
av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, // block: destination, pixels: source
34
                                  ptrdiff_t line_size, int h) // line_size: byte stride applied to both buffers; h: row count, must be even and > 0 (see loop exit)
35
{
36
116494
    MOVQ_ZERO(mm7); // presumably clears mm7 (macro defined elsewhere); mm7 is the second operand of every unpack below
37
116494
    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
38
116494
    __asm__ volatile(
39
        "movq   (%1), %%mm0             \n\t" // row 0, bytes 0..7
40
        "movq   1(%1), %%mm4            \n\t" // row 0, bytes 1..8 (right-hand neighbours)
41
        "movq   %%mm0, %%mm1            \n\t"
42
        "movq   %%mm4, %%mm5            \n\t"
43
        "punpcklbw %%mm7, %%mm0         \n\t" // low 4 bytes -> 16-bit words
44
        "punpcklbw %%mm7, %%mm4         \n\t"
45
        "punpckhbw %%mm7, %%mm1         \n\t" // high 4 bytes -> 16-bit words
46
        "punpckhbw %%mm7, %%mm5         \n\t"
47
        "paddusw %%mm0, %%mm4           \n\t" // mm4/mm5 = horizontal pair sums of row 0 (low/high half)
48
        "paddusw %%mm1, %%mm5           \n\t"
49
        "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" // FF_REG_a = 0: running byte offset shared by source and destination
50
        "add    %3, %1                  \n\t" // pixels now points at row 1
51
        ".p2align 3                     \n\t"
52
        "1:                             \n\t" // loop body emits two output rows per pass
53
        "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" // next source row, bytes 0..7
54
        "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" // next source row, bytes 1..8
55
        "movq   %%mm0, %%mm1            \n\t"
56
        "movq   %%mm2, %%mm3            \n\t"
57
        "punpcklbw %%mm7, %%mm0         \n\t"
58
        "punpcklbw %%mm7, %%mm2         \n\t"
59
        "punpckhbw %%mm7, %%mm1         \n\t"
60
        "punpckhbw %%mm7, %%mm3         \n\t"
61
        "paddusw %%mm2, %%mm0           \n\t" // mm0/mm1 = pair sums of this row, kept for the second half
62
        "paddusw %%mm3, %%mm1           \n\t"
63
        "paddusw %%mm6, %%mm4           \n\t" // add the rounding constant to the previous row's sums
64
        "paddusw %%mm6, %%mm5           \n\t"
65
        "paddusw %%mm0, %%mm4           \n\t" // previous row + rnd + this row
66
        "paddusw %%mm1, %%mm5           \n\t"
67
        "psrlw  $2, %%mm4               \n\t" // divide the four-sample sum by 4
68
        "psrlw  $2, %%mm5               \n\t"
69
        "packuswb  %%mm5, %%mm4         \n\t" // words back to 8 bytes
70
        "movq   %%mm4, (%2, %%"FF_REG_a")  \n\t" // store one output row to block + offset
71
        "add    %3, %%"FF_REG_a"           \n\t" // offset += line_size
72
73
        "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
74
        "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
75
        "movq   %%mm2, %%mm3            \n\t"
76
        "movq   %%mm4, %%mm5            \n\t"
77
        "punpcklbw %%mm7, %%mm2         \n\t"
78
        "punpcklbw %%mm7, %%mm4         \n\t"
79
        "punpckhbw %%mm7, %%mm3         \n\t"
80
        "punpckhbw %%mm7, %%mm5         \n\t"
81
        "paddusw %%mm2, %%mm4           \n\t" // mm4/mm5 = pair sums of this row, carried into the next pass
82
        "paddusw %%mm3, %%mm5           \n\t"
83
        "paddusw %%mm6, %%mm0           \n\t"
84
        "paddusw %%mm6, %%mm1           \n\t"
85
        "paddusw %%mm4, %%mm0           \n\t"
86
        "paddusw %%mm5, %%mm1           \n\t"
87
        "psrlw  $2, %%mm0               \n\t"
88
        "psrlw  $2, %%mm1               \n\t"
89
        "packuswb  %%mm1, %%mm0         \n\t"
90
        "movq   %%mm0, (%2, %%"FF_REG_a")  \n\t" // store the second output row of this pass
91
        "add    %3, %%"FF_REG_a"        \n\t"
92
93
        "subl   $2, %0                  \n\t" // h -= 2
94
        "jnz    1b                      \n\t" // exits only when h reaches exactly 0, hence h must be even and > 0
95
        :"+g"(h), "+S"(pixels) // %0 = h, %1 = pixels; both are modified by the asm
96
        :"D"(block), "r"((x86_reg)line_size) // %2 = block, %3 = line_size
97
        :FF_REG_a, "memory"); // FF_REG_a is the scratch offset register
98
}
99
100
// avg_pixels: same xy2 interpolation as put_pixels, but each result row is combined with the bytes already in block via PAVGB_MMX before being stored
101
// this routine is 'slightly' suboptimal but mostly unused
102
av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, // block: destination (read and written), pixels: source
103
                                  ptrdiff_t line_size, int h) // line_size: byte stride applied to both buffers; h: row count, must be even and > 0 (see loop exit)
104
{
105
    MOVQ_ZERO(mm7); // presumably clears mm7 (macro defined elsewhere); mm7 is the second operand of every unpack below
106
    SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
107
    __asm__ volatile(
108
        "movq   (%1), %%mm0             \n\t" // row 0, bytes 0..7
109
        "movq   1(%1), %%mm4            \n\t" // row 0, bytes 1..8 (right-hand neighbours)
110
        "movq   %%mm0, %%mm1            \n\t"
111
        "movq   %%mm4, %%mm5            \n\t"
112
        "punpcklbw %%mm7, %%mm0         \n\t" // low 4 bytes -> 16-bit words
113
        "punpcklbw %%mm7, %%mm4         \n\t"
114
        "punpckhbw %%mm7, %%mm1         \n\t" // high 4 bytes -> 16-bit words
115
        "punpckhbw %%mm7, %%mm5         \n\t"
116
        "paddusw %%mm0, %%mm4           \n\t" // mm4/mm5 = horizontal pair sums of row 0 (low/high half)
117
        "paddusw %%mm1, %%mm5           \n\t"
118
        "xor    %%"FF_REG_a", %%"FF_REG_a" \n\t" // FF_REG_a = 0: running byte offset shared by source and destination
119
        "add    %3, %1                  \n\t" // pixels now points at row 1
120
        ".p2align 3                     \n\t"
121
        "1:                             \n\t" // loop body emits two output rows per pass
122
        "movq   (%1, %%"FF_REG_a"), %%mm0  \n\t" // next source row, bytes 0..7
123
        "movq   1(%1, %%"FF_REG_a"), %%mm2 \n\t" // next source row, bytes 1..8
124
        "movq   %%mm0, %%mm1            \n\t"
125
        "movq   %%mm2, %%mm3            \n\t"
126
        "punpcklbw %%mm7, %%mm0         \n\t"
127
        "punpcklbw %%mm7, %%mm2         \n\t"
128
        "punpckhbw %%mm7, %%mm1         \n\t"
129
        "punpckhbw %%mm7, %%mm3         \n\t"
130
        "paddusw %%mm2, %%mm0           \n\t" // mm0/mm1 = pair sums of this row, kept for the second half
131
        "paddusw %%mm3, %%mm1           \n\t"
132
        "paddusw %%mm6, %%mm4           \n\t" // add the rounding constant to the previous row's sums
133
        "paddusw %%mm6, %%mm5           \n\t"
134
        "paddusw %%mm0, %%mm4           \n\t" // previous row + rnd + this row
135
        "paddusw %%mm1, %%mm5           \n\t"
136
        "psrlw  $2, %%mm4               \n\t" // divide the four-sample sum by 4
137
        "psrlw  $2, %%mm5               \n\t"
138
                "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" // mm3 = 8 bytes currently stored in block at this row
139
        "packuswb  %%mm5, %%mm4         \n\t" // words back to 8 bytes; mm5 is free again afterwards
140
                "pcmpeqd %%mm2, %%mm2   \n\t" // mm2 = all bits set
141
                "paddb %%mm2, %%mm2     \n\t" // 0xFF + 0xFF per byte -> every byte of mm2 is 0xFE
142
                PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) // macro defined elsewhere; presumably mm5 = byte-wise average of mm3 and mm4, using mm2 as mask
143
                "movq   %%mm5, (%2, %%"FF_REG_a")  \n\t" // store the combined row back to block + offset
144
        "add    %3, %%"FF_REG_a"        \n\t" // offset += line_size
145
146
        "movq   (%1, %%"FF_REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
147
        "movq   1(%1, %%"FF_REG_a"), %%mm4 \n\t"
148
        "movq   %%mm2, %%mm3            \n\t"
149
        "movq   %%mm4, %%mm5            \n\t"
150
        "punpcklbw %%mm7, %%mm2         \n\t"
151
        "punpcklbw %%mm7, %%mm4         \n\t"
152
        "punpckhbw %%mm7, %%mm3         \n\t"
153
        "punpckhbw %%mm7, %%mm5         \n\t"
154
        "paddusw %%mm2, %%mm4           \n\t" // mm4/mm5 = pair sums of this row, carried into the next pass
155
        "paddusw %%mm3, %%mm5           \n\t"
156
        "paddusw %%mm6, %%mm0           \n\t"
157
        "paddusw %%mm6, %%mm1           \n\t"
158
        "paddusw %%mm4, %%mm0           \n\t"
159
        "paddusw %%mm5, %%mm1           \n\t"
160
        "psrlw  $2, %%mm0               \n\t"
161
        "psrlw  $2, %%mm1               \n\t"
162
                "movq   (%2, %%"FF_REG_a"), %%mm3  \n\t" // mm3 = 8 bytes currently stored in block at this row
163
        "packuswb  %%mm1, %%mm0         \n\t"
164
                "pcmpeqd %%mm2, %%mm2   \n\t" // rebuild the 0xFE byte mask (mm2 held source data above)
165
                "paddb %%mm2, %%mm2     \n\t"
166
                PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) // presumably mm1 = byte-wise average of mm3 and mm0 (macro defined elsewhere)
167
                "movq   %%mm1, (%2, %%"FF_REG_a")  \n\t" // store the second combined row of this pass
168
        "add    %3, %%"FF_REG_a"           \n\t"
169
170
        "subl   $2, %0                  \n\t" // h -= 2
171
        "jnz    1b                      \n\t" // exits only when h reaches exactly 0, hence h must be even and > 0
172
        :"+g"(h), "+S"(pixels) // %0 = h, %1 = pixels; both are modified by the asm
173
        :"D"(block), "r"((x86_reg)line_size) // %2 = block, %3 = line_size
174
        :FF_REG_a, "memory"); // FF_REG_a is the scratch offset register
175
}