| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | * Copyright (C) 2010 David Conrad | ||
| 3 | * | ||
| 4 | * This file is part of FFmpeg. | ||
| 5 | * | ||
| 6 | * FFmpeg is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU Lesser General Public | ||
| 8 | * License as published by the Free Software Foundation; either | ||
| 9 | * version 2.1 of the License, or (at your option) any later version. | ||
| 10 | * | ||
| 11 | * FFmpeg is distributed in the hope that it will be useful, | ||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 14 | * Lesser General Public License for more details. | ||
| 15 | * | ||
| 16 | * You should have received a copy of the GNU Lesser General Public | ||
| 17 | * License along with FFmpeg; if not, write to the Free Software | ||
| 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 19 | */ | ||
| 20 | |||
| 21 | #include "libavutil/x86/cpu.h" | ||
| 22 | #include "libavcodec/diracdsp.h" | ||
| 23 | #include "fpel.h" | ||
| 24 | |||
/*
 * Prototypes of the SIMD routines that ff_diracdsp_init_x86() installs.
 * They are only declared in this file; the definitions are expected to come
 * from elsewhere in the build (presumably the x86 assembly sources --
 * confirm against the build system).
 */

/* Stored in DiracDSPContext.add_rect_clamped. */
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int,
                              const int16_t *, int, int, int);

/* The 8/16/32 variants are stored in add_dirac_obmc[0], [1] and [2]. */
void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/*
 * Clamped rectangle stores. Note that the two SSE2 routines take their
 * source as const int16_t *, while the SSE4 routine takes const uint8_t *.
 */
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

/* Stored in DiracDSPContext.dequant_subband[1]. */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
| 36 | |||
| 37 | #if HAVE_X86ASM | ||
| 38 | |||
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the external routines ff_dirac_hpel_filter_v_<EXT> and
 * ff_dirac_hpel_filter_h_<EXT>, and defines the static wrapper
 * dirac_hpel_filter_<EXT>() that drives them one row at a time.
 *
 * For each of the `height` rows the wrapper
 *   1. calls the "v" routine with dstv and src both moved back by MMSIZE
 *      bytes, passing `stride` and `width + MMSIZE + 5` as the last two
 *      arguments (presumably so that the row written to dstv carries enough
 *      margin for step 3 -- confirm against the assembly);
 *   2. calls the "h" routine from src into dsth;
 *   3. calls the "h" routine from the dstv row just written into dstc;
 * and then advances dsth, dstv, dstc and src by `stride`.
 *
 * MMSIZE is parenthesized at every use so that an expression argument
 * (e.g. 8+8) expands with the intended precedence.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                               \
void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);     \
void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);          \
                                                                               \
static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,            \
                                      uint8_t *dstc, const uint8_t *src,       \
                                      int stride, int width, int height)       \
{                                                                              \
    while (height--) {                                                         \
        ff_dirac_hpel_filter_v_ ## EXT(dstv - (MMSIZE), src - (MMSIZE),        \
                                       stride, width + (MMSIZE) + 5);          \
        ff_dirac_hpel_filter_h_ ## EXT(dsth, src,  width);                     \
        ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                     \
                                                                               \
        dsth += stride;                                                        \
        dstv += stride;                                                        \
        dstc += stride;                                                        \
        src  += stride;                                                        \
    }                                                                          \
}
| 58 | |||
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines two static wrappers, <OPNAME>_dirac_pixels16_<EXT> and
 * <OPNAME>_dirac_pixels32_<EXT>.
 *
 * Each wrapper checks the row count h:
 *   - if h is not a multiple of 4, the call is forwarded unchanged (full
 *     src[] array) to ff_<OPNAME>_dirac_pixels{16,32}_c;
 *   - otherwise ff_<OPNAME>_pixels16_<EXT> is called on src[0] only; the
 *     32-wide wrapper calls it twice, the second time with dst and src[0]
 *     both advanced by 16 bytes.
 *
 * NOTE(review): the multiple-of-4 requirement presumably comes from the
 * <EXT> routine processing four rows per iteration -- confirm.
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                               \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if (h & 3) {                                                               \
        ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);               \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);                \
}                                                                              \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if (h & 3) {                                                               \
        ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);               \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);      \
    ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);      \
}
| 78 | |||
| 79 | ✗ | DIRAC_PIXOP(put, sse2) | |
| 80 | ✗ | DIRAC_PIXOP(avg, sse2) | |
| 81 | |||
| 82 | ✗ | HPEL_FILTER(16, sse2) | |
| 83 | |||
| 84 | #endif // HAVE_X86ASM | ||
| 85 | |||
| 86 | 109 | void ff_diracdsp_init_x86(DiracDSPContext* c) | |
| 87 | { | ||
| 88 | #if HAVE_X86ASM | ||
| 89 | 109 | int mm_flags = av_get_cpu_flags(); | |
| 90 | |||
| 91 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 82 times.
|
109 | if (EXTERNAL_SSE2(mm_flags)) { |
| 92 | 27 | c->dirac_hpel_filter = dirac_hpel_filter_sse2; | |
| 93 | 27 | c->add_rect_clamped = ff_add_rect_clamped_sse2; | |
| 94 | 27 | c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2; | |
| 95 | |||
| 96 | 27 | c->add_dirac_obmc[0] = ff_add_dirac_obmc8_sse2; | |
| 97 | 27 | c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2; | |
| 98 | 27 | c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2; | |
| 99 | |||
| 100 | 27 | c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2; | |
| 101 | 27 | c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2; | |
| 102 | 27 | c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2; | |
| 103 | 27 | c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2; | |
| 104 | } | ||
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 91 times.
|
109 | if (EXTERNAL_SSE4(mm_flags)) { |
| 107 | 18 | c->dequant_subband[1] = ff_dequant_subband_32_sse4; | |
| 108 | 18 | c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; | |
| 109 | } | ||
| 110 | #endif // HAVE_X86ASM | ||
| 111 | 109 | } | |
| 112 |