LCOV - code coverage report
Current view: top level - libavutil - hwcontext_cuda.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 209 0.0 %
Date: 2017-10-18 21:45:51 Functions: 0 11 0.0 %

          Line data    Source code
       1             : /*
       2             :  * This file is part of FFmpeg.
       3             :  *
       4             :  * FFmpeg is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU Lesser General Public
       6             :  * License as published by the Free Software Foundation; either
       7             :  * version 2.1 of the License, or (at your option) any later version.
       8             :  *
       9             :  * FFmpeg is distributed in the hope that it will be useful,
      10             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             :  * Lesser General Public License for more details.
      13             :  *
      14             :  * You should have received a copy of the GNU Lesser General Public
      15             :  * License along with FFmpeg; if not, write to the Free Software
      16             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      17             :  */
      18             : 
      19             : #include "buffer.h"
      20             : #include "common.h"
      21             : #include "hwcontext.h"
      22             : #include "hwcontext_internal.h"
      23             : #include "hwcontext_cuda_internal.h"
      24             : #include "mem.h"
      25             : #include "pixdesc.h"
      26             : #include "pixfmt.h"
      27             : 
/* Alignment applied (via FFALIGN) to frame row widths when sizing the frame
 * pool in cuda_frames_init() and when computing plane linesizes in
 * cuda_get_buffer(). */
#define CUDA_FRAME_ALIGNMENT 256
      29             : 
      30             : typedef struct CUDAFramesContext {
      31             :     int shift_width, shift_height;
      32             : } CUDAFramesContext;
      33             : 
/* Software pixel formats accepted as AVHWFramesContext.sw_format.
 * cuda_frames_init() rejects any format not listed here, and
 * cuda_frames_get_constraints() copies this list, followed by
 * AV_PIX_FMT_NONE, into valid_sw_formats. */
static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_P016,
    AV_PIX_FMT_YUV444P16,
};
      42             : 
      43           0 : static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
      44             :                                        const void *hwconfig,
      45             :                                        AVHWFramesConstraints *constraints)
      46             : {
      47             :     int i;
      48             : 
      49           0 :     constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
      50             :                                                     sizeof(*constraints->valid_sw_formats));
      51           0 :     if (!constraints->valid_sw_formats)
      52           0 :         return AVERROR(ENOMEM);
      53             : 
      54           0 :     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
      55           0 :         constraints->valid_sw_formats[i] = supported_formats[i];
      56           0 :     constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
      57             : 
      58           0 :     constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
      59           0 :     if (!constraints->valid_hw_formats)
      60           0 :         return AVERROR(ENOMEM);
      61             : 
      62           0 :     constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
      63           0 :     constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
      64             : 
      65           0 :     return 0;
      66             : }
      67             : 
      68           0 : static void cuda_buffer_free(void *opaque, uint8_t *data)
      69             : {
      70           0 :     AVHWFramesContext *ctx = opaque;
      71           0 :     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
      72           0 :     CudaFunctions *cu = hwctx->internal->cuda_dl;
      73             : 
      74             :     CUcontext dummy;
      75             : 
      76           0 :     cu->cuCtxPushCurrent(hwctx->cuda_ctx);
      77             : 
      78           0 :     cu->cuMemFree((CUdeviceptr)data);
      79             : 
      80           0 :     cu->cuCtxPopCurrent(&dummy);
      81           0 : }
      82             : 
      83           0 : static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
      84             : {
      85           0 :     AVHWFramesContext     *ctx = opaque;
      86           0 :     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
      87           0 :     CudaFunctions          *cu = hwctx->internal->cuda_dl;
      88             : 
      89           0 :     AVBufferRef *ret = NULL;
      90           0 :     CUcontext dummy = NULL;
      91             :     CUdeviceptr data;
      92             :     CUresult err;
      93             : 
      94           0 :     err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
      95           0 :     if (err != CUDA_SUCCESS) {
      96           0 :         av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
      97           0 :         return NULL;
      98             :     }
      99             : 
     100           0 :     err = cu->cuMemAlloc(&data, size);
     101           0 :     if (err != CUDA_SUCCESS)
     102           0 :         goto fail;
     103             : 
     104           0 :     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
     105           0 :     if (!ret) {
     106           0 :         cu->cuMemFree(data);
     107           0 :         goto fail;
     108             :     }
     109             : 
     110           0 : fail:
     111           0 :     cu->cuCtxPopCurrent(&dummy);
     112           0 :     return ret;
     113             : }
     114             : 
     115           0 : static int cuda_frames_init(AVHWFramesContext *ctx)
     116             : {
     117           0 :     CUDAFramesContext *priv = ctx->internal->priv;
     118           0 :     int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
     119             :     int i;
     120             : 
     121           0 :     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
     122           0 :         if (ctx->sw_format == supported_formats[i])
     123           0 :             break;
     124             :     }
     125           0 :     if (i == FF_ARRAY_ELEMS(supported_formats)) {
     126           0 :         av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
     127             :                av_get_pix_fmt_name(ctx->sw_format));
     128           0 :         return AVERROR(ENOSYS);
     129             :     }
     130             : 
     131           0 :     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
     132             : 
     133           0 :     if (!ctx->pool) {
     134             :         int size;
     135             : 
     136           0 :         switch (ctx->sw_format) {
     137           0 :         case AV_PIX_FMT_NV12:
     138             :         case AV_PIX_FMT_YUV420P:
     139           0 :             size = aligned_width * ctx->height * 3 / 2;
     140           0 :             break;
     141           0 :         case AV_PIX_FMT_YUV444P:
     142             :         case AV_PIX_FMT_P010:
     143             :         case AV_PIX_FMT_P016:
     144           0 :             size = aligned_width * ctx->height * 3;
     145           0 :             break;
     146           0 :         case AV_PIX_FMT_YUV444P16:
     147           0 :             size = aligned_width * ctx->height * 6;
     148           0 :             break;
     149           0 :         default:
     150           0 :             av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
     151           0 :             return AVERROR_BUG;
     152             :         }
     153             : 
     154           0 :         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
     155           0 :         if (!ctx->internal->pool_internal)
     156           0 :             return AVERROR(ENOMEM);
     157             :     }
     158             : 
     159           0 :     return 0;
     160             : }
     161             : 
     162           0 : static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
     163             : {
     164             :     int aligned_width;
     165           0 :     int width_in_bytes = ctx->width;
     166             : 
     167           0 :     if (ctx->sw_format == AV_PIX_FMT_P010 ||
     168           0 :         ctx->sw_format == AV_PIX_FMT_P016 ||
     169           0 :         ctx->sw_format == AV_PIX_FMT_YUV444P16) {
     170           0 :        width_in_bytes *= 2;
     171             :     }
     172           0 :     aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
     173             : 
     174           0 :     frame->buf[0] = av_buffer_pool_get(ctx->pool);
     175           0 :     if (!frame->buf[0])
     176           0 :         return AVERROR(ENOMEM);
     177             : 
     178           0 :     switch (ctx->sw_format) {
     179           0 :     case AV_PIX_FMT_NV12:
     180             :     case AV_PIX_FMT_P010:
     181             :     case AV_PIX_FMT_P016:
     182           0 :         frame->data[0]     = frame->buf[0]->data;
     183           0 :         frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
     184           0 :         frame->linesize[0] = aligned_width;
     185           0 :         frame->linesize[1] = aligned_width;
     186           0 :         break;
     187           0 :     case AV_PIX_FMT_YUV420P:
     188           0 :         frame->data[0]     = frame->buf[0]->data;
     189           0 :         frame->data[2]     = frame->data[0] + aligned_width * ctx->height;
     190           0 :         frame->data[1]     = frame->data[2] + aligned_width * ctx->height / 4;
     191           0 :         frame->linesize[0] = aligned_width;
     192           0 :         frame->linesize[1] = aligned_width / 2;
     193           0 :         frame->linesize[2] = aligned_width / 2;
     194           0 :         break;
     195           0 :     case AV_PIX_FMT_YUV444P:
     196             :     case AV_PIX_FMT_YUV444P16:
     197           0 :         frame->data[0]     = frame->buf[0]->data;
     198           0 :         frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
     199           0 :         frame->data[2]     = frame->data[1] + aligned_width * ctx->height;
     200           0 :         frame->linesize[0] = aligned_width;
     201           0 :         frame->linesize[1] = aligned_width;
     202           0 :         frame->linesize[2] = aligned_width;
     203           0 :         break;
     204           0 :     default:
     205           0 :         av_frame_unref(frame);
     206           0 :         return AVERROR_BUG;
     207             :     }
     208             : 
     209           0 :     frame->format = AV_PIX_FMT_CUDA;
     210           0 :     frame->width  = ctx->width;
     211           0 :     frame->height = ctx->height;
     212             : 
     213           0 :     return 0;
     214             : }
     215             : 
     216           0 : static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
     217             :                                      enum AVHWFrameTransferDirection dir,
     218             :                                      enum AVPixelFormat **formats)
     219             : {
     220             :     enum AVPixelFormat *fmts;
     221             : 
     222           0 :     fmts = av_malloc_array(2, sizeof(*fmts));
     223           0 :     if (!fmts)
     224           0 :         return AVERROR(ENOMEM);
     225             : 
     226           0 :     fmts[0] = ctx->sw_format;
     227           0 :     fmts[1] = AV_PIX_FMT_NONE;
     228             : 
     229           0 :     *formats = fmts;
     230             : 
     231           0 :     return 0;
     232             : }
     233             : 
     234           0 : static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
     235             :                                    const AVFrame *src)
     236             : {
     237           0 :     CUDAFramesContext           *priv = ctx->internal->priv;
     238           0 :     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
     239           0 :     CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
     240             : 
     241             :     CUcontext dummy;
     242             :     CUresult err;
     243             :     int i;
     244             : 
     245           0 :     err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     246           0 :     if (err != CUDA_SUCCESS)
     247           0 :         return AVERROR_UNKNOWN;
     248             : 
     249           0 :     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
     250           0 :         CUDA_MEMCPY2D cpy = {
     251             :             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
     252             :             .dstMemoryType = CU_MEMORYTYPE_HOST,
     253           0 :             .srcDevice     = (CUdeviceptr)src->data[i],
     254           0 :             .dstHost       = dst->data[i],
     255           0 :             .srcPitch      = src->linesize[i],
     256           0 :             .dstPitch      = dst->linesize[i],
     257           0 :             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
     258           0 :             .Height        = src->height >> (i ? priv->shift_height : 0),
     259             :         };
     260             : 
     261           0 :         err = cu->cuMemcpy2D(&cpy);
     262           0 :         if (err != CUDA_SUCCESS) {
     263           0 :             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
     264           0 :             return AVERROR_UNKNOWN;
     265             :         }
     266             :     }
     267             : 
     268           0 :     cu->cuCtxPopCurrent(&dummy);
     269             : 
     270           0 :     return 0;
     271             : }
     272             : 
     273           0 : static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
     274             :                                  const AVFrame *src)
     275             : {
     276           0 :     CUDAFramesContext           *priv = ctx->internal->priv;
     277           0 :     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
     278           0 :     CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
     279             : 
     280             :     CUcontext dummy;
     281             :     CUresult err;
     282             :     int i;
     283             : 
     284           0 :     err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     285           0 :     if (err != CUDA_SUCCESS)
     286           0 :         return AVERROR_UNKNOWN;
     287             : 
     288           0 :     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
     289           0 :         CUDA_MEMCPY2D cpy = {
     290             :             .srcMemoryType = CU_MEMORYTYPE_HOST,
     291             :             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
     292           0 :             .srcHost       = src->data[i],
     293           0 :             .dstDevice     = (CUdeviceptr)dst->data[i],
     294           0 :             .srcPitch      = src->linesize[i],
     295           0 :             .dstPitch      = dst->linesize[i],
     296           0 :             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
     297           0 :             .Height        = src->height >> (i ? priv->shift_height : 0),
     298             :         };
     299             : 
     300           0 :         err = cu->cuMemcpy2D(&cpy);
     301           0 :         if (err != CUDA_SUCCESS) {
     302           0 :             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
     303           0 :             return AVERROR_UNKNOWN;
     304             :         }
     305             :     }
     306             : 
     307           0 :     cu->cuCtxPopCurrent(&dummy);
     308             : 
     309           0 :     return 0;
     310             : }
     311             : 
     312           0 : static void cuda_device_uninit(AVHWDeviceContext *ctx)
     313             : {
     314           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     315             : 
     316           0 :     if (hwctx->internal) {
     317           0 :         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
     318           0 :             hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
     319           0 :             hwctx->cuda_ctx = NULL;
     320             :         }
     321           0 :         cuda_free_functions(&hwctx->internal->cuda_dl);
     322             :     }
     323             : 
     324           0 :     av_freep(&hwctx->internal);
     325           0 : }
     326             : 
     327           0 : static int cuda_device_init(AVHWDeviceContext *ctx)
     328             : {
     329           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     330             :     int ret;
     331             : 
     332           0 :     if (!hwctx->internal) {
     333           0 :         hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
     334           0 :         if (!hwctx->internal)
     335           0 :             return AVERROR(ENOMEM);
     336             :     }
     337             : 
     338           0 :     if (!hwctx->internal->cuda_dl) {
     339           0 :         ret = cuda_load_functions(&hwctx->internal->cuda_dl);
     340           0 :         if (ret < 0) {
     341           0 :             av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
     342           0 :             goto error;
     343             :         }
     344             :     }
     345             : 
     346           0 :     return 0;
     347             : 
     348           0 : error:
     349           0 :     cuda_device_uninit(ctx);
     350           0 :     return ret;
     351             : }
     352             : 
     353           0 : static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
     354             :                               AVDictionary *opts, int flags)
     355             : {
     356           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     357             :     CudaFunctions *cu;
     358             :     CUdevice cu_device;
     359             :     CUcontext dummy;
     360             :     CUresult err;
     361           0 :     int device_idx = 0;
     362             : 
     363           0 :     if (device)
     364           0 :         device_idx = strtol(device, NULL, 0);
     365             : 
     366           0 :     if (cuda_device_init(ctx) < 0)
     367           0 :         goto error;
     368             : 
     369           0 :     cu = hwctx->internal->cuda_dl;
     370             : 
     371           0 :     err = cu->cuInit(0);
     372           0 :     if (err != CUDA_SUCCESS) {
     373           0 :         av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
     374           0 :         goto error;
     375             :     }
     376             : 
     377           0 :     err = cu->cuDeviceGet(&cu_device, device_idx);
     378           0 :     if (err != CUDA_SUCCESS) {
     379           0 :         av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
     380           0 :         goto error;
     381             :     }
     382             : 
     383           0 :     err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
     384           0 :     if (err != CUDA_SUCCESS) {
     385           0 :         av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
     386           0 :         goto error;
     387             :     }
     388             : 
     389           0 :     cu->cuCtxPopCurrent(&dummy);
     390             : 
     391           0 :     hwctx->internal->is_allocated = 1;
     392             : 
     393           0 :     return 0;
     394             : 
     395           0 : error:
     396           0 :     cuda_device_uninit(ctx);
     397           0 :     return AVERROR_UNKNOWN;
     398             : }
     399             : 
     400             : const HWContextType ff_hwcontext_type_cuda = {
     401             :     .type                 = AV_HWDEVICE_TYPE_CUDA,
     402             :     .name                 = "CUDA",
     403             : 
     404             :     .device_hwctx_size    = sizeof(AVCUDADeviceContext),
     405             :     .frames_priv_size     = sizeof(CUDAFramesContext),
     406             : 
     407             :     .device_create        = cuda_device_create,
     408             :     .device_init          = cuda_device_init,
     409             :     .device_uninit        = cuda_device_uninit,
     410             :     .frames_get_constraints = cuda_frames_get_constraints,
     411             :     .frames_init          = cuda_frames_init,
     412             :     .frames_get_buffer    = cuda_get_buffer,
     413             :     .transfer_get_formats = cuda_transfer_get_formats,
     414             :     .transfer_data_to     = cuda_transfer_data_to,
     415             :     .transfer_data_from   = cuda_transfer_data_from,
     416             : 
     417             :     .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
     418             : };

Generated by: LCOV version 1.13