LCOV - code coverage report
Current view: top level - src/libavutil - hwcontext_cuda.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 182 0.0 %
Date: 2017-01-21 09:32:20 Functions: 0 10 0.0 %

          Line data    Source code
       1             : /*
       2             :  * This file is part of FFmpeg.
       3             :  *
       4             :  * FFmpeg is free software; you can redistribute it and/or
       5             :  * modify it under the terms of the GNU Lesser General Public
       6             :  * License as published by the Free Software Foundation; either
       7             :  * version 2.1 of the License, or (at your option) any later version.
       8             :  *
       9             :  * FFmpeg is distributed in the hope that it will be useful,
      10             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             :  * Lesser General Public License for more details.
      13             :  *
      14             :  * You should have received a copy of the GNU Lesser General Public
      15             :  * License along with FFmpeg; if not, write to the Free Software
      16             :  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
      17             :  */
      18             : 
      19             : #include "buffer.h"
      20             : #include "common.h"
      21             : #include "hwcontext.h"
      22             : #include "hwcontext_internal.h"
      23             : #include "hwcontext_cuda_internal.h"
      24             : #include "mem.h"
      25             : #include "pixdesc.h"
      26             : #include "pixfmt.h"
      27             : 
      /*
       * Alignment passed to FFALIGN() when rounding up a frame's width:
       * cuda_frames_init() applies it to ctx->width for pool sizing, and
       * cuda_get_buffer() applies it to the row width in bytes to obtain the
       * linesize of each plane.
       */
      #define CUDA_FRAME_ALIGNMENT 256
      29             : 
      /*
       * Per-frames-context private data (sized via .frames_priv_size).
       * Both fields are filled in by av_pix_fmt_get_chroma_sub_sample() during
       * cuda_frames_init().
       */
      typedef struct CUDAFramesContext {
          /* Horizontal chroma shift; stored here but not read in this file. */
          int shift_width;
          /* Vertical chroma shift; used to scale the height of planes 1+ in transfers. */
          int shift_height;
      } CUDAFramesContext;
      33             : 
      34             : static const enum AVPixelFormat supported_formats[] = {
      35             :     AV_PIX_FMT_NV12,
      36             :     AV_PIX_FMT_YUV420P,
      37             :     AV_PIX_FMT_YUV444P,
      38             :     AV_PIX_FMT_P010,
      39             :     AV_PIX_FMT_P016,
      40             : };
      41             : 
      42           0 : static void cuda_buffer_free(void *opaque, uint8_t *data)
      43             : {
      44           0 :     AVHWFramesContext *ctx = opaque;
      45           0 :     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
      46           0 :     CudaFunctions *cu = hwctx->internal->cuda_dl;
      47             : 
      48             :     CUcontext dummy;
      49             : 
      50           0 :     cu->cuCtxPushCurrent(hwctx->cuda_ctx);
      51             : 
      52           0 :     cu->cuMemFree((CUdeviceptr)data);
      53             : 
      54           0 :     cu->cuCtxPopCurrent(&dummy);
      55           0 : }
      56             : 
      57           0 : static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
      58             : {
      59           0 :     AVHWFramesContext     *ctx = opaque;
      60           0 :     AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
      61           0 :     CudaFunctions          *cu = hwctx->internal->cuda_dl;
      62             : 
      63           0 :     AVBufferRef *ret = NULL;
      64           0 :     CUcontext dummy = NULL;
      65             :     CUdeviceptr data;
      66             :     CUresult err;
      67             : 
      68           0 :     err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
      69           0 :     if (err != CUDA_SUCCESS) {
      70           0 :         av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
      71           0 :         return NULL;
      72             :     }
      73             : 
      74           0 :     err = cu->cuMemAlloc(&data, size);
      75           0 :     if (err != CUDA_SUCCESS)
      76           0 :         goto fail;
      77             : 
      78           0 :     ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
      79           0 :     if (!ret) {
      80           0 :         cu->cuMemFree(data);
      81           0 :         goto fail;
      82             :     }
      83             : 
      84             : fail:
      85           0 :     cu->cuCtxPopCurrent(&dummy);
      86           0 :     return ret;
      87             : }
      88             : 
      89           0 : static int cuda_frames_init(AVHWFramesContext *ctx)
      90             : {
      91           0 :     CUDAFramesContext *priv = ctx->internal->priv;
      92           0 :     int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
      93             :     int i;
      94             : 
      95           0 :     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
      96           0 :         if (ctx->sw_format == supported_formats[i])
      97           0 :             break;
      98             :     }
      99           0 :     if (i == FF_ARRAY_ELEMS(supported_formats)) {
     100           0 :         av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
     101             :                av_get_pix_fmt_name(ctx->sw_format));
     102           0 :         return AVERROR(ENOSYS);
     103             :     }
     104             : 
     105           0 :     av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
     106             : 
     107           0 :     if (!ctx->pool) {
     108             :         int size;
     109             : 
     110           0 :         switch (ctx->sw_format) {
     111             :         case AV_PIX_FMT_NV12:
     112             :         case AV_PIX_FMT_YUV420P:
     113           0 :             size = aligned_width * ctx->height * 3 / 2;
     114           0 :             break;
     115             :         case AV_PIX_FMT_YUV444P:
     116             :         case AV_PIX_FMT_P010:
     117             :         case AV_PIX_FMT_P016:
     118           0 :             size = aligned_width * ctx->height * 3;
     119           0 :             break;
     120             :         default:
     121           0 :             av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
     122           0 :             return AVERROR_BUG;
     123             :         }
     124             : 
     125           0 :         ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
     126           0 :         if (!ctx->internal->pool_internal)
     127           0 :             return AVERROR(ENOMEM);
     128             :     }
     129             : 
     130           0 :     return 0;
     131             : }
     132             : 
     133           0 : static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
     134             : {
     135             :     int aligned_width;
     136           0 :     int width_in_bytes = ctx->width;
     137             : 
     138           0 :     if (ctx->sw_format == AV_PIX_FMT_P010 ||
     139           0 :         ctx->sw_format == AV_PIX_FMT_P016) {
     140           0 :        width_in_bytes *= 2;
     141             :     }
     142           0 :     aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
     143             : 
     144           0 :     frame->buf[0] = av_buffer_pool_get(ctx->pool);
     145           0 :     if (!frame->buf[0])
     146           0 :         return AVERROR(ENOMEM);
     147             : 
     148           0 :     switch (ctx->sw_format) {
     149             :     case AV_PIX_FMT_NV12:
     150             :     case AV_PIX_FMT_P010:
     151             :     case AV_PIX_FMT_P016:
     152           0 :         frame->data[0]     = frame->buf[0]->data;
     153           0 :         frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
     154           0 :         frame->linesize[0] = aligned_width;
     155           0 :         frame->linesize[1] = aligned_width;
     156           0 :         break;
     157             :     case AV_PIX_FMT_YUV420P:
     158           0 :         frame->data[0]     = frame->buf[0]->data;
     159           0 :         frame->data[2]     = frame->data[0] + aligned_width * ctx->height;
     160           0 :         frame->data[1]     = frame->data[2] + aligned_width * ctx->height / 4;
     161           0 :         frame->linesize[0] = aligned_width;
     162           0 :         frame->linesize[1] = aligned_width / 2;
     163           0 :         frame->linesize[2] = aligned_width / 2;
     164           0 :         break;
     165             :     case AV_PIX_FMT_YUV444P:
     166           0 :         frame->data[0]     = frame->buf[0]->data;
     167           0 :         frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
     168           0 :         frame->data[2]     = frame->data[1] + aligned_width * ctx->height;
     169           0 :         frame->linesize[0] = aligned_width;
     170           0 :         frame->linesize[1] = aligned_width;
     171           0 :         frame->linesize[2] = aligned_width;
     172           0 :         break;
     173             :     default:
     174           0 :         av_frame_unref(frame);
     175           0 :         return AVERROR_BUG;
     176             :     }
     177             : 
     178           0 :     frame->format = AV_PIX_FMT_CUDA;
     179           0 :     frame->width  = ctx->width;
     180           0 :     frame->height = ctx->height;
     181             : 
     182           0 :     return 0;
     183             : }
     184             : 
     185           0 : static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
     186             :                                      enum AVHWFrameTransferDirection dir,
     187             :                                      enum AVPixelFormat **formats)
     188             : {
     189             :     enum AVPixelFormat *fmts;
     190             : 
     191           0 :     fmts = av_malloc_array(2, sizeof(*fmts));
     192           0 :     if (!fmts)
     193           0 :         return AVERROR(ENOMEM);
     194             : 
     195           0 :     fmts[0] = ctx->sw_format;
     196           0 :     fmts[1] = AV_PIX_FMT_NONE;
     197             : 
     198           0 :     *formats = fmts;
     199             : 
     200           0 :     return 0;
     201             : }
     202             : 
     203           0 : static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
     204             :                                    const AVFrame *src)
     205             : {
     206           0 :     CUDAFramesContext           *priv = ctx->internal->priv;
     207           0 :     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
     208           0 :     CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
     209             : 
     210             :     CUcontext dummy;
     211             :     CUresult err;
     212             :     int i;
     213             : 
     214           0 :     err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     215           0 :     if (err != CUDA_SUCCESS)
     216           0 :         return AVERROR_UNKNOWN;
     217             : 
     218           0 :     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
     219           0 :         CUDA_MEMCPY2D cpy = {
     220             :             .srcMemoryType = CU_MEMORYTYPE_DEVICE,
     221             :             .dstMemoryType = CU_MEMORYTYPE_HOST,
     222           0 :             .srcDevice     = (CUdeviceptr)src->data[i],
     223           0 :             .dstHost       = dst->data[i],
     224           0 :             .srcPitch      = src->linesize[i],
     225           0 :             .dstPitch      = dst->linesize[i],
     226           0 :             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
     227           0 :             .Height        = src->height >> (i ? priv->shift_height : 0),
     228             :         };
     229             : 
     230           0 :         err = cu->cuMemcpy2D(&cpy);
     231           0 :         if (err != CUDA_SUCCESS) {
     232           0 :             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
     233           0 :             return AVERROR_UNKNOWN;
     234             :         }
     235             :     }
     236             : 
     237           0 :     cu->cuCtxPopCurrent(&dummy);
     238             : 
     239           0 :     return 0;
     240             : }
     241             : 
     242           0 : static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
     243             :                                  const AVFrame *src)
     244             : {
     245           0 :     CUDAFramesContext           *priv = ctx->internal->priv;
     246           0 :     AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
     247           0 :     CudaFunctions                 *cu = device_hwctx->internal->cuda_dl;
     248             : 
     249             :     CUcontext dummy;
     250             :     CUresult err;
     251             :     int i;
     252             : 
     253           0 :     err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
     254           0 :     if (err != CUDA_SUCCESS)
     255           0 :         return AVERROR_UNKNOWN;
     256             : 
     257           0 :     for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
     258           0 :         CUDA_MEMCPY2D cpy = {
     259             :             .srcMemoryType = CU_MEMORYTYPE_HOST,
     260             :             .dstMemoryType = CU_MEMORYTYPE_DEVICE,
     261           0 :             .srcHost       = src->data[i],
     262           0 :             .dstDevice     = (CUdeviceptr)dst->data[i],
     263           0 :             .srcPitch      = src->linesize[i],
     264           0 :             .dstPitch      = dst->linesize[i],
     265           0 :             .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
     266           0 :             .Height        = src->height >> (i ? priv->shift_height : 0),
     267             :         };
     268             : 
     269           0 :         err = cu->cuMemcpy2D(&cpy);
     270           0 :         if (err != CUDA_SUCCESS) {
     271           0 :             av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
     272           0 :             return AVERROR_UNKNOWN;
     273             :         }
     274             :     }
     275             : 
     276           0 :     cu->cuCtxPopCurrent(&dummy);
     277             : 
     278           0 :     return 0;
     279             : }
     280             : 
     281           0 : static void cuda_device_uninit(AVHWDeviceContext *ctx)
     282             : {
     283           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     284             : 
     285           0 :     if (hwctx->internal) {
     286           0 :         if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
     287           0 :             hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
     288           0 :             hwctx->cuda_ctx = NULL;
     289             :         }
     290           0 :         cuda_free_functions(&hwctx->internal->cuda_dl);
     291             :     }
     292             : 
     293           0 :     av_freep(&hwctx->internal);
     294           0 : }
     295             : 
     296           0 : static int cuda_device_init(AVHWDeviceContext *ctx)
     297             : {
     298           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     299             :     int ret;
     300             : 
     301           0 :     if (!hwctx->internal) {
     302           0 :         hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
     303           0 :         if (!hwctx->internal)
     304           0 :             return AVERROR(ENOMEM);
     305             :     }
     306             : 
     307           0 :     if (!hwctx->internal->cuda_dl) {
     308           0 :         ret = cuda_load_functions(&hwctx->internal->cuda_dl);
     309           0 :         if (ret < 0) {
     310           0 :             av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
     311           0 :             goto error;
     312             :         }
     313             :     }
     314             : 
     315           0 :     return 0;
     316             : 
     317             : error:
     318           0 :     cuda_device_uninit(ctx);
     319           0 :     return ret;
     320             : }
     321             : 
     322           0 : static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
     323             :                               AVDictionary *opts, int flags)
     324             : {
     325           0 :     AVCUDADeviceContext *hwctx = ctx->hwctx;
     326             :     CudaFunctions *cu;
     327             :     CUdevice cu_device;
     328             :     CUcontext dummy;
     329             :     CUresult err;
     330           0 :     int device_idx = 0;
     331             : 
     332           0 :     if (device)
     333           0 :         device_idx = strtol(device, NULL, 0);
     334             : 
     335           0 :     if (cuda_device_init(ctx) < 0)
     336           0 :         goto error;
     337             : 
     338           0 :     cu = hwctx->internal->cuda_dl;
     339             : 
     340           0 :     err = cu->cuInit(0);
     341           0 :     if (err != CUDA_SUCCESS) {
     342           0 :         av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
     343           0 :         goto error;
     344             :     }
     345             : 
     346           0 :     err = cu->cuDeviceGet(&cu_device, device_idx);
     347           0 :     if (err != CUDA_SUCCESS) {
     348           0 :         av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
     349           0 :         goto error;
     350             :     }
     351             : 
     352           0 :     err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
     353           0 :     if (err != CUDA_SUCCESS) {
     354           0 :         av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
     355           0 :         goto error;
     356             :     }
     357             : 
     358           0 :     cu->cuCtxPopCurrent(&dummy);
     359             : 
     360           0 :     hwctx->internal->is_allocated = 1;
     361             : 
     362           0 :     return 0;
     363             : 
     364             : error:
     365           0 :     cuda_device_uninit(ctx);
     366           0 :     return AVERROR_UNKNOWN;
     367             : }
     368             : 
     369             : const HWContextType ff_hwcontext_type_cuda = {
     370             :     .type                 = AV_HWDEVICE_TYPE_CUDA,
     371             :     .name                 = "CUDA",
     372             : 
     373             :     .device_hwctx_size    = sizeof(AVCUDADeviceContext),
     374             :     .frames_priv_size     = sizeof(CUDAFramesContext),
     375             : 
     376             :     .device_create        = cuda_device_create,
     377             :     .device_init          = cuda_device_init,
     378             :     .device_uninit        = cuda_device_uninit,
     379             :     .frames_init          = cuda_frames_init,
     380             :     .frames_get_buffer    = cuda_get_buffer,
     381             :     .transfer_get_formats = cuda_transfer_get_formats,
     382             :     .transfer_data_to     = cuda_transfer_data_to,
     383             :     .transfer_data_from   = cuda_transfer_data_from,
     384             : 
     385             :     .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
     386             : };

Generated by: LCOV version 1.12