0
0
mirror of https://github.com/mpv-player/mpv.git synced 2024-09-20 20:03:10 +02:00
mpv/video/out/hwdec/hwdec_cuda_gl.c
Philip Langdale 8c1f94f0e7 vo_gpu: hwdec_cuda: Synchronise OpenGL Interop
Previously, there appeared to be implicit synchronisation in the
GL interop path, and we never observed any visual glitches. However,
recently, I started seeing stuttering in the GL path and on closer
examination it looked like read-before-write behaviour where GL
would display an old frame again rather than the current one.

After verifying that disabling hwdec made the problem go away,
I tried adding a cuStreamSynchronize() after the memcpys and that
also resolved the problem, so it's clearly sync related.

cuStreamSynchronize() is a CPU sync and so more heavy-weight than
you want, but it's the only tool we have. There is no mechanism
defined for synchronising GL to CUDA (it looks like there is a way
to synchronise CUDA to EGL, but it appears to be one-way and so wouldn't
directly address this problem).

Anyway, empirically, the output now looks the same as with hwdec
off.
2019-09-28 19:24:24 +03:00

175 lines
5.0 KiB
C

/*
* Copyright (c) 2019 Philip Langdale <philipl@overt.org>
*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "hwdec_cuda.h"
#include "options/m_config.h"
#include "video/out/opengl/formats.h"
#include "video/out/opengl/ra_gl.h"
#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_cuda.h>
#include <unistd.h>
// Wraps a CUDA driver call: forwards its result to check_cu() together with
// the owner of the `mapper` variable in scope at the call site and the
// stringified source text of the call (#x).
#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
// Per-plane OpenGL interop state; one instance is stored in p->ext[n] by
// cuda_ext_gl_init() and freed by cuda_ext_gl_uninit().
struct ext_gl {
// CUDA graphics resource obtained from cuGraphicsGLRegisterImage().
// cuda_ext_gl_uninit() treats a zero handle as "nothing registered".
CUgraphicsResource cu_res;
};
static bool cuda_ext_gl_init(struct ra_hwdec_mapper *mapper,
const struct ra_format *format, int n)
{
struct cuda_hw_priv *p_owner = mapper->owner->priv;
struct cuda_mapper_priv *p = mapper->priv;
CudaFunctions *cu = p_owner->cu;
int ret = 0;
CUcontext dummy;
struct ext_gl *egl = talloc_ptrtype(NULL, egl);
p->ext[n] = egl;
struct ra_tex_params params = {
.dimensions = 2,
.w = mp_image_plane_w(&p->layout, n),
.h = mp_image_plane_h(&p->layout, n),
.d = 1,
.format = format,
.render_src = true,
.src_linear = format->linear_filter,
};
mapper->tex[n] = ra_tex_create(mapper->ra, &params);
if (!mapper->tex[n]) {
goto error;
}
GLuint texture;
GLenum target;
ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&egl->cu_res, texture, target,
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
if (ret < 0)
goto error;
ret = CHECK_CU(cu->cuGraphicsMapResources(1, &egl->cu_res, 0));
if (ret < 0)
goto error;
ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], egl->cu_res,
0, 0));
if (ret < 0)
goto error;
ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &egl->cu_res, 0));
if (ret < 0)
goto error;
return true;
error:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
return false;
}
/*
 * Tear down the OpenGL interop state of plane n.
 *
 * If a CUDA graphics resource is recorded for the plane it is unregistered
 * and the handle is cleared; the per-plane state object is then freed.
 *
 * mapper: mapper owning the per-plane state in its private ext[] array
 * n:      plane index
 */
static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
{
    struct cuda_mapper_priv *mapper_priv = mapper->priv;
    struct cuda_hw_priv *owner_priv = mapper->owner->priv;
    CudaFunctions *cu = owner_priv->cu;
    struct ext_gl *egl = mapper_priv->ext[n];

    // Only touch CUDA when there is state and it holds a registered resource.
    bool has_resource = egl && egl->cu_res;
    if (has_resource) {
        CHECK_CU(cu->cuGraphicsUnregisterResource(egl->cu_res));
        egl->cu_res = 0;
    }

    talloc_free(egl);
}
// From here on CHECK_CU reports through the `hw` variable in scope at the
// call site instead of a mapper's owner.
#undef CHECK_CU
#define CHECK_CU(x) check_cu(hw, (x), #x)
/*
 * Initialise the CUDA/OpenGL interop backend for a hwdec instance.
 *
 * Bails out unless hw->ra is an OpenGL RA providing OpenGL >= 2.1 or
 * OpenGL-ES >= 3.0. Otherwise it obtains a CUDA device via cuGLGetDevices(),
 * creates the display context on it and, by default, uses that same context
 * for decoding. If the "cuda-decode-device" option yields an index > -1 that
 * resolves to a different device, the display context is popped and a
 * separate decode context is created on that device.
 *
 * On success the private state gets do_full_sync = true and the GL
 * ext_init/ext_uninit callbacks.
 *
 * hw: hwdec instance; its priv must already carry the loaded CUDA functions.
 *
 * Returns true on success, false if the RA is unsuitable or a CUDA call fails.
 */
bool cuda_gl_init(const struct ra_hwdec *hw)
{
    struct cuda_hw_priv *p = hw->priv;
    CudaFunctions *cu = p->cu;
    CUcontext dummy;
    int status = 0;

    // This is not an OpenGL RA.
    if (!ra_is_gl(hw->ra))
        return false;

    GL *gl = ra_gl_get(hw->ra);
    if (gl->version < 210 && gl->es < 300) {
        MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
        return false;
    }

    CUdevice display_dev;
    unsigned int device_count;
    status = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
                                         CU_GL_DEVICE_LIST_ALL));
    if (status < 0)
        return false;

    status = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
                                      display_dev));
    if (status < 0)
        return false;

    // Unless a distinct decode device is selected below, decoding shares the
    // display context.
    p->decode_ctx = p->display_ctx;

    int decode_dev_idx = -1;
    mp_read_option_raw(hw->global, "cuda-decode-device", &m_option_type_choice,
                       &decode_dev_idx);

    if (decode_dev_idx >= 0) {
        CUdevice decode_dev;
        status = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx));
        if (status < 0) {
            CHECK_CU(cu->cuCtxPopCurrent(&dummy));
            return false;
        }

        if (decode_dev != display_dev) {
            MP_INFO(hw, "Using separate decoder and display devices\n");

            // Pop the display context. We won't use it again during init()
            status = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
            if (status < 0)
                return false;

            status = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
                                              decode_dev));
            if (status < 0)
                return false;
        }
    }

    // Hook up the GL-specific per-plane callbacks.
    p->ext_init = cuda_ext_gl_init;
    p->ext_uninit = cuda_ext_gl_uninit;

    // We don't have a way to do a GPU sync after copying
    p->do_full_sync = true;

    return true;
}