mirror of
https://github.com/mpv-player/mpv.git
synced 2024-09-20 12:02:23 +02:00
vo_opengl: refactor RA texture and buffer updates
- tex_upload args are moved to a struct
- the ability to directly upload texture data without going through a buffer is made explicit
- the concept of buffer updates and buffer polling is made more explicit and generalized to buf_update as well (not just mapped buffers)
- the ability to call tex_upload/buf_update on a tex/buf is made explicit during tex/buf creation
- uploading from buffers now uses an explicit offset instead of implicitly comparing *src against buf->data, because not all buffers may actually be persistently mapped
- the initial_data = immutable requirement is dropped. (May be re-added later for D3D11 if that ever becomes a thing)

This change helps the vulkan abstraction immensely and also helps move common code (like the PBO pooling) out of ra_gl and into opengl/utils.c.

This also technically has the side-benefit / side-constraint of using PBOs for OSD texture uploads as well, which actually seems to help performance on machines where --opengl-pbo is faster than the naive code path. Because of this, I decided to hook up the OSD code to the opengl-pbo option as well.

One drawback of this refactor is that the GL_STREAM_COPY hack for texture uploads "got lost", but I think I'm happy with that going away anyway since DR almost fully deprecates it, and it's not the "right thing" anyway - but instead an nvidia-only hack to make this stuff work somewhat better on NUMA systems with discrete GPUs.

Another change is that due to the way fencing works with ra_buf (we get one fence per ra_buf per upload) we have to use multiple ra_bufs instead of offsets into a shared buffer. But for OpenGL this is probably better anyway. It's possible that in future, we could support having independent "buffer slices" (each with their own fence/sync object), but this would be an optimization more than anything.
I also think that we could address the underlying problem (memory closeness) differently by making the ra_vk memory allocator smart enough to chunk together allocations under the hood.
This commit is contained in:
parent
9ca5a2a5d8
commit
46d86da630
@ -309,6 +309,13 @@ static const struct gl_functions gl_functions[] = {
|
||||
{0}
|
||||
},
|
||||
},
|
||||
{
|
||||
.ver_core = 430,
|
||||
.functions = (const struct gl_function[]) {
|
||||
DEF_FN(InvalidateTexImage),
|
||||
{0}
|
||||
},
|
||||
},
|
||||
{
|
||||
.ver_core = 430,
|
||||
.ver_es_core = 300,
|
||||
|
@ -194,6 +194,7 @@ struct GL {
|
||||
void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
|
||||
const GLfloat *);
|
||||
|
||||
void (GLAPIENTRY *InvalidateTexImage)(GLuint, GLint);
|
||||
void (GLAPIENTRY *InvalidateFramebuffer)(GLenum, GLsizei, const GLenum *);
|
||||
|
||||
GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield);
|
||||
|
@ -269,72 +269,6 @@ void gl_set_debug_logger(GL *gl, struct mp_log *log)
|
||||
gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log);
|
||||
}
|
||||
|
||||
// Upload a texture, going through a PBO. PBO supposedly can facilitate
|
||||
// asynchronous copy from CPU to GPU, so this is an optimization. Note that
|
||||
// changing format/type/tex_w/tex_h or reusing the PBO in the same frame can
|
||||
// ruin performance.
|
||||
// This call is like gl_upload_tex(), plus PBO management/use.
|
||||
// target, format, type, dataptr, stride, x, y, w, h: texture upload params
|
||||
// (see gl_upload_tex())
|
||||
// tex_w, tex_h: maximum size of the used texture
|
||||
// use_pbo: for convenience, if false redirects the call to gl_upload_tex
|
||||
void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
|
||||
GLenum target, GLenum format, GLenum type,
|
||||
int tex_w, int tex_h, const void *dataptr, int stride,
|
||||
int x, int y, int w, int h)
|
||||
{
|
||||
assert(x >= 0 && y >= 0 && w >= 0 && h >= 0);
|
||||
assert(x + w <= tex_w && y + h <= tex_h);
|
||||
|
||||
if (!use_pbo) {
|
||||
gl_upload_tex(gl, target, format, type, dataptr, stride, x, y, w, h);
|
||||
return;
|
||||
}
|
||||
|
||||
// We align the buffer size to 4096 to avoid possible subregion
|
||||
// dependencies. This is not a strict requirement (the spec requires no
|
||||
// alignment), but a good precaution for performance reasons
|
||||
size_t needed_size = stride * h;
|
||||
size_t buffer_size = MP_ALIGN_UP(needed_size, 4096);
|
||||
|
||||
if (buffer_size != pbo->buffer_size)
|
||||
gl_pbo_upload_uninit(pbo);
|
||||
|
||||
if (!pbo->buffer) {
|
||||
pbo->gl = gl;
|
||||
pbo->buffer_size = buffer_size;
|
||||
gl->GenBuffers(1, &pbo->buffer);
|
||||
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
|
||||
// Magic time: Because we memcpy once from RAM to the buffer, and then
|
||||
// the GPU needs to read from this anyway, we actually *don't* want
|
||||
// this buffer to be allocated in RAM. If we allocate it in VRAM
|
||||
// instead, we can reduce this to a single copy: from RAM into VRAM.
|
||||
// Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
|
||||
// allocated on host memory instead of device memory, so we lie about
|
||||
// the usage to fool the driver into giving us a buffer in VRAM instead
|
||||
// of RAM, which can be significantly faster for our use case.
|
||||
// Seriously, fuck OpenGL.
|
||||
gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
|
||||
NULL, GL_STREAM_COPY);
|
||||
}
|
||||
|
||||
uintptr_t offset = buffer_size * pbo->index;
|
||||
pbo->index = (pbo->index + 1) % NUM_PBO_BUFFERS;
|
||||
|
||||
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
|
||||
gl->BufferSubData(GL_PIXEL_UNPACK_BUFFER, offset, needed_size, dataptr);
|
||||
gl_upload_tex(gl, target, format, type, (void *)offset, stride, x, y, w, h);
|
||||
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo)
|
||||
{
|
||||
if (pbo->gl)
|
||||
pbo->gl->DeleteBuffers(1, &pbo->buffer);
|
||||
|
||||
*pbo = (struct gl_pbo_upload){0};
|
||||
}
|
||||
|
||||
int gl_get_fb_depth(GL *gl, int fbo)
|
||||
{
|
||||
if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB))
|
||||
|
@ -51,21 +51,6 @@ void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
|
||||
|
||||
void gl_set_debug_logger(GL *gl, struct mp_log *log);
|
||||
|
||||
#define NUM_PBO_BUFFERS 3
|
||||
|
||||
struct gl_pbo_upload {
|
||||
GL *gl;
|
||||
int index;
|
||||
GLuint buffer;
|
||||
size_t buffer_size;
|
||||
};
|
||||
|
||||
void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
|
||||
GLenum target, GLenum format, GLenum type,
|
||||
int tex_w, int tex_h, const void *dataptr, int stride,
|
||||
int x, int y, int w, int h);
|
||||
void gl_pbo_upload_uninit(struct gl_pbo_upload *pbo);
|
||||
|
||||
int gl_get_fb_depth(GL *gl, int fbo);
|
||||
|
||||
#endif
|
||||
|
@ -54,6 +54,7 @@ struct mpgl_osd_part {
|
||||
enum sub_bitmap_format format;
|
||||
int change_id;
|
||||
struct ra_tex *texture;
|
||||
struct tex_upload pbo;
|
||||
int w, h;
|
||||
int num_subparts;
|
||||
int prev_num_subparts;
|
||||
@ -70,6 +71,7 @@ struct mpgl_osd {
|
||||
const struct ra_format *fmt_table[SUBBITMAP_COUNT];
|
||||
bool formats[SUBBITMAP_COUNT];
|
||||
bool change_flag; // for reporting to API user only
|
||||
bool want_pbo;
|
||||
// temporary
|
||||
int stereo_mode;
|
||||
struct mp_osd_res osd_res;
|
||||
@ -77,7 +79,7 @@ struct mpgl_osd {
|
||||
};
|
||||
|
||||
struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
|
||||
struct osd_state *osd)
|
||||
struct osd_state *osd, bool want_pbo)
|
||||
{
|
||||
struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx);
|
||||
*ctx = (struct mpgl_osd) {
|
||||
@ -86,6 +88,7 @@ struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
|
||||
.ra = ra,
|
||||
.change_flag = true,
|
||||
.scratch = talloc_zero_size(ctx, 1),
|
||||
.want_pbo = want_pbo,
|
||||
};
|
||||
|
||||
ctx->fmt_table[SUBBITMAP_LIBASS] = ra_find_unorm_format(ra, 1, 1);
|
||||
@ -108,6 +111,7 @@ void mpgl_osd_destroy(struct mpgl_osd *ctx)
|
||||
for (int n = 0; n < MAX_OSD_PARTS; n++) {
|
||||
struct mpgl_osd_part *p = ctx->parts[n];
|
||||
ra_tex_free(ctx->ra, &p->texture);
|
||||
tex_upload_uninit(ctx->ra, &p->pbo);
|
||||
}
|
||||
talloc_free(ctx);
|
||||
}
|
||||
@ -161,18 +165,22 @@ static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd,
|
||||
.format = fmt,
|
||||
.render_src = true,
|
||||
.src_linear = true,
|
||||
.host_mutable = true,
|
||||
};
|
||||
osd->texture = ra_tex_create(ra, ¶ms);
|
||||
if (!osd->texture)
|
||||
goto done;
|
||||
}
|
||||
|
||||
struct mp_rect rc = {0, 0, imgs->packed_w, imgs->packed_h};
|
||||
ra->fns->tex_upload(ra, osd->texture, imgs->packed->planes[0],
|
||||
imgs->packed->stride[0], &rc, RA_TEX_UPLOAD_DISCARD,
|
||||
NULL);
|
||||
struct ra_tex_upload_params params = {
|
||||
.tex = osd->texture,
|
||||
.src = imgs->packed->planes[0],
|
||||
.invalidate = true,
|
||||
.rc = &(struct mp_rect){0, 0, imgs->packed_w, imgs->packed_h},
|
||||
.stride = imgs->packed->stride[0],
|
||||
};
|
||||
|
||||
ok = true;
|
||||
ok = tex_upload(ra, &osd->pbo, ctx->want_pbo, ¶ms);
|
||||
|
||||
done:
|
||||
return ok;
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "sub/osd.h"
|
||||
|
||||
struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
|
||||
struct osd_state *osd);
|
||||
struct osd_state *osd, bool want_pbo);
|
||||
void mpgl_osd_destroy(struct mpgl_osd *ctx);
|
||||
|
||||
void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts,
|
||||
|
@ -30,11 +30,6 @@ struct ra {
|
||||
// formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
|
||||
struct ra_format **formats;
|
||||
int num_formats;
|
||||
|
||||
// GL-specific: if set, accelerate texture upload by using an additional
|
||||
// buffer (i.e. uses more memory). Does not affect uploads done by
|
||||
// ra_tex_create (if initial_data is set). Set by the RA user.
|
||||
bool use_pbo;
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -42,7 +37,7 @@ enum {
|
||||
RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader inputs)
|
||||
RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit
|
||||
RA_CAP_COMPUTE = 1 << 3, // supports compute shaders
|
||||
RA_CAP_PBO = 1 << 4, // supports ra.use_pbo
|
||||
RA_CAP_DIRECT_UPLOAD = 1 << 4, // supports tex_upload without ra_buf
|
||||
RA_CAP_BUF_RW = 1 << 5, // supports RA_VARTYPE_BUF_RW
|
||||
RA_CAP_NESTED_ARRAY = 1 << 6, // supports nested arrays
|
||||
};
|
||||
@ -92,6 +87,7 @@ struct ra_tex_params {
|
||||
bool render_dst; // must be useable as target texture in a shader
|
||||
bool blit_src; // must be usable as a blit source
|
||||
bool blit_dst; // must be usable as a blit destination
|
||||
bool host_mutable; // texture may be updated with tex_upload
|
||||
// When used as render source texture.
|
||||
bool src_linear; // if false, use nearest sampling (whether this can
|
||||
// be true depends on ra_format.linear_filter)
|
||||
@ -100,8 +96,9 @@ struct ra_tex_params {
|
||||
bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy
|
||||
// always set to false, except in OSX code
|
||||
bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy
|
||||
// If non-NULL, the texture will be created with these contents, and is
|
||||
// considered immutable afterwards (no upload, mapping, or rendering to it).
|
||||
// If non-NULL, the texture will be created with these contents. Using
|
||||
// this does *not* require setting host_mutable. Otherwise, the initial
|
||||
// data is undefined.
|
||||
void *initial_data;
|
||||
};
|
||||
|
||||
@ -118,6 +115,19 @@ struct ra_tex {
|
||||
void *priv;
|
||||
};
|
||||
|
||||
// Arguments for ra_fns.tex_upload. Exactly one data source is used: either
// `buf` (+ `buf_offset`) or `src`.
struct ra_tex_upload_params {
    struct ra_tex *tex; // Destination texture
    bool invalidate;    // If set, pre-existing data outside the uploaded
                        // region may be discarded

    // Source when uploading from a buffer:
    struct ra_buf *buf; // Buffer holding the data (mutually exclusive with
                        // `src`)
    size_t buf_offset;  // Byte offset of the data within `buf`

    // Source when uploading directly (requires RA_CAP_DIRECT_UPLOAD):
    const void *src;    // Address of the data

    // Only meaningful for 2D textures:
    struct mp_rect *rc; // Region to upload; NULL selects the entire image
    ptrdiff_t stride;   // Size of one horizontal line in bytes (*not* texels!)
};
|
||||
|
||||
// Buffer type hint. Setting this may result in more or less efficient
|
||||
// operation, although it shouldn't technically prohibit anything
|
||||
enum ra_buf_type {
|
||||
@ -129,8 +139,8 @@ enum ra_buf_type {
|
||||
struct ra_buf_params {
|
||||
enum ra_buf_type type;
|
||||
size_t size;
|
||||
// Creates a read-writable persistent mapping (ra_buf.data)
|
||||
bool host_mapped;
|
||||
bool host_mapped; // create a read-writable persistent mapping (ra_buf.data)
|
||||
bool host_mutable; // contents may be updated via buf_update()
|
||||
// If non-NULL, the buffer will be created with these contents. Otherwise,
|
||||
// the initial data is undefined.
|
||||
void *initial_data;
|
||||
@ -288,11 +298,6 @@ struct ra_renderpass_run_params {
|
||||
int compute_groups[3];
|
||||
};
|
||||
|
||||
enum {
|
||||
// Flags for the texture_upload flags parameter.
|
||||
RA_TEX_UPLOAD_DISCARD = 1 << 0, // discard pre-existing data not in the region
|
||||
};
|
||||
|
||||
// This is an opaque type provided by the implementation, but we want to at
|
||||
// least give it a saner name than void* for code readability purposes.
|
||||
typedef void ra_timer;
|
||||
@ -311,27 +316,13 @@ struct ra_fns {
|
||||
|
||||
void (*tex_destroy)(struct ra *ra, struct ra_tex *tex);
|
||||
|
||||
// Copy from CPU RAM to the texture. This is an extremely common operation.
|
||||
// Unlike with OpenGL, the src data has to have exactly the same format as
|
||||
// the texture, and no conversion is supported.
|
||||
// region can be NULL - if it's not NULL, then the provided pointer only
|
||||
// contains data for the given region. Only part of the texture data is
|
||||
// updated, and ptr points to the first pixel in the region. If
|
||||
// RA_TEX_UPLOAD_DISCARD is set, data outside of the region can return to
|
||||
// an uninitialized state. The region is always strictly within the texture
|
||||
// and has a size >0 in both dimensions. 2D textures only.
|
||||
// For 1D textures, stride is ignored, and region must be NULL.
|
||||
// For 3D textures, stride is not supported. All data is fully packed with
|
||||
// no padding, and stride is ignored, and region must be NULL.
|
||||
// If buf is not NULL, then src must be within the provided buffer. The
|
||||
// operation is implied to have dramatically better performance, but
|
||||
// requires correct flushing and fencing operations by the caller to deal
|
||||
// with asynchronous host/GPU behavior. If any of these conditions are not
|
||||
// met, undefined behavior will result.
|
||||
void (*tex_upload)(struct ra *ra, struct ra_tex *tex,
|
||||
const void *src, ptrdiff_t stride,
|
||||
struct mp_rect *region, uint64_t flags,
|
||||
struct ra_buf *buf);
|
||||
// Copy the contents of a buffer to a texture. This is an extremely common
|
||||
// operation. The contents of the buffer must exactly match the format of
|
||||
// the image - conversions between bit depth etc. are not supported.
|
||||
// The buffer *may* be marked as "in use" while this operation is going on,
|
||||
// and the contents must not be touched again by the API user until
|
||||
// buf_poll returns true.
|
||||
void (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params);
|
||||
|
||||
// Create a buffer. This can be used as a persistently mapped buffer,
|
||||
// a uniform buffer, a shader storage buffer or possibly others.
|
||||
@ -341,13 +332,18 @@ struct ra_fns {
|
||||
|
||||
void (*buf_destroy)(struct ra *ra, struct ra_buf *buf);
|
||||
|
||||
// Essentially a fence: once the GPU uses the mapping for read-access (e.g.
|
||||
// by starting a texture upload), the host must not write to the mapped
|
||||
// data until an internal object has been signalled. This call returns
|
||||
// whether it was signalled yet. If true, write accesses are allowed again.
|
||||
// Optional, may be NULL if unavailable. This is only usable for buffers
|
||||
// which have been persistently mapped.
|
||||
bool (*poll_mapped_buffer)(struct ra *ra, struct ra_buf *buf);
|
||||
// Update the contents of a buffer, starting at a given offset and up to a
|
||||
// given size, with the contents of *data. This is an extremely common
|
||||
// operation. Calling this while the buffer is considered "in use" is an
|
||||
// error. (See: buf_poll)
|
||||
void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
|
||||
const void *data, size_t size);
|
||||
|
||||
// Returns if a buffer is currently "in use" or not. Updating the contents
|
||||
// of a buffer (via buf_update or writing to buf->data) while it is still
|
||||
// in use is an error and may result in graphical corruption. Optional, if
|
||||
// NULL then all buffers are always usable.
|
||||
bool (*buf_poll)(struct ra *ra, struct ra_buf *buf);
|
||||
|
||||
// Clear the dst with the given color (rgba) and within the given scissor.
|
||||
// dst must have dst->params.render_dst==true. Content outside of the
|
||||
|
@ -23,11 +23,11 @@ struct ra_tex_gl {
|
||||
GLint internal_format;
|
||||
GLenum format;
|
||||
GLenum type;
|
||||
struct gl_pbo_upload pbo;
|
||||
};
|
||||
|
||||
// For ra_buf.priv
|
||||
struct ra_buf_gl {
|
||||
GLenum target;
|
||||
GLuint buffer;
|
||||
GLsync fence;
|
||||
};
|
||||
@ -90,7 +90,7 @@ static int ra_init_gl(struct ra *ra, GL *gl)
|
||||
ra_gl_set_debug(ra, true);
|
||||
|
||||
ra->fns = &ra_fns_gl;
|
||||
ra->caps = 0;
|
||||
ra->caps = RA_CAP_DIRECT_UPLOAD;
|
||||
if (gl->mpgl_caps & MPGL_CAP_1D_TEX)
|
||||
ra->caps |= RA_CAP_TEX_1D;
|
||||
if (gl->mpgl_caps & MPGL_CAP_3D_TEX)
|
||||
@ -99,8 +99,6 @@ static int ra_init_gl(struct ra *ra, GL *gl)
|
||||
ra->caps |= RA_CAP_BLIT;
|
||||
if (gl->mpgl_caps & MPGL_CAP_COMPUTE_SHADER)
|
||||
ra->caps |= RA_CAP_COMPUTE;
|
||||
if (gl->MapBufferRange)
|
||||
ra->caps |= RA_CAP_PBO;
|
||||
if (gl->mpgl_caps & MPGL_CAP_NESTED_ARRAY)
|
||||
ra->caps |= RA_CAP_NESTED_ARRAY;
|
||||
if (gl->mpgl_caps & MPGL_CAP_SSBO)
|
||||
@ -226,7 +224,6 @@ static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex)
|
||||
|
||||
gl->DeleteTextures(1, &tex_gl->texture);
|
||||
}
|
||||
gl_pbo_upload_uninit(&tex_gl->pbo);
|
||||
talloc_free(tex_gl);
|
||||
talloc_free(tex);
|
||||
}
|
||||
@ -427,40 +424,42 @@ bool ra_is_gl(struct ra *ra)
|
||||
return ra->fns == &ra_fns_gl;
|
||||
}
|
||||
|
||||
static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,
|
||||
const void *src, ptrdiff_t stride,
|
||||
struct mp_rect *rc, uint64_t flags,
|
||||
struct ra_buf *buf)
|
||||
static void gl_tex_upload(struct ra *ra,
|
||||
const struct ra_tex_upload_params *params)
|
||||
{
|
||||
GL *gl = ra_gl_get(ra);
|
||||
struct ra_tex *tex = params->tex;
|
||||
struct ra_buf *buf = params->buf;
|
||||
struct ra_tex_gl *tex_gl = tex->priv;
|
||||
struct ra_buf_gl *buf_gl = NULL;
|
||||
struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
|
||||
struct ra_buf_gl *buf_gl = buf ? buf->priv : NULL;
|
||||
assert(tex->params.host_mutable);
|
||||
assert(!params->buf || !params->src);
|
||||
|
||||
const void *src = params->src;
|
||||
if (buf) {
|
||||
buf_gl = buf->priv;
|
||||
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer);
|
||||
src = (void *)((uintptr_t)src - (uintptr_t)buf->data);
|
||||
src = (void *)params->buf_offset;
|
||||
}
|
||||
|
||||
gl->BindTexture(tex_gl->target, tex_gl->texture);
|
||||
if (params->invalidate && gl->InvalidateTexImage)
|
||||
gl->InvalidateTexImage(tex_gl->texture, 0);
|
||||
|
||||
switch (tex->params.dimensions) {
|
||||
case 1:
|
||||
assert(!rc);
|
||||
gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format,
|
||||
tex->params.w, 0, tex_gl->format, tex_gl->type, src);
|
||||
break;
|
||||
case 2:
|
||||
if (!rc)
|
||||
rc = &full;
|
||||
gl_pbo_upload_tex(&tex_gl->pbo, gl, ra->use_pbo && !buf,
|
||||
tex_gl->target, tex_gl->format, tex_gl->type,
|
||||
tex->params.w, tex->params.h, src, stride,
|
||||
rc->x0, rc->y0, rc->x1 - rc->x0, rc->y1 - rc->y0);
|
||||
case 2: {
|
||||
struct mp_rect rc = {0, 0, tex->params.w, tex->params.h};
|
||||
if (params->rc)
|
||||
rc = *params->rc;
|
||||
gl_upload_tex(gl, tex_gl->target, tex_gl->format, tex_gl->type,
|
||||
src, params->stride, rc.x0, rc.y0, rc.x1 - rc.x0,
|
||||
rc.y1 - rc.y0);
|
||||
break;
|
||||
}
|
||||
case 3:
|
||||
assert(!rc);
|
||||
gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w,
|
||||
tex->params.h, tex->params.d, 0, tex_gl->format,
|
||||
@ -473,11 +472,13 @@ static void gl_tex_upload(struct ra *ra, struct ra_tex *tex,
|
||||
|
||||
if (buf) {
|
||||
gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
// Make sure the PBO is not reused until GL is done with it. If a
|
||||
// previous operation is pending, "update" it by creating a new
|
||||
// fence that will cover the previous operation as well.
|
||||
gl->DeleteSync(buf_gl->fence);
|
||||
buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
if (buf->params.host_mapped) {
|
||||
// Make sure the PBO is not reused until GL is done with it. If a
|
||||
// previous operation is pending, "update" it by creating a new
|
||||
// fence that will cover the previous operation as well.
|
||||
gl->DeleteSync(buf_gl->fence);
|
||||
buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -491,10 +492,9 @@ static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf)
|
||||
|
||||
gl->DeleteSync(buf_gl->fence);
|
||||
if (buf->data) {
|
||||
// The target type used here doesn't matter at all to OpenGL
|
||||
gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer);
|
||||
gl->UnmapBuffer(GL_ARRAY_BUFFER);
|
||||
gl->BindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
gl->BindBuffer(buf_gl->target, buf_gl->buffer);
|
||||
gl->UnmapBuffer(buf_gl->target);
|
||||
gl->BindBuffer(buf_gl->target, 0);
|
||||
}
|
||||
gl->DeleteBuffers(1, &buf_gl->buffer);
|
||||
|
||||
@ -517,14 +517,13 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
|
||||
struct ra_buf_gl *buf_gl = buf->priv = talloc_zero(NULL, struct ra_buf_gl);
|
||||
gl->GenBuffers(1, &buf_gl->buffer);
|
||||
|
||||
GLenum target;
|
||||
switch (params->type) {
|
||||
case RA_BUF_TYPE_TEX_UPLOAD: target = GL_PIXEL_UNPACK_BUFFER; break;
|
||||
case RA_BUF_TYPE_SHADER_STORAGE: target = GL_SHADER_STORAGE_BUFFER; break;
|
||||
case RA_BUF_TYPE_TEX_UPLOAD: buf_gl->target = GL_PIXEL_UNPACK_BUFFER; break;
|
||||
case RA_BUF_TYPE_SHADER_STORAGE: buf_gl->target = GL_SHADER_STORAGE_BUFFER; break;
|
||||
default: abort();
|
||||
};
|
||||
|
||||
gl->BindBuffer(target, buf_gl->buffer);
|
||||
gl->BindBuffer(buf_gl->target, buf_gl->buffer);
|
||||
|
||||
if (params->host_mapped) {
|
||||
unsigned flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT |
|
||||
@ -534,8 +533,9 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
|
||||
if (params->type == RA_BUF_TYPE_TEX_UPLOAD)
|
||||
storflags |= GL_CLIENT_STORAGE_BIT;
|
||||
|
||||
gl->BufferStorage(target, params->size, params->initial_data, storflags);
|
||||
buf->data = gl->MapBufferRange(target, 0, params->size, flags);
|
||||
gl->BufferStorage(buf_gl->target, params->size, params->initial_data,
|
||||
storflags);
|
||||
buf->data = gl->MapBufferRange(buf_gl->target, 0, params->size, flags);
|
||||
if (!buf->data) {
|
||||
gl_check_error(gl, ra->log, "mapping buffer");
|
||||
gl_buf_destroy(ra, buf);
|
||||
@ -549,16 +549,31 @@ static struct ra_buf *gl_buf_create(struct ra *ra,
|
||||
default: abort();
|
||||
}
|
||||
|
||||
gl->BufferData(target, params->size, params->initial_data, hint);
|
||||
gl->BufferData(buf_gl->target, params->size, params->initial_data, hint);
|
||||
}
|
||||
|
||||
gl->BindBuffer(target, 0);
|
||||
gl->BindBuffer(buf_gl->target, 0);
|
||||
return buf;
|
||||
}
|
||||
|
||||
static bool gl_poll_mapped_buffer(struct ra *ra, struct ra_buf *buf)
|
||||
static void gl_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
|
||||
const void *data, size_t size)
|
||||
{
|
||||
assert(buf->data);
|
||||
GL *gl = ra_gl_get(ra);
|
||||
struct ra_buf_gl *buf_gl = buf->priv;
|
||||
assert(buf->params.host_mutable);
|
||||
|
||||
gl->BindBuffer(buf_gl->target, buf_gl->buffer);
|
||||
gl->BufferSubData(buf_gl->target, offset, size, data);
|
||||
gl->BindBuffer(buf_gl->target, 0);
|
||||
}
|
||||
|
||||
static bool gl_buf_poll(struct ra *ra, struct ra_buf *buf)
|
||||
{
|
||||
// Non-persistently mapped buffers are always implicitly reusable in OpenGL,
|
||||
// the implementation will create more buffers under the hood if needed.
|
||||
if (!buf->data)
|
||||
return true;
|
||||
|
||||
GL *gl = ra_gl_get(ra);
|
||||
struct ra_buf_gl *buf_gl = buf->priv;
|
||||
@ -1080,7 +1095,8 @@ static struct ra_fns ra_fns_gl = {
|
||||
.tex_upload = gl_tex_upload,
|
||||
.buf_create = gl_buf_create,
|
||||
.buf_destroy = gl_buf_destroy,
|
||||
.poll_mapped_buffer = gl_poll_mapped_buffer,
|
||||
.buf_update = gl_buf_update,
|
||||
.buf_poll = gl_buf_poll,
|
||||
.clear = gl_clear,
|
||||
.blit = gl_blit,
|
||||
.renderpass_create = gl_renderpass_create,
|
||||
|
@ -120,6 +120,66 @@ void fbotex_uninit(struct fbotex *fbo)
|
||||
}
|
||||
}
|
||||
|
||||
bool tex_upload(struct ra *ra, struct tex_upload *pbo, bool want_pbo,
|
||||
const struct ra_tex_upload_params *params)
|
||||
{
|
||||
if (!(ra->caps & RA_CAP_DIRECT_UPLOAD))
|
||||
want_pbo = true;
|
||||
|
||||
if (!want_pbo || params->buf) {
|
||||
ra->fns->tex_upload(ra, params);
|
||||
return true;
|
||||
}
|
||||
|
||||
struct ra_tex *tex = params->tex;
|
||||
size_t row_size = tex->params.dimensions == 2 ? params->stride :
|
||||
tex->params.w * tex->params.format->pixel_size;
|
||||
size_t needed_size = row_size * tex->params.h * tex->params.d;
|
||||
|
||||
if (needed_size > pbo->buffer_size)
|
||||
tex_upload_uninit(ra, pbo);
|
||||
|
||||
if (!pbo->buffers[0]) {
|
||||
struct ra_buf_params bufparams = {
|
||||
.type = RA_BUF_TYPE_TEX_UPLOAD,
|
||||
.size = needed_size,
|
||||
.host_mutable = true,
|
||||
};
|
||||
|
||||
pbo->buffer_size = bufparams.size;
|
||||
for (int i = 0; i < NUM_PBO_BUFFERS; i++) {
|
||||
pbo->buffers[i] = ra_buf_create(ra, &bufparams);
|
||||
if (!pbo->buffers[i])
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct ra_buf *buf = pbo->buffers[pbo->index++];
|
||||
pbo->index %= NUM_PBO_BUFFERS;
|
||||
|
||||
if (!ra->fns->buf_poll(ra, buf)) {
|
||||
MP_WARN(ra, "Texture upload buffer was not free to use! Try "
|
||||
"increasing NUM_PBO_BUFFERS.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
ra->fns->buf_update(ra, buf, 0, params->src, needed_size);
|
||||
|
||||
struct ra_tex_upload_params newparams = *params;
|
||||
newparams.buf = buf;
|
||||
newparams.src = NULL;
|
||||
|
||||
ra->fns->tex_upload(ra, &newparams);
|
||||
return true;
|
||||
}
|
||||
|
||||
void tex_upload_uninit(struct ra *ra, struct tex_upload *pbo)
|
||||
{
|
||||
for (int i = 0; i < NUM_PBO_BUFFERS; i++)
|
||||
ra_buf_free(ra, &pbo->buffers[i]);
|
||||
*pbo = (struct tex_upload){0};
|
||||
}
|
||||
|
||||
struct timer_pool {
|
||||
struct ra *ra;
|
||||
ra_timer *timer;
|
||||
|
@ -83,6 +83,21 @@ bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log,
|
||||
#define FBOTEX_FUZZY_H 2
|
||||
#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H)
|
||||
|
||||
#define NUM_PBO_BUFFERS 3
|
||||
|
||||
// A wrapper around tex_upload that uses PBOs internally if requested or
|
||||
// required
|
||||
struct tex_upload {
|
||||
size_t buffer_size;
|
||||
struct ra_buf *buffers[NUM_PBO_BUFFERS];
|
||||
int index;
|
||||
};
|
||||
|
||||
bool tex_upload(struct ra *ra, struct tex_upload *pbo, bool want_pbo,
|
||||
const struct ra_tex_upload_params *params);
|
||||
|
||||
void tex_upload_uninit(struct ra *ra, struct tex_upload *pbo);
|
||||
|
||||
// A wrapper around ra_timer that does result pooling, averaging etc.
|
||||
struct timer_pool;
|
||||
|
||||
|
@ -84,6 +84,7 @@ static const struct ra_renderpass_input vertex_vao[] = {
|
||||
|
||||
struct texplane {
|
||||
struct ra_tex *tex;
|
||||
struct tex_upload pbo;
|
||||
int w, h;
|
||||
bool flipped;
|
||||
};
|
||||
@ -493,7 +494,7 @@ static void reinit_osd(struct gl_video *p)
|
||||
mpgl_osd_destroy(p->osd);
|
||||
p->osd = NULL;
|
||||
if (p->osd_state)
|
||||
p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
|
||||
p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state, p->opts.pbo);
|
||||
}
|
||||
|
||||
static void uninit_rendering(struct gl_video *p)
|
||||
@ -882,6 +883,7 @@ static void init_video(struct gl_video *p)
|
||||
.render_src = true,
|
||||
.src_linear = format->linear_filter,
|
||||
.non_normalized = p->opts.use_rectangle,
|
||||
.host_mutable = true,
|
||||
};
|
||||
|
||||
MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
|
||||
@ -935,7 +937,7 @@ again:;
|
||||
if (!buffer->mpi)
|
||||
continue;
|
||||
|
||||
bool res = p->ra->fns->poll_mapped_buffer(p->ra, buffer->buf);
|
||||
bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
|
||||
if (res || force) {
|
||||
// Unreferencing the image could cause gl_video_dr_free_buffer()
|
||||
// to be called by the talloc destructor (if it was the last
|
||||
@ -984,8 +986,8 @@ static void uninit_video(struct gl_video *p)
|
||||
|
||||
for (int n = 0; n < p->plane_count; n++) {
|
||||
struct texplane *plane = &vimg->planes[n];
|
||||
|
||||
ra_tex_free(p->ra, &plane->tex);
|
||||
tex_upload_uninit(p->ra, &plane->pbo);
|
||||
}
|
||||
*vimg = (struct video_image){0};
|
||||
|
||||
@ -3269,19 +3271,33 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t
|
||||
|
||||
plane->flipped = mpi->stride[0] < 0;
|
||||
|
||||
struct ra_tex_upload_params params = {
|
||||
.tex = plane->tex,
|
||||
.src = mpi->planes[n],
|
||||
.invalidate = true,
|
||||
.stride = mpi->stride[n],
|
||||
};
|
||||
|
||||
struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
|
||||
|
||||
p->ra->fns->tex_upload(p->ra, plane->tex, mpi->planes[n],
|
||||
mpi->stride[n], NULL, 0,
|
||||
mapped ? mapped->buf : NULL);
|
||||
|
||||
if (mapped && !mapped->mpi)
|
||||
mapped->mpi = mp_image_new_ref(mpi);
|
||||
if (mapped) {
|
||||
params.buf = mapped->buf;
|
||||
params.buf_offset = (uintptr_t)params.src -
|
||||
(uintptr_t)mapped->buf->data;
|
||||
params.src = NULL;
|
||||
}
|
||||
|
||||
if (p->using_dr_path != !!mapped) {
|
||||
p->using_dr_path = !!mapped;
|
||||
MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
|
||||
}
|
||||
|
||||
if (!tex_upload(p->ra, &plane->pbo, p->opts.pbo, ¶ms)) {
|
||||
timer_pool_stop(p->upload_timer);
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (mapped && !mapped->mpi)
|
||||
mapped->mpi = mp_image_new_ref(mpi);
|
||||
}
|
||||
timer_pool_stop(p->upload_timer);
|
||||
const char *mode = p->using_dr_path ? "DR" : p->opts.pbo ? "PBO" : "naive";
|
||||
@ -3367,11 +3383,6 @@ static void check_gl_features(struct gl_video *p)
|
||||
}
|
||||
}
|
||||
|
||||
if (!(ra->caps & RA_CAP_PBO) && p->opts.pbo) {
|
||||
p->opts.pbo = 0;
|
||||
MP_WARN(p, "Disabling PBOs (GL2.1/GLES2 unsupported).\n");
|
||||
}
|
||||
|
||||
p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
|
||||
bool voluntarily_dumb = check_dumb_mode(p);
|
||||
if (p->forced_dumb_mode || voluntarily_dumb) {
|
||||
@ -3628,7 +3639,6 @@ static void reinit_from_options(struct gl_video *p)
|
||||
check_gl_features(p);
|
||||
uninit_rendering(p);
|
||||
gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir);
|
||||
p->ra->use_pbo = p->opts.pbo;
|
||||
gl_video_setup_hooks(p);
|
||||
reinit_osd(p);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user