2013-10-01 04:37:13 +02:00
|
|
|
/******************************************************************************
|
2023-05-19 02:37:26 +02:00
|
|
|
Copyright (C) 2023 by Lain Bailey <lain@obsproject.com>
|
2013-10-01 04:37:13 +02:00
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2013-12-03 06:24:38 +01:00
|
|
|
the Free Software Foundation, either version 2 of the License, or
|
2013-10-01 04:37:13 +02:00
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
******************************************************************************/
|
|
|
|
|
2017-10-04 03:48:12 +02:00
|
|
|
#include <time.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2013-10-01 04:37:13 +02:00
|
|
|
#include "obs.h"
|
2014-01-27 02:48:14 +01:00
|
|
|
#include "obs-internal.h"
|
2013-10-01 04:37:13 +02:00
|
|
|
#include "graphics/vec4.h"
|
2014-02-09 13:51:06 +01:00
|
|
|
#include "media-io/format-conversion.h"
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
#include "media-io/video-frame.h"
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2019-11-28 01:38:35 +01:00
|
|
|
#ifdef _WIN32
|
|
|
|
#define WIN32_MEAN_AND_LEAN
|
|
|
|
#include <windows.h>
|
|
|
|
#endif
|
|
|
|
|
2014-04-19 15:33:11 +02:00
|
|
|
static uint64_t tick_sources(uint64_t cur_time, uint64_t last_time)
|
2013-10-01 04:37:13 +02:00
|
|
|
{
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
struct obs_core_data *data = &obs->data;
|
2019-06-23 07:13:45 +02:00
|
|
|
struct obs_source *source;
|
|
|
|
uint64_t delta_time;
|
|
|
|
float seconds;
|
2013-10-01 04:37:13 +02:00
|
|
|
|
|
|
|
if (!last_time)
|
2022-06-15 19:07:50 +02:00
|
|
|
last_time = cur_time - obs->video.video_frame_interval_ns;
|
2014-08-06 00:07:54 +02:00
|
|
|
|
2014-04-19 15:33:11 +02:00
|
|
|
delta_time = cur_time - last_time;
|
2013-10-01 04:37:13 +02:00
|
|
|
seconds = (float)((double)delta_time / 1000000000.0);
|
|
|
|
|
2017-12-07 08:13:56 +01:00
|
|
|
/* ------------------------------------- */
|
|
|
|
/* call tick callbacks */
|
|
|
|
|
2023-06-09 08:51:45 +02:00
|
|
|
pthread_mutex_lock(&data->draw_callbacks_mutex);
|
2017-12-07 08:13:56 +01:00
|
|
|
|
2023-06-09 08:51:45 +02:00
|
|
|
for (size_t i = data->tick_callbacks.num; i > 0; i--) {
|
2017-12-07 08:13:56 +01:00
|
|
|
struct tick_callback *callback;
|
2023-06-09 08:51:45 +02:00
|
|
|
callback = data->tick_callbacks.array + (i - 1);
|
2017-12-07 08:13:56 +01:00
|
|
|
callback->tick(callback->param, seconds);
|
|
|
|
}
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2023-06-09 08:51:45 +02:00
|
|
|
pthread_mutex_unlock(&data->draw_callbacks_mutex);
|
2017-12-07 08:13:56 +01:00
|
|
|
|
|
|
|
/* ------------------------------------- */
|
2023-06-09 08:51:45 +02:00
|
|
|
/* get an array of all sources to tick */
|
|
|
|
|
2023-06-10 23:30:44 +02:00
|
|
|
da_clear(data->sources_to_tick);
|
2017-12-07 08:13:56 +01:00
|
|
|
|
|
|
|
pthread_mutex_lock(&data->sources_mutex);
|
|
|
|
|
2023-02-09 08:34:08 +01:00
|
|
|
source = data->sources;
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
while (source) {
|
2021-12-19 18:55:51 +01:00
|
|
|
obs_source_t *s = obs_source_get_ref(source);
|
2023-06-09 08:51:45 +02:00
|
|
|
if (s)
|
|
|
|
da_push_back(data->sources_to_tick, &s);
|
2023-02-09 08:34:08 +01:00
|
|
|
source = (struct obs_source *)source->context.hh_uuid.next;
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&data->sources_mutex);
|
|
|
|
|
2023-06-09 08:51:45 +02:00
|
|
|
/* ------------------------------------- */
|
|
|
|
/* call the tick function of each source */
|
|
|
|
|
|
|
|
for (size_t i = 0; i < data->sources_to_tick.num; i++) {
|
|
|
|
obs_source_t *s = data->sources_to_tick.array[i];
|
2023-08-13 15:28:46 +02:00
|
|
|
const uint64_t start = source_profiler_source_tick_start();
|
2023-06-09 08:51:45 +02:00
|
|
|
obs_source_video_tick(s, seconds);
|
2023-08-13 15:28:46 +02:00
|
|
|
source_profiler_source_tick_end(s, start);
|
2023-06-09 08:51:45 +02:00
|
|
|
obs_source_release(s);
|
|
|
|
}
|
|
|
|
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
return cur_time;
|
2013-10-01 04:37:13 +02:00
|
|
|
}
|
|
|
|
|
2014-02-13 16:58:31 +01:00
|
|
|
/* in obs-display.c */
|
|
|
|
extern void render_display(struct obs_display *display);
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2013-10-19 05:25:13 +02:00
|
|
|
static inline void render_displays(void)
|
|
|
|
{
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
struct obs_display *display;
|
|
|
|
|
2014-01-24 01:00:42 +01:00
|
|
|
if (!obs->data.valid)
|
|
|
|
return;
|
|
|
|
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_enter_context(obs->video.graphics);
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2013-11-20 23:00:16 +01:00
|
|
|
/* render extra displays/swaps */
|
|
|
|
pthread_mutex_lock(&obs->data.displays_mutex);
|
2013-10-19 05:25:13 +02:00
|
|
|
|
libobs: Add services API, reduce repeated code
Add API for streaming services. The services API simplifies the
creation of custom service features and user interface.
Custom streaming services later on will be able to do things such as:
- Be able to use service-specific APIs via modules, allowing a more
direct means of communicating with the service and requesting or
setting service-specific information
- Get URL/stream key via other means of authentication such as OAuth,
or be able to build custom URLs for services that require that sort
of thing.
- Query information (such as viewer count, chat, follower
notifications, and other information)
- Set channel information (such as current game, current channel title,
activating commercials)
Also, I reduce some repeated code that was used for all libobs objects.
This includes the name of the object, the private data, settings, as
well as the signal and procedure handlers.
I also switched to using linked lists for the global object lists,
rather than using an array of pointers (you could say it was..
pointless.) ..Anyway, the linked list info is also stored in the shared
context data structure.
2014-04-20 05:38:53 +02:00
|
|
|
display = obs->data.first_display;
|
|
|
|
while (display) {
|
|
|
|
render_display(display);
|
|
|
|
display = display->next;
|
|
|
|
}
|
2013-10-19 05:25:13 +02:00
|
|
|
|
2013-11-20 23:00:16 +01:00
|
|
|
pthread_mutex_unlock(&obs->data.displays_mutex);
|
2013-10-19 05:25:13 +02:00
|
|
|
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_leave_context();
|
2013-10-19 05:25:13 +02:00
|
|
|
}
|
|
|
|
|
2014-02-06 04:36:21 +01:00
|
|
|
static inline void set_render_size(uint32_t width, uint32_t height)
|
2013-10-01 04:37:13 +02:00
|
|
|
{
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_enable_depth_test(false);
|
|
|
|
gs_set_cull_mode(GS_NEITHER);
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2014-02-06 04:36:21 +01:00
|
|
|
gs_ortho(0.0f, (float)width, 0.0f, (float)height, -100.0f, 100.0f);
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_set_viewport(0, 0, width, height);
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void unmap_last_surface(struct obs_core_video_mix *video)
|
2014-02-06 04:36:21 +01:00
|
|
|
{
|
2019-07-27 08:21:41 +02:00
|
|
|
for (int c = 0; c < NUM_CHANNELS; ++c) {
|
|
|
|
if (video->mapped_surfaces[c]) {
|
|
|
|
gs_stagesurface_unmap(video->mapped_surfaces[c]);
|
|
|
|
video->mapped_surfaces[c] = NULL;
|
|
|
|
}
|
2013-10-01 04:37:13 +02:00
|
|
|
}
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2023-11-28 14:42:06 +01:00
|
|
|
static inline bool can_reuse_mix_texture(const struct obs_core_video_mix *mix,
|
|
|
|
size_t *idx)
|
|
|
|
{
|
|
|
|
for (size_t i = 0, num = obs->video.mixes.num; i < num; i++) {
|
|
|
|
const struct obs_core_video_mix *other =
|
|
|
|
obs->video.mixes.array[i];
|
|
|
|
if (other == mix)
|
|
|
|
break;
|
|
|
|
if (other->view != mix->view)
|
|
|
|
continue;
|
|
|
|
if (other->render_space != mix->render_space)
|
|
|
|
continue;
|
|
|
|
if (other->ovi.base_width != mix->ovi.base_width ||
|
|
|
|
other->ovi.base_height != mix->ovi.base_height)
|
|
|
|
continue;
|
|
|
|
if (!other->texture_rendered)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
*idx = i;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void draw_mix_texture(const size_t mix_idx)
|
|
|
|
{
|
|
|
|
gs_texture_t *tex = obs->video.mixes.array[mix_idx]->render_texture;
|
|
|
|
gs_effect_t *effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
|
|
|
|
gs_eparam_t *param = gs_effect_get_param_by_name(effect, "image");
|
|
|
|
gs_effect_set_texture_srgb(param, tex);
|
|
|
|
|
|
|
|
gs_enable_framebuffer_srgb(true);
|
|
|
|
while (gs_effect_loop(effect, "Draw"))
|
|
|
|
gs_draw_sprite(tex, 0, 0, 0);
|
|
|
|
gs_enable_framebuffer_srgb(false);
|
|
|
|
}
|
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
static const char *render_main_texture_name = "render_main_texture";
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void render_main_texture(struct obs_core_video_mix *video)
|
2014-02-06 04:36:21 +01:00
|
|
|
{
|
2022-11-10 06:05:32 +01:00
|
|
|
uint32_t base_width = video->ovi.base_width;
|
|
|
|
uint32_t base_height = video->ovi.base_height;
|
2022-06-01 23:34:13 +02:00
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_start(render_main_texture_name);
|
2019-04-03 08:23:37 +02:00
|
|
|
GS_DEBUG_MARKER_BEGIN(GS_DEBUG_COLOR_MAIN_TEXTURE,
|
2019-06-23 07:13:45 +02:00
|
|
|
render_main_texture_name);
|
2015-07-11 08:04:46 +02:00
|
|
|
|
2014-02-06 04:36:21 +01:00
|
|
|
struct vec4 clear_color;
|
2019-04-25 17:36:41 +02:00
|
|
|
vec4_set(&clear_color, 0.0f, 0.0f, 0.0f, 0.0f);
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2022-03-15 07:50:45 +01:00
|
|
|
gs_set_render_target_with_color_space(video->render_texture, NULL,
|
|
|
|
video->render_space);
|
2014-02-06 04:36:21 +01:00
|
|
|
gs_clear(GS_CLEAR_COLOR, &clear_color, 1.0f, 0);
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
set_render_size(base_width, base_height);
|
2017-04-24 12:22:19 +02:00
|
|
|
|
|
|
|
pthread_mutex_lock(&obs->data.draw_callbacks_mutex);
|
|
|
|
|
2017-12-07 08:13:56 +01:00
|
|
|
for (size_t i = obs->data.draw_callbacks.num; i > 0; i--) {
|
2022-09-10 20:29:54 +02:00
|
|
|
struct draw_callback *const callback =
|
|
|
|
obs->data.draw_callbacks.array + (i - 1);
|
2022-06-15 19:07:50 +02:00
|
|
|
callback->draw(callback->param, base_width, base_height);
|
2017-04-24 12:22:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&obs->data.draw_callbacks_mutex);
|
|
|
|
|
2023-11-28 14:42:06 +01:00
|
|
|
/* In some cases we can reuse a previous mix's texture and save re-rendering everything */
|
|
|
|
size_t reuse_idx;
|
|
|
|
if (can_reuse_mix_texture(video, &reuse_idx))
|
|
|
|
draw_mix_texture(reuse_idx);
|
|
|
|
else
|
|
|
|
obs_view_render(video->view);
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2019-05-24 10:03:21 +02:00
|
|
|
video->texture_rendered = true;
|
2015-07-11 08:04:46 +02:00
|
|
|
|
2022-09-10 20:29:54 +02:00
|
|
|
pthread_mutex_lock(&obs->data.draw_callbacks_mutex);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < obs->data.rendered_callbacks.num; ++i) {
|
|
|
|
struct rendered_callback *const callback =
|
|
|
|
&obs->data.rendered_callbacks.array[i];
|
|
|
|
callback->rendered(callback->param);
|
|
|
|
}
|
|
|
|
|
|
|
|
pthread_mutex_unlock(&obs->data.draw_callbacks_mutex);
|
|
|
|
|
2019-04-03 08:23:37 +02:00
|
|
|
GS_DEBUG_MARKER_END();
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(render_main_texture_name);
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
|
|
|
|
2019-06-23 07:13:45 +02:00
|
|
|
static inline gs_effect_t *
|
2022-06-01 23:34:13 +02:00
|
|
|
get_scale_effect_internal(struct obs_core_video_mix *mix)
|
2014-12-15 08:45:44 +01:00
|
|
|
{
|
2022-06-01 23:34:13 +02:00
|
|
|
struct obs_core_video *video = &obs->video;
|
2022-06-15 19:07:50 +02:00
|
|
|
const struct video_output_info *info =
|
|
|
|
video_output_get_info(mix->video);
|
2022-06-01 23:34:13 +02:00
|
|
|
|
2015-04-06 16:35:09 +02:00
|
|
|
/* if the dimension is under half the size of the original image,
|
|
|
|
* bicubic/lanczos can't sample enough pixels to create an accurate
|
|
|
|
* image, so use the bilinear low resolution effect instead */
|
2022-11-10 06:05:32 +01:00
|
|
|
if (info->width < (mix->ovi.base_width / 2) &&
|
|
|
|
info->height < (mix->ovi.base_height / 2)) {
|
2015-04-06 16:35:09 +02:00
|
|
|
return video->bilinear_lowres_effect;
|
|
|
|
}
|
|
|
|
|
2022-11-10 06:05:32 +01:00
|
|
|
switch (mix->ovi.scale_type) {
|
2019-06-23 07:13:45 +02:00
|
|
|
case OBS_SCALE_BILINEAR:
|
|
|
|
return video->default_effect;
|
|
|
|
case OBS_SCALE_LANCZOS:
|
|
|
|
return video->lanczos_effect;
|
2019-08-15 07:29:30 +02:00
|
|
|
case OBS_SCALE_AREA:
|
|
|
|
return video->area_effect;
|
2016-06-29 15:08:54 +02:00
|
|
|
case OBS_SCALE_BICUBIC:
|
|
|
|
default:;
|
2014-12-15 08:45:44 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return video->bicubic_effect;
|
|
|
|
}
|
|
|
|
|
2022-11-10 06:05:32 +01:00
|
|
|
static inline bool resolution_close(struct obs_core_video_mix *mix,
|
2019-06-23 07:13:45 +02:00
|
|
|
uint32_t width, uint32_t height)
|
2014-12-15 08:45:44 +01:00
|
|
|
{
|
2022-11-10 06:05:32 +01:00
|
|
|
long width_cmp = (long)mix->ovi.base_width - (long)width;
|
|
|
|
long height_cmp = (long)mix->ovi.base_height - (long)height;
|
2014-12-15 08:45:44 +01:00
|
|
|
|
|
|
|
return labs(width_cmp) <= 16 && labs(height_cmp) <= 16;
|
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline gs_effect_t *get_scale_effect(struct obs_core_video_mix *mix,
|
2019-06-23 07:13:45 +02:00
|
|
|
uint32_t width, uint32_t height)
|
2014-12-15 08:45:44 +01:00
|
|
|
{
|
2022-06-01 23:34:13 +02:00
|
|
|
struct obs_core_video *video = &obs->video;
|
|
|
|
|
2022-11-10 06:05:32 +01:00
|
|
|
if (resolution_close(mix, width, height)) {
|
2014-12-15 08:45:44 +01:00
|
|
|
return video->default_effect;
|
|
|
|
} else {
|
|
|
|
/* if the scale method couldn't be loaded, use either bicubic
|
|
|
|
* or bilinear by default */
|
2022-06-01 23:34:13 +02:00
|
|
|
gs_effect_t *effect = get_scale_effect_internal(mix);
|
2014-12-15 08:45:44 +01:00
|
|
|
if (!effect)
|
2019-06-23 07:13:45 +02:00
|
|
|
effect = !!video->bicubic_effect
|
|
|
|
? video->bicubic_effect
|
|
|
|
: video->default_effect;
|
2014-12-15 08:45:44 +01:00
|
|
|
return effect;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
static const char *render_output_texture_name = "render_output_texture";
|
2022-06-15 19:07:50 +02:00
|
|
|
static inline gs_texture_t *
|
|
|
|
render_output_texture(struct obs_core_video_mix *mix)
|
2014-02-06 04:36:21 +01:00
|
|
|
{
|
2022-09-08 09:35:26 +02:00
|
|
|
struct obs_video_info *const ovi = &mix->ovi;
|
2022-06-01 23:34:13 +02:00
|
|
|
gs_texture_t *texture = mix->render_texture;
|
|
|
|
gs_texture_t *target = mix->output_texture;
|
2022-09-08 09:35:26 +02:00
|
|
|
const uint32_t width = gs_texture_get_width(target);
|
|
|
|
const uint32_t height = gs_texture_get_height(target);
|
|
|
|
if ((width == ovi->base_width) && (height == ovi->base_height))
|
|
|
|
return texture;
|
2018-06-24 00:21:07 +02:00
|
|
|
|
libobs: Rework RGB to YUV conversion
RGB to YUV converison was previously baked into every scale shader, but
this work has been moved to the YUV packing shaders. The scale shaders
now write RGBA instead. In the case where base and output resolutions
are identical, the render texture is forwarded directly to the YUV pack
step, skipping an entire fullscreen pass.
Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12
1920x1080, Before:
RGBA -> UYVX: ~321 us
UYVX -> Y: ~480 us
UYVX -> UV: ~127 us
1920x1080, After:
[forward render texture]
RGBA -> Y: ~487 us
RGBA -> UV: ~131 us
1920x1080 -> 1280x720, Before:
RGBA -> UYVX: ~268 us
UYVX -> Y: ~209 us
UYVX -> UV: ~57 us
1920x1080 -> 1280x720, After:
RGBA -> RGBA (rescale): ~268 us
RGBA -> Y: ~210 us
RGBA -> UV: ~58 us
2019-07-22 10:12:35 +02:00
|
|
|
profile_start(render_output_texture_name);
|
|
|
|
|
2022-09-08 09:35:26 +02:00
|
|
|
gs_effect_t *effect = get_scale_effect(mix, width, height);
|
|
|
|
gs_technique_t *tech = gs_effect_get_technique(effect, "Draw");
|
|
|
|
|
2019-06-23 07:13:45 +02:00
|
|
|
gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
|
2019-07-18 06:11:18 +02:00
|
|
|
gs_eparam_t *bres =
|
|
|
|
gs_effect_get_param_by_name(effect, "base_dimension");
|
2019-06-23 07:13:45 +02:00
|
|
|
gs_eparam_t *bres_i =
|
|
|
|
gs_effect_get_param_by_name(effect, "base_dimension_i");
|
|
|
|
size_t passes, i;
|
2014-02-06 04:36:21 +01:00
|
|
|
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_set_render_target(target, NULL);
|
2014-02-06 04:36:21 +01:00
|
|
|
set_render_size(width, height);
|
|
|
|
|
libobs: Rework RGB to YUV conversion
RGB to YUV converison was previously baked into every scale shader, but
this work has been moved to the YUV packing shaders. The scale shaders
now write RGBA instead. In the case where base and output resolutions
are identical, the render texture is forwarded directly to the YUV pack
step, skipping an entire fullscreen pass.
Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12
1920x1080, Before:
RGBA -> UYVX: ~321 us
UYVX -> Y: ~480 us
UYVX -> UV: ~127 us
1920x1080, After:
[forward render texture]
RGBA -> Y: ~487 us
RGBA -> UV: ~131 us
1920x1080 -> 1280x720, Before:
RGBA -> UYVX: ~268 us
UYVX -> Y: ~209 us
UYVX -> UV: ~57 us
1920x1080 -> 1280x720, After:
RGBA -> RGBA (rescale): ~268 us
RGBA -> Y: ~210 us
RGBA -> UV: ~58 us
2019-07-22 10:12:35 +02:00
|
|
|
if (bres) {
|
|
|
|
struct vec2 base;
|
2022-11-10 06:05:32 +01:00
|
|
|
vec2_set(&base, (float)mix->ovi.base_width,
|
|
|
|
(float)mix->ovi.base_height);
|
2019-07-18 06:11:18 +02:00
|
|
|
gs_effect_set_vec2(bres, &base);
|
libobs: Rework RGB to YUV conversion
RGB to YUV converison was previously baked into every scale shader, but
this work has been moved to the YUV packing shaders. The scale shaders
now write RGBA instead. In the case where base and output resolutions
are identical, the render texture is forwarded directly to the YUV pack
step, skipping an entire fullscreen pass.
Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12
1920x1080, Before:
RGBA -> UYVX: ~321 us
UYVX -> Y: ~480 us
UYVX -> UV: ~127 us
1920x1080, After:
[forward render texture]
RGBA -> Y: ~487 us
RGBA -> UV: ~131 us
1920x1080 -> 1280x720, Before:
RGBA -> UYVX: ~268 us
UYVX -> Y: ~209 us
UYVX -> UV: ~57 us
1920x1080 -> 1280x720, After:
RGBA -> RGBA (rescale): ~268 us
RGBA -> Y: ~210 us
RGBA -> UV: ~58 us
2019-07-22 10:12:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (bres_i) {
|
|
|
|
struct vec2 base_i;
|
2022-11-10 06:05:32 +01:00
|
|
|
vec2_set(&base_i, 1.0f / (float)mix->ovi.base_width,
|
|
|
|
1.0f / (float)mix->ovi.base_height);
|
2014-12-15 08:45:44 +01:00
|
|
|
gs_effect_set_vec2(bres_i, &base_i);
|
libobs: Rework RGB to YUV conversion
RGB to YUV converison was previously baked into every scale shader, but
this work has been moved to the YUV packing shaders. The scale shaders
now write RGBA instead. In the case where base and output resolutions
are identical, the render texture is forwarded directly to the YUV pack
step, skipping an entire fullscreen pass.
Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12
1920x1080, Before:
RGBA -> UYVX: ~321 us
UYVX -> Y: ~480 us
UYVX -> UV: ~127 us
1920x1080, After:
[forward render texture]
RGBA -> Y: ~487 us
RGBA -> UV: ~131 us
1920x1080 -> 1280x720, Before:
RGBA -> UYVX: ~268 us
UYVX -> Y: ~209 us
UYVX -> UV: ~57 us
1920x1080 -> 1280x720, After:
RGBA -> RGBA (rescale): ~268 us
RGBA -> Y: ~210 us
RGBA -> UV: ~58 us
2019-07-22 10:12:35 +02:00
|
|
|
}
|
2014-12-15 08:45:44 +01:00
|
|
|
|
2019-09-11 05:06:17 +02:00
|
|
|
gs_effect_set_texture_srgb(image, texture);
|
2014-02-06 04:36:21 +01:00
|
|
|
|
2019-09-11 05:06:17 +02:00
|
|
|
gs_enable_framebuffer_srgb(true);
|
2014-10-14 17:40:34 +02:00
|
|
|
gs_enable_blending(false);
|
2014-08-08 08:42:07 +02:00
|
|
|
passes = gs_technique_begin(tech);
|
2014-02-06 04:36:21 +01:00
|
|
|
for (i = 0; i < passes; i++) {
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_technique_begin_pass(tech, i);
|
2014-02-06 04:36:21 +01:00
|
|
|
gs_draw_sprite(texture, 0, width, height);
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_technique_end_pass(tech);
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_technique_end(tech);
|
2014-10-14 17:40:34 +02:00
|
|
|
gs_enable_blending(true);
|
2019-09-11 05:06:17 +02:00
|
|
|
gs_enable_framebuffer_srgb(false);
|
2014-02-06 04:36:21 +01:00
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(render_output_texture_name);
|
libobs: Rework RGB to YUV conversion
RGB to YUV converison was previously baked into every scale shader, but
this work has been moved to the YUV packing shaders. The scale shaders
now write RGBA instead. In the case where base and output resolutions
are identical, the render texture is forwarded directly to the YUV pack
step, skipping an entire fullscreen pass.
Intel GPA, SetStablePowerState, Intel HD Graphics 530, NV12
1920x1080, Before:
RGBA -> UYVX: ~321 us
UYVX -> Y: ~480 us
UYVX -> UV: ~127 us
1920x1080, After:
[forward render texture]
RGBA -> Y: ~487 us
RGBA -> UV: ~131 us
1920x1080 -> 1280x720, Before:
RGBA -> UYVX: ~268 us
UYVX -> Y: ~209 us
UYVX -> UV: ~57 us
1920x1080 -> 1280x720, After:
RGBA -> RGBA (rescale): ~268 us
RGBA -> Y: ~210 us
RGBA -> UV: ~58 us
2019-07-22 10:12:35 +02:00
|
|
|
|
|
|
|
return target;
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
|
|
|
|
2019-08-31 07:13:03 +02:00
|
|
|
/* Runs the named technique of the conversion effect into a single output
 * plane, drawing one fullscreen triangle per pass (no vertex buffer). */
static void render_convert_plane(gs_effect_t *effect, gs_texture_t *target,
				 const char *tech_name)
{
	gs_technique_t *tech = gs_effect_get_technique(effect, tech_name);

	const uint32_t width = gs_texture_get_width(target);
	const uint32_t height = gs_texture_get_height(target);

	gs_set_render_target(target, NULL);
	set_render_size(width, height);

	const size_t passes = gs_technique_begin(tech);
	for (size_t i = 0; i < passes; i++) {
		gs_technique_begin_pass(tech, i);
		gs_draw(GS_TRIS, 0, 3); /* fullscreen triangle */
		gs_technique_end_pass(tech);
	}
	gs_technique_end(tech);
}
|
|
|
|
|
2019-07-27 08:21:41 +02:00
|
|
|
static const char *render_convert_texture_name = "render_convert_texture";
/* Convert the rendered RGBA texture into the output's planar YUV textures.
 *
 * Renders up to three planes (e.g. Y, then UV for NV12, or Y/U/V for I420)
 * from `texture` into `convert_textures[0..2]` using the techniques chosen
 * in video->conversion_techs. Planes 1 and 2 are only rendered when plane 0
 * exists; the color matrix rows are distributed per-plane via vec0..vec2. */
static void render_convert_texture(struct obs_core_video_mix *video,
				   gs_texture_t *const *const convert_textures,
				   gs_texture_t *texture)
{
	profile_start(render_convert_texture_name);

	gs_effect_t *effect = obs->video.conversion_effect;
	gs_eparam_t *color_vec0 =
		gs_effect_get_param_by_name(effect, "color_vec0");
	gs_eparam_t *color_vec1 =
		gs_effect_get_param_by_name(effect, "color_vec1");
	gs_eparam_t *color_vec2 =
		gs_effect_get_param_by_name(effect, "color_vec2");
	gs_eparam_t *image = gs_effect_get_param_by_name(effect, "image");
	gs_eparam_t *width_i = gs_effect_get_param_by_name(effect, "width_i");
	gs_eparam_t *height_i = gs_effect_get_param_by_name(effect, "height_i");
	gs_eparam_t *sdr_white_nits_over_maximum = gs_effect_get_param_by_name(
		effect, "sdr_white_nits_over_maximum");
	gs_eparam_t *hdr_lw = gs_effect_get_param_by_name(effect, "hdr_lw");

	/* Rows of the 4x4 color matrix: vec0 = luma row, vec1/vec2 = chroma. */
	struct vec4 vec0, vec1, vec2;
	vec4_set(&vec0, video->color_matrix[4], video->color_matrix[5],
		 video->color_matrix[6], video->color_matrix[7]);
	vec4_set(&vec1, video->color_matrix[0], video->color_matrix[1],
		 video->color_matrix[2], video->color_matrix[3]);
	vec4_set(&vec2, video->color_matrix[8], video->color_matrix[9],
		 video->color_matrix[10], video->color_matrix[11]);

	/* conversion writes raw plane data; blending must be off */
	gs_enable_blending(false);

	if (convert_textures[0]) {
		const float hdr_nominal_peak_level =
			obs->video.hdr_nominal_peak_level;
		const float multiplier =
			obs_get_video_sdr_white_level() / 10000.f;
		gs_effect_set_texture(image, texture);
		gs_effect_set_vec4(color_vec0, &vec0);
		gs_effect_set_float(sdr_white_nits_over_maximum, multiplier);
		gs_effect_set_float(hdr_lw, hdr_nominal_peak_level);
		render_convert_plane(effect, convert_textures[0],
				     video->conversion_techs[0]);

		if (convert_textures[1]) {
			gs_effect_set_texture(image, texture);
			gs_effect_set_vec4(color_vec1, &vec1);
			/* two-plane formats pack both chroma rows into the
			 * second plane's technique */
			if (!convert_textures[2])
				gs_effect_set_vec4(color_vec2, &vec2);
			gs_effect_set_float(width_i, video->conversion_width_i);
			gs_effect_set_float(height_i,
					    video->conversion_height_i);
			gs_effect_set_float(sdr_white_nits_over_maximum,
					    multiplier);
			gs_effect_set_float(hdr_lw, hdr_nominal_peak_level);
			render_convert_plane(effect, convert_textures[1],
					     video->conversion_techs[1]);

			if (convert_textures[2]) {
				gs_effect_set_texture(image, texture);
				gs_effect_set_vec4(color_vec2, &vec2);
				gs_effect_set_float(width_i,
						    video->conversion_width_i);
				gs_effect_set_float(height_i,
						    video->conversion_height_i);
				gs_effect_set_float(sdr_white_nits_over_maximum,
						    multiplier);
				gs_effect_set_float(hdr_lw,
						    hdr_nominal_peak_level);
				render_convert_plane(
					effect, convert_textures[2],
					video->conversion_techs[2]);
			}
		}
	}

	gs_enable_blending(true);

	video->texture_converted = true;

	profile_end(render_convert_texture_name);
}
|
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
static const char *stage_output_texture_name = "stage_output_texture";
/* Queue GPU->CPU copies of this frame's textures into staging surfaces.
 *
 * Without GPU conversion, only the single RGBA output texture is staged;
 * with GPU conversion, each converted plane (up to channel_count) is staged.
 * Unused active_copy_surfaces slots are cleared so download_frame() skips
 * them later. Also unmaps the previously mapped surfaces first. */
static inline void
stage_output_texture(struct obs_core_video_mix *video, int cur_texture,
		     gs_texture_t *const *const convert_textures,
		     gs_texture_t *output_texture,
		     gs_stagesurf_t *const *const copy_surfaces,
		     size_t channel_count)
{
	profile_start(stage_output_texture_name);

	unmap_last_surface(video);

	if (!video->gpu_conversion) {
		gs_stagesurf_t *copy = copy_surfaces[0];
		if (copy)
			gs_stage_texture(copy, output_texture);
		video->active_copy_surfaces[cur_texture][0] = copy;

		for (size_t i = 1; i < NUM_CHANNELS; ++i)
			video->active_copy_surfaces[cur_texture][i] = NULL;

		video->textures_copied[cur_texture] = true;
	} else if (video->texture_converted) {
		for (size_t i = 0; i < channel_count; i++) {
			gs_stagesurf_t *copy = copy_surfaces[i];
			if (copy)
				gs_stage_texture(copy, convert_textures[i]);
			video->active_copy_surfaces[cur_texture][i] = copy;
		}

		for (size_t i = channel_count; i < NUM_CHANNELS; ++i)
			video->active_copy_surfaces[cur_texture][i] = NULL;

		video->textures_copied[cur_texture] = true;
	}

	profile_end(stage_output_texture_name);
}
|
|
|
|
|
2022-06-15 19:07:50 +02:00
|
|
|
/* Queue one texture frame for the GPU encoder thread.
 *
 * If no free frame is available (or a duplicate is already pending), the
 * newest queued frame's encode counter is bumped instead of queueing a new
 * one. Otherwise a free frame is popped, filled either by copying or — in
 * the fast path — by swapping texture ownership with the mix, and pushed to
 * the encoder queue; the semaphore wakes the encoder thread either way.
 *
 * Returns true while vframe_info->count still has duplicates left to queue
 * (the caller loops), false once the count reaches zero or encoding stops. */
static inline bool queue_frame(struct obs_core_video_mix *video,
			       bool raw_active,
			       struct obs_vframe_info *vframe_info)
{
	bool duplicate =
		!video->gpu_encoder_avail_queue.size ||
		(video->gpu_encoder_queue.size && vframe_info->count > 1);

	if (duplicate) {
		/* peek at the last (newest) frame in the encoder queue */
		struct obs_tex_frame *tf =
			deque_data(&video->gpu_encoder_queue,
				   video->gpu_encoder_queue.size - sizeof(*tf));

		/* texture-based encoding is stopping */
		if (!tf) {
			return false;
		}

		tf->count++;
		os_sem_post(video->gpu_encode_semaphore);
		goto finish;
	}

	struct obs_tex_frame tf;
	deque_pop_front(&video->gpu_encoder_avail_queue, &tf, sizeof(tf));

	if (tf.released) {
#ifdef _WIN32
		gs_texture_acquire_sync(tf.tex, tf.lock_key, GS_WAIT_INFINITE);
#endif
		tf.released = false;
	}

	/* the vframe_info->count > 1 case causing a copy can only happen if by
	 * some chance the very first frame has to be duplicated for whatever
	 * reason.  otherwise, it goes to the 'duplicate' case above, which
	 * will ensure better performance. */
	if (raw_active || vframe_info->count > 1) {
		gs_copy_texture(tf.tex, video->convert_textures_encode[0]);
#ifndef _WIN32
		/* Y and UV textures are views of the same texture on D3D, and
		 * gs_copy_texture will copy all views of the underlying
		 * texture. On other platforms, these are two distinct textures
		 * that must be copied separately. */
		gs_copy_texture(tf.tex_uv, video->convert_textures_encode[1]);
#endif
	} else {
		/* fast path: swap texture ownership instead of copying */
		gs_texture_t *tex = video->convert_textures_encode[0];
		gs_texture_t *tex_uv = video->convert_textures_encode[1];

		video->convert_textures_encode[0] = tf.tex;
		video->convert_textures_encode[1] = tf.tex_uv;

		tf.tex = tex;
		tf.tex_uv = tex_uv;
	}

	tf.count = 1;
	tf.timestamp = vframe_info->timestamp;
	tf.released = true;
#ifdef _WIN32
	tf.handle = gs_texture_get_shared_handle(tf.tex);
	gs_texture_release_sync(tf.tex, ++tf.lock_key);
#endif
	deque_push_back(&video->gpu_encoder_queue, &tf, sizeof(tf));

	os_sem_post(video->gpu_encode_semaphore);

finish:
	return --vframe_info->count;
}
|
|
|
|
|
|
|
|
extern void full_stop(struct obs_encoder *encoder);
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
/* Queue the frame for GPU encoding, repeating until every duplicate
 * requested by vframe_info->count has been queued (or encoding stops). */
static inline void encode_gpu(struct obs_core_video_mix *video, bool raw_active,
			      struct obs_vframe_info *vframe_info)
{
	bool more = queue_frame(video, raw_active, vframe_info);
	while (more)
		more = queue_frame(video, raw_active, vframe_info);
}
|
|
|
|
|
|
|
|
static const char *output_gpu_encoders_name = "output_gpu_encoders";
/* Hand the current converted frame to the GPU encoder thread.
 *
 * Skips out early if the texture has not been converted yet or there is no
 * pending frame-timing info. The gpu_encoder_mutex guards the encoder
 * queues touched by encode_gpu()/queue_frame(). */
static void output_gpu_encoders(struct obs_core_video_mix *video,
				bool raw_active)
{
	profile_start(output_gpu_encoders_name);

	if (!video->texture_converted)
		goto end;
	if (!video->vframe_info_buffer_gpu.size)
		goto end;

	struct obs_vframe_info vframe_info;
	deque_pop_front(&video->vframe_info_buffer_gpu, &vframe_info,
			sizeof(vframe_info));

	pthread_mutex_lock(&video->gpu_encoder_mutex);
	encode_gpu(video, raw_active, &vframe_info);
	pthread_mutex_unlock(&video->gpu_encoder_mutex);

end:
	profile_end(output_gpu_encoders_name);
}
|
|
|
|
|
2022-06-15 19:07:50 +02:00
|
|
|
/* Render one frame of this video mix and feed the active outputs.
 *
 * Renders the main texture, then — only if a raw (CPU) or GPU encoder
 * output is active — scales to the output texture, runs GPU YUV conversion,
 * pushes the frame to GPU encoders, and/or stages the textures for CPU
 * readback. When the GPU encoder is active, its dedicated encode textures
 * and (on Windows) single-surface copy path are used instead. */
static inline void render_video(struct obs_core_video_mix *video,
				bool raw_active, const bool gpu_active,
				int cur_texture)
{
	gs_begin_scene();

	gs_enable_depth_test(false);
	gs_set_cull_mode(GS_NEITHER);

	render_main_texture(video);

	if (raw_active || gpu_active) {
		gs_texture_t *const *convert_textures = video->convert_textures;
		gs_stagesurf_t *const *copy_surfaces =
			video->copy_surfaces[cur_texture];
		size_t channel_count = NUM_CHANNELS;
		gs_texture_t *output_texture = render_output_texture(video);

		if (gpu_active) {
			convert_textures = video->convert_textures_encode;
#ifdef _WIN32
			/* on D3D the Y/UV planes share one texture, so a
			 * single copy surface suffices */
			copy_surfaces = video->copy_surfaces_encode;
			channel_count = 1;
#endif
			gs_flush();
		}

		if (video->gpu_conversion) {
			render_convert_texture(video, convert_textures,
					       output_texture);
		}

		if (gpu_active) {
			gs_flush();
			output_gpu_encoders(video, raw_active);
		}

		if (raw_active) {
			stage_output_texture(video, cur_texture,
					     convert_textures, output_texture,
					     copy_surfaces, channel_count);
		}
	}

	gs_set_render_target(NULL, NULL);
	gs_enable_blending(true);

	gs_end_scene();
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
/* Map the previous frame's staging surfaces into CPU-accessible memory.
 *
 * Fills frame->data[] / frame->linesize[] from each active copy surface of
 * `prev_texture` and records the mapped surfaces for later unmapping.
 * Returns false if nothing was staged for that slot or if any map fails
 * (surfaces mapped before the failure stay mapped; unmap_last_surface
 * cleans them up via video->mapped_surfaces). */
static inline bool download_frame(struct obs_core_video_mix *video,
				  int prev_texture, struct video_data *frame)
{
	if (!video->textures_copied[prev_texture])
		return false;

	for (int channel = 0; channel < NUM_CHANNELS; ++channel) {
		gs_stagesurf_t *surface =
			video->active_copy_surfaces[prev_texture][channel];
		if (surface) {
			if (!gs_stagesurface_map(surface, &frame->data[channel],
						 &frame->linesize[channel]))
				return false;

			video->mapped_surfaces[channel] = surface;
		}
	}
	return true;
}
|
2014-02-06 04:36:21 +01:00
|
|
|
|
2019-07-27 08:21:41 +02:00
|
|
|
/* Copy one image plane from mapped GPU memory into an output frame.
 *
 * When both strides equal the row width the plane is tightly packed and a
 * single bulk memcpy suffices; otherwise rows are copied one at a time,
 * honoring each side's stride. Returns the input pointer advanced past the
 * consumed data, so a caller can read a following packed plane (NV12/P010
 * single-texture layouts). */
static const uint8_t *set_gpu_converted_plane(uint32_t width, uint32_t height,
					      uint32_t linesize_input,
					      uint32_t linesize_output,
					      const uint8_t *in, uint8_t *out)
{
	const bool tightly_packed =
		(linesize_input == width) && (linesize_output == width);

	if (tightly_packed) {
		const size_t plane_bytes = (size_t)width * (size_t)height;
		memcpy(out, in, plane_bytes);
		return in + plane_bytes;
	}

	const uint8_t *src = in;
	uint8_t *dst = out;
	for (uint32_t row = 0; row < height; row++) {
		memcpy(dst, src, width);
		src += linesize_input;
		dst += linesize_output;
	}
	return src;
}
|
|
|
|
|
2022-07-20 08:07:04 +02:00
|
|
|
/* Copy GPU-converted plane data into a CPU video frame, per pixel format.
 *
 * Each case copies the planes with the geometry that format requires
 * (half-size chroma for I420/NV12, 16-bit rows for I010/P010/P216/P416).
 * For NV12/P010, a zero input->linesize[1] means Y and UV were downloaded
 * from a single texture; the UV plane then starts where the Y data ended,
 * which is why set_gpu_converted_plane returns the advanced input pointer.
 * The remaining formats are not produced by GPU conversion; they are listed
 * without a default so the compiler flags any newly added format. */
static void set_gpu_converted_data(struct video_frame *output,
				   const struct video_data *input,
				   const struct video_output_info *info)
{
	switch (info->format) {
	case VIDEO_FORMAT_I420: {
		const uint32_t width = info->width;
		const uint32_t height = info->height;

		set_gpu_converted_plane(width, height, input->linesize[0],
					output->linesize[0], input->data[0],
					output->data[0]);

		const uint32_t width_d2 = width / 2;
		const uint32_t height_d2 = height / 2;

		set_gpu_converted_plane(width_d2, height_d2, input->linesize[1],
					output->linesize[1], input->data[1],
					output->data[1]);

		set_gpu_converted_plane(width_d2, height_d2, input->linesize[2],
					output->linesize[2], input->data[2],
					output->data[2]);

		break;
	}
	case VIDEO_FORMAT_NV12: {
		const uint32_t width = info->width;
		const uint32_t height = info->height;
		const uint32_t height_d2 = height / 2;
		if (input->linesize[1]) {
			/* separate Y and UV textures */
			set_gpu_converted_plane(width, height,
						input->linesize[0],
						output->linesize[0],
						input->data[0],
						output->data[0]);
			set_gpu_converted_plane(width, height_d2,
						input->linesize[1],
						output->linesize[1],
						input->data[1],
						output->data[1]);
		} else {
			/* both planes packed in one texture; UV follows Y */
			const uint8_t *const in_uv = set_gpu_converted_plane(
				width, height, input->linesize[0],
				output->linesize[0], input->data[0],
				output->data[0]);
			set_gpu_converted_plane(width, height_d2,
						input->linesize[0],
						output->linesize[1], in_uv,
						output->data[1]);
		}

		break;
	}
	case VIDEO_FORMAT_I444: {
		const uint32_t width = info->width;
		const uint32_t height = info->height;

		set_gpu_converted_plane(width, height, input->linesize[0],
					output->linesize[0], input->data[0],
					output->data[0]);

		set_gpu_converted_plane(width, height, input->linesize[1],
					output->linesize[1], input->data[1],
					output->data[1]);

		set_gpu_converted_plane(width, height, input->linesize[2],
					output->linesize[2], input->data[2],
					output->data[2]);

		break;
	}
	case VIDEO_FORMAT_I010: {
		const uint32_t width = info->width;
		const uint32_t height = info->height;

		/* 10-bit samples in 16-bit words: luma rows are width*2 bytes */
		set_gpu_converted_plane(width * 2, height, input->linesize[0],
					output->linesize[0], input->data[0],
					output->data[0]);

		const uint32_t height_d2 = height / 2;

		/* chroma: half width, 2 bytes per sample -> width bytes */
		set_gpu_converted_plane(width, height_d2, input->linesize[1],
					output->linesize[1], input->data[1],
					output->data[1]);

		set_gpu_converted_plane(width, height_d2, input->linesize[2],
					output->linesize[2], input->data[2],
					output->data[2]);

		break;
	}
	case VIDEO_FORMAT_P010: {
		const uint32_t width_x2 = info->width * 2;
		const uint32_t height = info->height;
		const uint32_t height_d2 = height / 2;
		if (input->linesize[1]) {
			set_gpu_converted_plane(width_x2, height,
						input->linesize[0],
						output->linesize[0],
						input->data[0],
						output->data[0]);
			set_gpu_converted_plane(width_x2, height_d2,
						input->linesize[1],
						output->linesize[1],
						input->data[1],
						output->data[1]);
		} else {
			/* single-texture layout; UV plane follows Y */
			const uint8_t *const in_uv = set_gpu_converted_plane(
				width_x2, height, input->linesize[0],
				output->linesize[0], input->data[0],
				output->data[0]);
			set_gpu_converted_plane(width_x2, height_d2,
						input->linesize[0],
						output->linesize[1], in_uv,
						output->data[1]);
		}

		break;
	}
	case VIDEO_FORMAT_P216: {
		const uint32_t width_x2 = info->width * 2;
		const uint32_t height = info->height;

		set_gpu_converted_plane(width_x2, height, input->linesize[0],
					output->linesize[0], input->data[0],
					output->data[0]);

		/* 4:2:2 -> full-height UV plane */
		set_gpu_converted_plane(width_x2, height, input->linesize[1],
					output->linesize[1], input->data[1],
					output->data[1]);

		break;
	}
	case VIDEO_FORMAT_P416: {
		const uint32_t height = info->height;

		set_gpu_converted_plane(info->width * 2, height,
					input->linesize[0], output->linesize[0],
					input->data[0], output->data[0]);

		/* 4:4:4 interleaved UV: 2 samples * 2 bytes per pixel */
		set_gpu_converted_plane(info->width * 4, height,
					input->linesize[1], output->linesize[1],
					input->data[1], output->data[1]);

		break;
	}

	case VIDEO_FORMAT_NONE:
	case VIDEO_FORMAT_YVYU:
	case VIDEO_FORMAT_YUY2:
	case VIDEO_FORMAT_UYVY:
	case VIDEO_FORMAT_RGBA:
	case VIDEO_FORMAT_BGRA:
	case VIDEO_FORMAT_BGRX:
	case VIDEO_FORMAT_Y800:
	case VIDEO_FORMAT_BGR3:
	case VIDEO_FORMAT_I412:
	case VIDEO_FORMAT_I422:
	case VIDEO_FORMAT_I210:
	case VIDEO_FORMAT_I40A:
	case VIDEO_FORMAT_I42A:
	case VIDEO_FORMAT_YUVA:
	case VIDEO_FORMAT_YA2L:
	case VIDEO_FORMAT_AYUV:
	case VIDEO_FORMAT_V210:
	case VIDEO_FORMAT_R10L:
		/* unimplemented */
		;
	}
}
|
|
|
|
|
2019-06-23 07:13:45 +02:00
|
|
|
static inline void copy_rgbx_frame(struct video_frame *output,
|
|
|
|
const struct video_data *input,
|
|
|
|
const struct video_output_info *info)
|
2015-04-16 03:41:09 +02:00
|
|
|
{
|
|
|
|
uint8_t *in_ptr = input->data[0];
|
|
|
|
uint8_t *out_ptr = output->data[0];
|
|
|
|
|
2015-07-21 17:58:37 +02:00
|
|
|
/* if the line sizes match, do a single copy */
|
|
|
|
if (input->linesize[0] == output->linesize[0]) {
|
2020-05-21 09:23:26 +02:00
|
|
|
memcpy(out_ptr, in_ptr,
|
|
|
|
(size_t)input->linesize[0] * (size_t)info->height);
|
2015-07-21 17:58:37 +02:00
|
|
|
} else {
|
2020-05-21 09:23:26 +02:00
|
|
|
const size_t copy_size = (size_t)info->width * 4;
|
2015-07-21 17:58:37 +02:00
|
|
|
for (size_t y = 0; y < info->height; y++) {
|
2020-05-21 09:23:26 +02:00
|
|
|
memcpy(out_ptr, in_ptr, copy_size);
|
2015-07-21 17:58:37 +02:00
|
|
|
in_ptr += input->linesize[0];
|
|
|
|
out_ptr += output->linesize[0];
|
|
|
|
}
|
2015-04-16 03:41:09 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void output_video_data(struct obs_core_video_mix *video,
|
2019-06-23 07:13:45 +02:00
|
|
|
struct video_data *input_frame, int count)
|
2014-02-09 13:51:06 +01:00
|
|
|
{
|
|
|
|
const struct video_output_info *info;
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
struct video_frame output_frame;
|
|
|
|
bool locked;
|
|
|
|
|
2014-08-06 00:07:54 +02:00
|
|
|
info = video_output_get_info(video->video);
|
2014-02-09 13:51:06 +01:00
|
|
|
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
locked = video_output_lock_frame(video->video, &output_frame, count,
|
2019-06-23 07:13:45 +02:00
|
|
|
input_frame->timestamp);
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
if (locked) {
|
|
|
|
if (video->gpu_conversion) {
|
2022-07-20 08:07:04 +02:00
|
|
|
set_gpu_converted_data(&output_frame, input_frame,
|
|
|
|
info);
|
2015-04-16 03:41:09 +02:00
|
|
|
} else {
|
|
|
|
copy_rgbx_frame(&output_frame, input_frame, info);
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
}
|
2014-02-17 03:28:21 +01:00
|
|
|
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
video_output_unlock_frame(video->video);
|
2014-02-17 03:28:21 +01:00
|
|
|
}
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
}
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2023-09-06 18:59:12 +02:00
|
|
|
/* Queues an encoder on the core video state's "ready" list so the video
 * thread (see video_sleep) can stamp its encoder group's shared start
 * timestamp on the next frame.  A weak reference is stored, so the
 * encoder may be destroyed before the video thread gets to it. */
void add_ready_encoder_group(obs_encoder_t *encoder)
{
	pthread_mutex_lock(&obs->video.encoder_group_mutex);

	obs_weak_encoder_t *weak = obs_encoder_get_weak_encoder(encoder);
	da_push_back(obs->video.ready_encoder_groups, &weak);

	pthread_mutex_unlock(&obs->video.encoder_group_mutex);
}
|
|
|
|
|
2022-06-15 19:07:50 +02:00
|
|
|
static inline void video_sleep(struct obs_core_video *video, uint64_t *p_time,
|
2019-06-23 07:13:45 +02:00
|
|
|
uint64_t interval_ns)
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
{
|
|
|
|
struct obs_vframe_info vframe_info;
|
|
|
|
uint64_t cur_time = *p_time;
|
|
|
|
uint64_t t = cur_time + interval_ns;
|
|
|
|
int count;
|
|
|
|
|
2015-01-05 23:07:22 +01:00
|
|
|
if (os_sleepto_ns(t)) {
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
*p_time = t;
|
|
|
|
count = 1;
|
|
|
|
} else {
|
2022-02-28 18:15:31 +01:00
|
|
|
const uint64_t udiff = os_gettime_ns() - cur_time;
|
|
|
|
int64_t diff;
|
|
|
|
memcpy(&diff, &udiff, sizeof(diff));
|
2022-03-18 08:37:36 +01:00
|
|
|
const uint64_t clamped_diff = (diff > (int64_t)interval_ns)
|
|
|
|
? (uint64_t)diff
|
|
|
|
: interval_ns;
|
2022-02-28 18:15:31 +01:00
|
|
|
count = (int)(clamped_diff / interval_ns);
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
*p_time = cur_time + interval_ns * count;
|
|
|
|
}
|
|
|
|
|
2016-01-25 12:58:51 +01:00
|
|
|
video->total_frames += count;
|
|
|
|
video->lagged_frames += count - 1;
|
|
|
|
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
vframe_info.timestamp = cur_time;
|
|
|
|
vframe_info.count = count;
|
2019-02-06 02:37:40 +01:00
|
|
|
|
2023-09-06 18:59:12 +02:00
|
|
|
pthread_mutex_lock(&video->encoder_group_mutex);
|
|
|
|
for (size_t i = 0; i < video->ready_encoder_groups.num; i++) {
|
|
|
|
obs_encoder_t *encoder = obs_weak_encoder_get_encoder(
|
|
|
|
video->ready_encoder_groups.array[i]);
|
|
|
|
obs_weak_encoder_release(video->ready_encoder_groups.array[i]);
|
|
|
|
if (!encoder)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (encoder->encoder_group) {
|
2024-05-08 05:49:23 +02:00
|
|
|
struct obs_encoder_group *group =
|
|
|
|
encoder->encoder_group;
|
2023-09-06 18:59:12 +02:00
|
|
|
pthread_mutex_lock(&group->mutex);
|
2024-05-08 07:31:23 +02:00
|
|
|
if (group->num_encoders_started >=
|
|
|
|
group->encoders.num &&
|
|
|
|
!group->start_timestamp)
|
2023-09-06 18:59:12 +02:00
|
|
|
group->start_timestamp = *p_time;
|
|
|
|
pthread_mutex_unlock(&group->mutex);
|
|
|
|
}
|
|
|
|
obs_encoder_release(encoder);
|
|
|
|
}
|
|
|
|
da_clear(video->ready_encoder_groups);
|
|
|
|
pthread_mutex_unlock(&video->encoder_group_mutex);
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
pthread_mutex_lock(&obs->video.mixes_mutex);
|
|
|
|
for (size_t i = 0, num = obs->video.mixes.num; i < num; i++) {
|
2022-08-12 03:01:29 +02:00
|
|
|
struct obs_core_video_mix *video = obs->video.mixes.array[i];
|
2022-06-01 23:34:13 +02:00
|
|
|
bool raw_active = video->raw_was_active;
|
|
|
|
bool gpu_active = video->gpu_was_active;
|
|
|
|
|
2022-06-15 19:07:50 +02:00
|
|
|
if (raw_active)
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_push_back(&video->vframe_info_buffer,
|
|
|
|
&vframe_info, sizeof(vframe_info));
|
2022-06-15 19:07:50 +02:00
|
|
|
if (gpu_active)
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_push_back(&video->vframe_info_buffer_gpu,
|
|
|
|
&vframe_info, sizeof(vframe_info));
|
2022-06-01 23:34:13 +02:00
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&obs->video.mixes_mutex);
|
2014-02-06 04:36:21 +01:00
|
|
|
}
|
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
/* Profiler section names used by output_frame() below. */
static const char *output_frame_gs_context_name = "gs_context(video->graphics)";
static const char *output_frame_render_video_name = "render_video";
static const char *output_frame_download_frame_name = "download_frame";
static const char *output_frame_gs_flush_name = "gs_flush";
static const char *output_frame_output_video_data_name = "output_video_data";
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void output_frame(struct obs_core_video_mix *video)
|
2014-02-06 04:36:21 +01:00
|
|
|
{
|
2022-06-01 23:34:13 +02:00
|
|
|
const bool raw_active = video->raw_was_active;
|
|
|
|
const bool gpu_active = video->gpu_was_active;
|
|
|
|
|
2019-06-23 07:13:45 +02:00
|
|
|
int cur_texture = video->cur_texture;
|
|
|
|
int prev_texture = cur_texture == 0 ? NUM_TEXTURES - 1
|
|
|
|
: cur_texture - 1;
|
2014-02-18 21:37:56 +01:00
|
|
|
struct video_data frame;
|
2019-02-20 23:26:21 +01:00
|
|
|
bool frame_ready = 0;
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2014-02-18 21:37:56 +01:00
|
|
|
memset(&frame, 0, sizeof(struct video_data));
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_start(output_frame_gs_context_name);
|
2022-06-01 23:34:13 +02:00
|
|
|
gs_enter_context(obs->video.graphics);
|
2015-07-11 08:04:46 +02:00
|
|
|
|
|
|
|
profile_start(output_frame_render_video_name);
|
2019-04-03 08:23:37 +02:00
|
|
|
GS_DEBUG_MARKER_BEGIN(GS_DEBUG_COLOR_RENDER_VIDEO,
|
2019-06-23 07:13:45 +02:00
|
|
|
output_frame_render_video_name);
|
2019-05-24 10:03:21 +02:00
|
|
|
render_video(video, raw_active, gpu_active, cur_texture);
|
2019-04-03 08:23:37 +02:00
|
|
|
GS_DEBUG_MARKER_END();
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(output_frame_render_video_name);
|
|
|
|
|
2018-02-01 03:54:36 +01:00
|
|
|
if (raw_active) {
|
|
|
|
profile_start(output_frame_download_frame_name);
|
|
|
|
frame_ready = download_frame(video, prev_texture, &frame);
|
|
|
|
profile_end(output_frame_download_frame_name);
|
|
|
|
}
|
2015-07-11 08:04:46 +02:00
|
|
|
|
|
|
|
profile_start(output_frame_gs_flush_name);
|
2014-12-04 07:14:23 +01:00
|
|
|
gs_flush();
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(output_frame_gs_flush_name);
|
|
|
|
|
2014-08-08 08:42:07 +02:00
|
|
|
gs_leave_context();
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(output_frame_gs_context_name);
|
2014-02-09 13:51:06 +01:00
|
|
|
|
2018-02-01 03:54:36 +01:00
|
|
|
if (raw_active && frame_ready) {
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
struct obs_vframe_info vframe_info;
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_pop_front(&video->vframe_info_buffer, &vframe_info,
|
|
|
|
sizeof(vframe_info));
|
2014-10-22 05:08:39 +02:00
|
|
|
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
frame.timestamp = vframe_info.timestamp;
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_start(output_frame_output_video_data_name);
|
libobs: Redesign/optimize frame encoding handling
Previously, the design for the interaction between the encoder thread
and the graphics thread was that the encoder thread would signal to the
graphics thread when to start drawing each frame. The original idea
behind this was to prevent mutually cascading stalls of encoding or
graphics rendering (i.e., if rendering took too long, then encoding
would have to catch up, then rendering would have to catch up again, and
so on, cascading upon each other). The ultimate goal was to prevent
encoding from impacting graphics and vise versa.
However, eventually it was realized that there were some fundamental
flaws with this design.
1. Stray frame duplication. You could not guarantee that a frame would
render on time, so sometimes frames would unintentionally be lost if
there was any sort of minor hiccup or if the thread took too long to
be scheduled I'm guessing.
2. Frame timing in the rendering thread was less accurate. The only
place where frame timing was accurate was in the encoder thread, and
the graphics thread was at the whim of thread scheduling. On higher
end computers it was typically fine, but it was just generally not
guaranteed that a frame would be rendered when it was supposed to be
rendered.
So the solution (originally proposed by r1ch and paibox) is to instead
keep the encoding and graphics threads separate as usual, but instead of
the encoder thread controlling the graphics thread, the graphics thread
now controls the encoder thread. The encoder thread keeps a limited
cache of frames, then the graphics thread copies frames in to the cache
and increments a semaphore to schedule the encoder thread to encode that
data.
In the cache, each frame has an encode counter. If the frame cache is
full (e.g., the encoder taking too long to return frames), it will not
cache a new frame, but instead will just increment the counter on the
last frame in the cache to schedule that frame to encode again, ensuring
that frames are on time and reducing CPU usage by lowering video
complexity. If the graphics thread takes too long to render a frame,
then it will add that frame with the count value set to the total amount
of frames that were missed (actual legitimately duplicated frames).
Because the cache gives many frames of breathing room for the encoder to
encode frames, this design helps improve results especially when using
encoding presets that have higher complexity and CPU usage, minimizing
the risk of needlessly skipped or duplicated frames.
I also managed to sneak in what should be a bit of an optimization to
reduce copying of frame data, though how much of an optimization it
ultimately ends up being is debatable.
So to sum it up, this commit increases accuracy of frame timing,
completely removes stray frame duplication, gives better results for
higher complexity encoding presets, and potentially optimizes the frame
pipeline a tiny bit.
2014-12-31 10:53:13 +01:00
|
|
|
output_video_data(video, &frame, vframe_info.count);
|
2015-07-11 08:04:46 +02:00
|
|
|
profile_end(output_frame_output_video_data_name);
|
2014-10-22 05:08:39 +02:00
|
|
|
}
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2014-02-06 04:36:21 +01:00
|
|
|
if (++video->cur_texture == NUM_TEXTURES)
|
|
|
|
video->cur_texture = 0;
|
2013-10-01 04:37:13 +02:00
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void output_frames(void)
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&obs->video.mixes_mutex);
|
|
|
|
for (size_t i = 0, num = obs->video.mixes.num; i < num; i++) {
|
2022-08-12 03:01:29 +02:00
|
|
|
struct obs_core_video_mix *mix = obs->video.mixes.array[i];
|
2022-06-01 23:34:13 +02:00
|
|
|
if (mix->view) {
|
|
|
|
output_frame(mix);
|
|
|
|
} else {
|
2022-08-12 03:01:29 +02:00
|
|
|
obs->video.mixes.array[i] = NULL;
|
2022-06-01 23:34:13 +02:00
|
|
|
obs_free_video_mix(mix);
|
|
|
|
da_erase(obs->video.mixes, i);
|
|
|
|
i--;
|
|
|
|
num--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&obs->video.mixes_mutex);
|
|
|
|
}
|
|
|
|
|
2015-10-15 10:00:14 +02:00
|
|
|
/* UTF-8 encoding of U+00A0 (no-break space); used in profiler names. */
#define NBSP "\xC2\xA0"
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static void clear_base_frame_data(struct obs_core_video_mix *video)
|
2018-02-01 03:54:36 +01:00
|
|
|
{
|
2019-05-24 10:03:21 +02:00
|
|
|
video->texture_rendered = false;
|
|
|
|
video->texture_converted = false;
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_free(&video->vframe_info_buffer);
|
2018-02-01 03:54:36 +01:00
|
|
|
video->cur_texture = 0;
|
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
static void clear_raw_frame_data(struct obs_core_video_mix *video)
|
2019-02-06 02:37:40 +01:00
|
|
|
{
|
|
|
|
memset(video->textures_copied, 0, sizeof(video->textures_copied));
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_free(&video->vframe_info_buffer);
|
2019-02-06 02:37:40 +01:00
|
|
|
}
|
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
/* Resets state specific to the GPU encoder path: the queued GPU
 * frame-timing metadata.  Called on a GPU inactive -> active
 * transition. */
static void clear_gpu_frame_data(struct obs_core_video_mix *video)
{
	deque_free(&video->vframe_info_buffer_gpu);
}
|
|
|
|
|
2020-03-14 18:39:24 +01:00
|
|
|
/* Defined elsewhere; set to true below so code can detect whether it
 * is running on the libobs graphics thread. */
extern THREAD_LOCAL bool is_graphics_thread;
|
|
|
|
|
|
|
|
static void execute_graphics_tasks(void)
|
|
|
|
{
|
|
|
|
struct obs_core_video *video = &obs->video;
|
|
|
|
bool tasks_remaining = true;
|
|
|
|
|
|
|
|
while (tasks_remaining) {
|
|
|
|
pthread_mutex_lock(&video->task_mutex);
|
|
|
|
if (video->tasks.size) {
|
|
|
|
struct obs_task_info info;
|
2023-11-30 16:11:18 +01:00
|
|
|
deque_pop_front(&video->tasks, &info, sizeof(info));
|
2020-03-14 18:39:24 +01:00
|
|
|
info.task(info.param);
|
|
|
|
}
|
|
|
|
tasks_remaining = !!video->tasks.size;
|
|
|
|
pthread_mutex_unlock(&video->task_mutex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-21 09:23:26 +02:00
|
|
|
#ifdef _WIN32
|
|
|
|
|
|
|
|
/* Function pointers resolved at runtime from the optional
 * "libobs-winrt" module (see load_winrt_imports below).
 * NOTE(review): the struct tag `winrt_disaptcher` is misspelled, but
 * it is declared by the external module — do not "fix" it here. */
struct winrt_exports {
	void (*winrt_initialize)();
	void (*winrt_uninitialize)();
	struct winrt_disaptcher *(*winrt_dispatcher_init)();
	void (*winrt_dispatcher_free)(struct winrt_disaptcher *dispatcher);
	void (*winrt_capture_thread_start)();
	void (*winrt_capture_thread_stop)();
};
|
|
|
|
|
|
|
|
/* Resolves one symbol named `func` from `module` into `exports->func`.
 * On failure, logs an error and sets the expansion site's local
 * `success` flag to false (but keeps importing the rest).  Relies on
 * `exports`, `module`, `module_name`, and `success` being in scope. */
#define WINRT_IMPORT(func)                                        \
	do {                                                      \
		exports->func = os_dlsym(module, #func);          \
		if (!exports->func) {                             \
			success = false;                          \
			blog(LOG_ERROR,                           \
			     "Could not load function '%s' from " \
			     "module '%s'",                       \
			     #func, module_name);                 \
		}                                                 \
	} while (false)
|
|
|
|
|
|
|
|
/* Resolves every required WinRT entry point from `module` into
 * `exports`.  Returns false if any symbol failed to resolve (each
 * failure is already logged by WINRT_IMPORT); all imports are
 * attempted regardless. */
static bool load_winrt_imports(struct winrt_exports *exports, void *module,
			       const char *module_name)
{
	bool success = true;

	WINRT_IMPORT(winrt_initialize);
	WINRT_IMPORT(winrt_uninitialize);
	WINRT_IMPORT(winrt_dispatcher_init);
	WINRT_IMPORT(winrt_dispatcher_free);
	WINRT_IMPORT(winrt_capture_thread_start);
	WINRT_IMPORT(winrt_capture_thread_stop);

	return success;
}
|
|
|
|
|
|
|
|
/* Lifetime state for the optional libobs-winrt module, owned by
 * obs_graphics_thread (init_winrt_state / uninit_winrt_state). */
struct winrt_state {
	bool loaded;                     /* module opened AND all imports resolved */
	void *winrt_module;              /* os_dlopen handle, or NULL */
	struct winrt_exports exports;    /* resolved entry points (valid if loaded) */
	struct winrt_disaptcher *dispatcher; /* created when loaded; may be NULL */
};
|
|
|
|
|
|
|
|
/* Attempts to load the optional "libobs-winrt" module and, on success,
 * initializes WinRT, creates its dispatcher, and starts the capture
 * thread inside the graphics context.  Failure to load is non-fatal:
 * `loaded` simply stays false and the rest of the thread runs without
 * WinRT capture support. */
static void init_winrt_state(struct winrt_state *winrt)
{
	static const char *const module_name = "libobs-winrt";

	winrt->winrt_module = os_dlopen(module_name);
	/* Only counts as loaded if every import resolved. */
	winrt->loaded = winrt->winrt_module &&
			load_winrt_imports(&winrt->exports, winrt->winrt_module,
					   module_name);
	winrt->dispatcher = NULL;
	if (winrt->loaded) {
		winrt->exports.winrt_initialize();
		winrt->dispatcher = winrt->exports.winrt_dispatcher_init();

		/* capture thread start requires the graphics context */
		gs_enter_context(obs->video.graphics);
		winrt->exports.winrt_capture_thread_start();
		gs_leave_context();
	}
}
|
|
|
|
|
|
|
|
/* Tears down whatever init_winrt_state set up, in reverse order:
 * stop the capture thread, free the dispatcher (if one was created),
 * uninitialize WinRT, then close the module handle.  Safe to call
 * when the module never loaded. */
static void uninit_winrt_state(struct winrt_state *winrt)
{
	if (winrt->winrt_module) {
		if (winrt->loaded) {
			winrt->exports.winrt_capture_thread_stop();
			if (winrt->dispatcher)
				winrt->exports.winrt_dispatcher_free(
					winrt->dispatcher);
			winrt->exports.winrt_uninitialize();
		}

		os_dlclose(winrt->winrt_module);
	}
}
|
|
|
|
|
|
|
|
#endif // #ifdef _WIN32
|
|
|
|
|
2015-07-11 08:04:46 +02:00
|
|
|
/* Profiler label strings for the stages of obs_graphics_thread_loop(). */
static const char *tick_sources_name = "tick_sources";
static const char *render_displays_name = "render_displays";
static const char *output_frame_name = "output_frame";
|
2022-06-01 23:34:13 +02:00
|
|
|
static inline void update_active_state(struct obs_core_video_mix *video)
|
2013-10-01 04:37:13 +02:00
|
|
|
{
|
2022-06-01 23:34:13 +02:00
|
|
|
const bool raw_was_active = video->raw_was_active;
|
|
|
|
const bool gpu_was_active = video->gpu_was_active;
|
|
|
|
const bool was_active = video->was_active;
|
2020-05-21 09:23:26 +02:00
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
bool raw_active = os_atomic_load_long(&video->raw_active) > 0;
|
2022-01-16 19:08:41 +01:00
|
|
|
const bool gpu_active =
|
2022-06-01 23:34:13 +02:00
|
|
|
os_atomic_load_long(&video->gpu_encoder_active) > 0;
|
2020-06-08 18:38:00 +02:00
|
|
|
const bool active = raw_active || gpu_active;
|
2013-10-01 04:37:13 +02:00
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
if (!was_active && active)
|
|
|
|
clear_base_frame_data(video);
|
|
|
|
if (!raw_was_active && raw_active)
|
|
|
|
clear_raw_frame_data(video);
|
|
|
|
if (!gpu_was_active && gpu_active)
|
|
|
|
clear_gpu_frame_data(video);
|
2020-03-14 18:39:24 +01:00
|
|
|
|
2022-06-01 23:34:13 +02:00
|
|
|
video->gpu_was_active = gpu_active;
|
|
|
|
video->raw_was_active = raw_active;
|
|
|
|
video->was_active = active;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void update_active_states(void)
|
|
|
|
{
|
|
|
|
pthread_mutex_lock(&obs->video.mixes_mutex);
|
|
|
|
for (size_t i = 0, num = obs->video.mixes.num; i < num; i++)
|
2022-08-12 03:01:29 +02:00
|
|
|
update_active_state(obs->video.mixes.array[i]);
|
2022-06-01 23:34:13 +02:00
|
|
|
pthread_mutex_unlock(&obs->video.mixes_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool stop_requested(void)
|
|
|
|
{
|
|
|
|
bool success = true;
|
|
|
|
|
|
|
|
pthread_mutex_lock(&obs->video.mixes_mutex);
|
|
|
|
for (size_t i = 0, num = obs->video.mixes.num; i < num; i++)
|
2022-08-12 03:01:29 +02:00
|
|
|
if (!video_output_stopped(obs->video.mixes.array[i]->video))
|
2022-06-01 23:34:13 +02:00
|
|
|
success = false;
|
|
|
|
pthread_mutex_unlock(&obs->video.mixes_mutex);
|
|
|
|
|
|
|
|
return success;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Runs one iteration of the graphics thread: tick sources, render and
 * output every mix, render displays, run queued tasks, sleep until the
 * next frame interval, and update the rolling FPS / frame-time stats.
 * Returns true while the loop should keep running (i.e. until every
 * mix's video output has stopped). */
bool obs_graphics_thread_loop(struct obs_graphics_context *context)
{
	uint64_t frame_start = os_gettime_ns();
	uint64_t frame_time_ns;

	/* Latch encoder activity and clear stale state on activation
	 * edges before any rendering happens this frame. */
	update_active_states();

	profile_start(context->video_thread_name);
	source_profiler_frame_begin();

	gs_enter_context(obs->video.graphics);
	gs_begin_frame();
	gs_leave_context();

	profile_start(tick_sources_name);
	context->last_time =
		tick_sources(obs->video.video_time, context->last_time);
	profile_end(tick_sources_name);

#ifdef _WIN32
	/* Pump the Win32 message queue for this thread (needed by
	 * components that post messages to the graphics thread). */
	MSG msg;
	while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) {
		TranslateMessage(&msg);
		DispatchMessage(&msg);
	}
#endif

	source_profiler_render_begin();
	profile_start(output_frame_name);
	output_frames();
	profile_end(output_frame_name);

	profile_start(render_displays_name);
	render_displays();
	profile_end(render_displays_name);
	source_profiler_render_end();

	execute_graphics_tasks();

	/* Measured before video_sleep so the sleep itself is excluded
	 * from the per-frame work time. */
	frame_time_ns = os_gettime_ns() - frame_start;

	source_profiler_frame_collect();
	profile_end(context->video_thread_name);

	profile_reenable_thread();

	video_sleep(&obs->video, &obs->video.video_time, context->interval);

	context->frame_time_total_ns += frame_time_ns;
	context->fps_total_ns += (obs->video.video_time - context->last_time);
	context->fps_total_frames++;

	/* Publish averaged stats roughly once per second, then reset
	 * the accumulators. */
	if (context->fps_total_ns >= 1000000000ULL) {
		obs->video.video_fps =
			(double)context->fps_total_frames /
			((double)context->fps_total_ns / 1000000000.0);
		obs->video.video_avg_frame_time_ns =
			context->frame_time_total_ns /
			(uint64_t)context->fps_total_frames;

		context->frame_time_total_ns = 0;
		context->fps_total_ns = 0;
		context->fps_total_frames = 0;
	}

	return !stop_requested();
}
|
2017-05-13 01:21:51 +02:00
|
|
|
|
2020-06-08 18:38:00 +02:00
|
|
|
/* Entry point of the libobs graphics thread.  Sets up per-thread state
 * (WinRT on Windows, thread name, profiler root, RNG seed), then spins
 * obs_graphics_thread_loop() once per frame interval until every video
 * output has stopped.  `param` is unused. */
void *obs_graphics_thread(void *param)
{
#ifdef _WIN32
	struct winrt_state winrt;
	init_winrt_state(&winrt);
#endif // #ifdef _WIN32

	is_graphics_thread = true;

	const uint64_t interval = obs->video.video_frame_interval_ns;

	obs->video.video_time = os_gettime_ns();

	os_set_thread_name("libobs: graphics thread");

	/* Profiler root name embeds the frame interval in ms, e.g.
	 * "obs_graphics_thread(16.6667 ms)" (NBSP between value/unit). */
	const char *video_thread_name = profile_store_name(
		obs_get_profiler_name_store(),
		"obs_graphics_thread(%g" NBSP "ms)", interval / 1000000.);
	profile_register_root(video_thread_name, interval);

	srand((unsigned int)time(NULL));

	struct obs_graphics_context context;
	context.interval = interval;
	context.frame_time_total_ns = 0;
	context.fps_total_ns = 0;
	context.fps_total_frames = 0;
	context.last_time = 0;
	context.video_thread_name = video_thread_name;

	/* On macOS each iteration runs inside an autorelease pool
	 * (wrapper defined elsewhere); otherwise call the loop directly. */
#ifdef __APPLE__
	while (obs_graphics_thread_loop_autorelease(&context))
#else
	while (obs_graphics_thread_loop(&context))
#endif
		;

#ifdef _WIN32
	uninit_winrt_state(&winrt);
#endif

	UNUSED_PARAMETER(param);
	return NULL;
}
|