mirror of
https://github.com/obsproject/obs-studio.git
synced 2024-09-20 13:08:50 +02:00
obs-filters: Reduce 3D LUT calculations
Single tap is good enough. Intel UHD Graphics 750: 860 µs -> 591 µs
This commit is contained in:
parent
8e8c29d84c
commit
873e3a0aae
@@ -292,10 +292,12 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
|
||||
filter->target = make_clut_texture_png(
|
||||
filter->image.format, filter->image.cx,
|
||||
filter->image.cy, filter->image.texture_data);
|
||||
const float clut_scale = (float)(LUT_WIDTH - 1);
|
||||
const float width_i = 1.0f / (float)LUT_WIDTH;
|
||||
const float clut_scale = 1.0f - width_i;
|
||||
const float offset = 0.5f * width_i;
|
||||
vec3_set(&filter->clut_scale, clut_scale, clut_scale,
|
||||
clut_scale);
|
||||
vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f);
|
||||
vec3_set(&filter->clut_offset, offset, offset, offset);
|
||||
} else if (filter->cube_data) {
|
||||
const uint32_t width = filter->cube_width;
|
||||
if (filter->clut_dim == CLUT_1D) {
|
||||
@@ -324,16 +326,13 @@ static void color_grade_filter_update(void *data, obs_data_t *settings)
|
||||
vec3_mul(&filter->clut_offset, &filter->clut_offset,
|
||||
&filter->clut_scale);
|
||||
|
||||
/* 1D shader wants normalized UVW */
|
||||
if (filter->clut_dim == CLUT_1D) {
|
||||
vec3_divf(&filter->clut_scale,
|
||||
&filter->clut_scale, (float)width);
|
||||
|
||||
vec3_addf(&filter->clut_offset,
|
||||
&filter->clut_offset, 0.5f);
|
||||
vec3_divf(&filter->clut_offset,
|
||||
&filter->clut_offset, (float)width);
|
||||
}
|
||||
/* want normalized UVW */
|
||||
vec3_divf(&filter->clut_scale, &filter->clut_scale,
|
||||
(float)width);
|
||||
vec3_addf(&filter->clut_offset, &filter->clut_offset,
|
||||
0.5f);
|
||||
vec3_divf(&filter->clut_offset, &filter->clut_offset,
|
||||
(float)width);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -455,9 +454,6 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect)
|
||||
param = gs_effect_get_param_by_name(filter->effect, "domain_max");
|
||||
gs_effect_set_vec3(param, &filter->domain_max);
|
||||
|
||||
param = gs_effect_get_param_by_name(filter->effect, "cube_width_i");
|
||||
gs_effect_set_float(param, 1.0f / filter->cube_width);
|
||||
|
||||
gs_blend_state_push();
|
||||
gs_blend_function(GS_BLEND_ONE, GS_BLEND_INVSRCALPHA);
|
||||
|
||||
|
@@ -8,7 +8,6 @@ uniform float3 clut_scale;
|
||||
uniform float3 clut_offset;
|
||||
uniform float3 domain_min;
|
||||
uniform float3 domain_max;
|
||||
uniform float cube_width_i;
|
||||
|
||||
sampler_state textureSampler {
|
||||
Filter = Linear;
|
||||
@@ -85,78 +84,8 @@ float4 LUT3D(VertDataOut v_in) : TARGET
|
||||
g >= domain_min.g && g <= domain_max.g &&
|
||||
b >= domain_min.b && b <= domain_max.b)
|
||||
{
|
||||
float3 clut_pos = nonlinear * clut_scale + clut_offset;
|
||||
float3 floor_pos = floor(clut_pos);
|
||||
|
||||
float3 fracRGB = clut_pos - floor_pos;
|
||||
|
||||
float3 uvw0 = (floor_pos + 0.5) * cube_width_i;
|
||||
float3 uvw3 = (floor_pos + 1.5) * cube_width_i;
|
||||
|
||||
float fracL, fracM, fracS;
|
||||
float3 uvw1, uvw2;
|
||||
if (fracRGB.r < fracRGB.g) {
|
||||
if (fracRGB.r < fracRGB.b) {
|
||||
if (fracRGB.g < fracRGB.b) {
|
||||
// f(R) < f(G) < f(B)
|
||||
fracL = fracRGB.b;
|
||||
fracM = fracRGB.g;
|
||||
fracS = fracRGB.r;
|
||||
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
|
||||
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
|
||||
} else {
|
||||
// f(R) < f(B) <= f(G)
|
||||
fracL = fracRGB.g;
|
||||
fracM = fracRGB.b;
|
||||
fracS = fracRGB.r;
|
||||
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
|
||||
uvw2 = float3(uvw0.x, uvw3.y, uvw3.z);
|
||||
}
|
||||
} else {
|
||||
// f(B) <= f(R) < f(G)
|
||||
fracL = fracRGB.g;
|
||||
fracM = fracRGB.r;
|
||||
fracS = fracRGB.b;
|
||||
uvw1 = float3(uvw0.x, uvw3.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
|
||||
}
|
||||
} else if (fracRGB.r < fracRGB.b) {
|
||||
// f(G) <= f(R) < f(B)
|
||||
fracL = fracRGB.b;
|
||||
fracM = fracRGB.r;
|
||||
fracS = fracRGB.g;
|
||||
uvw1 = float3(uvw0.x, uvw0.y, uvw3.z);
|
||||
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
|
||||
} else if (fracRGB.g < fracRGB.b) {
|
||||
// f(G) < f(B) <= f(R)
|
||||
fracL = fracRGB.r;
|
||||
fracM = fracRGB.b;
|
||||
fracS = fracRGB.g;
|
||||
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw0.y, uvw3.z);
|
||||
} else {
|
||||
// f(B) <= f(G) <= f(R)
|
||||
fracL = fracRGB.r;
|
||||
fracM = fracRGB.g;
|
||||
fracS = fracRGB.b;
|
||||
uvw1 = float3(uvw3.x, uvw0.y, uvw0.z);
|
||||
uvw2 = float3(uvw3.x, uvw3.y, uvw0.z);
|
||||
}
|
||||
|
||||
/* use filtering to collapse 4 taps to 2 */
|
||||
/* use max to kill potential zero-divide NaN */
|
||||
|
||||
float coeff01 = (1.0 - fracM);
|
||||
float weight01 = max((fracL - fracM) / coeff01, 0.0);
|
||||
float3 uvw01 = lerp(uvw0, uvw1, weight01);
|
||||
float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb;
|
||||
|
||||
float coeff23 = fracM;
|
||||
float weight23 = max(fracS / coeff23, 0.0);
|
||||
float3 uvw23 = lerp(uvw2, uvw3, weight23);
|
||||
float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb;
|
||||
|
||||
float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23);
|
||||
float3 clut_uvw = nonlinear * clut_scale + clut_offset;
|
||||
float3 luttedColor = clut_3d.Sample(textureSampler, clut_uvw).rgb;
|
||||
textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user