From 873e3a0aaec9c341a403bc0a946644823f7c45ee Mon Sep 17 00:00:00 2001 From: jpark37 Date: Wed, 16 Feb 2022 23:13:19 -0800 Subject: [PATCH] obs-filters: Reduce 3D LUT calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single tap is good enough. Intel UHD Graphics 750: 860 µs -> 591 µs --- plugins/obs-filters/color-grade-filter.c | 26 +++---- .../data/color_grade_filter.effect | 75 +------------------ 2 files changed, 13 insertions(+), 88 deletions(-) diff --git a/plugins/obs-filters/color-grade-filter.c b/plugins/obs-filters/color-grade-filter.c index 42f4e2e31..7daf39111 100644 --- a/plugins/obs-filters/color-grade-filter.c +++ b/plugins/obs-filters/color-grade-filter.c @@ -292,10 +292,12 @@ static void color_grade_filter_update(void *data, obs_data_t *settings) filter->target = make_clut_texture_png( filter->image.format, filter->image.cx, filter->image.cy, filter->image.texture_data); - const float clut_scale = (float)(LUT_WIDTH - 1); + const float width_i = 1.0f / (float)LUT_WIDTH; + const float clut_scale = 1.0f - width_i; + const float offset = 0.5f * width_i; vec3_set(&filter->clut_scale, clut_scale, clut_scale, clut_scale); - vec3_set(&filter->clut_offset, 0.f, 0.f, 0.f); + vec3_set(&filter->clut_offset, offset, offset, offset); } else if (filter->cube_data) { const uint32_t width = filter->cube_width; if (filter->clut_dim == CLUT_1D) { @@ -324,16 +326,13 @@ static void color_grade_filter_update(void *data, obs_data_t *settings) vec3_mul(&filter->clut_offset, &filter->clut_offset, &filter->clut_scale); - /* 1D shader wants normalized UVW */ - if (filter->clut_dim == CLUT_1D) { - vec3_divf(&filter->clut_scale, - &filter->clut_scale, (float)width); - - vec3_addf(&filter->clut_offset, - &filter->clut_offset, 0.5f); - vec3_divf(&filter->clut_offset, - &filter->clut_offset, (float)width); - } + /* want normalized UVW */ + vec3_divf(&filter->clut_scale, &filter->clut_scale, + (float)width); + vec3_addf(&filter->clut_offset, &filter->clut_offset, + 0.5f); + vec3_divf(&filter->clut_offset, &filter->clut_offset, + (float)width); } } @@ -455,9 +454,6 @@ static void color_grade_filter_render(void *data, gs_effect_t *effect) param = gs_effect_get_param_by_name(filter->effect, "domain_max"); gs_effect_set_vec3(param, &filter->domain_max); - param = gs_effect_get_param_by_name(filter->effect, "cube_width_i"); - gs_effect_set_float(param, 1.0f / filter->cube_width); - gs_blend_state_push(); gs_blend_function(GS_BLEND_ONE, GS_BLEND_INVSRCALPHA); diff --git a/plugins/obs-filters/data/color_grade_filter.effect b/plugins/obs-filters/data/color_grade_filter.effect index 7ca3229fe..f265be338 100644 --- a/plugins/obs-filters/data/color_grade_filter.effect +++ b/plugins/obs-filters/data/color_grade_filter.effect @@ -8,7 +8,6 @@ uniform float3 clut_scale; uniform float3 clut_offset; uniform float3 domain_min; uniform float3 domain_max; -uniform float cube_width_i; sampler_state textureSampler { Filter = Linear; @@ -85,78 +84,8 @@ float4 LUT3D(VertDataOut v_in) : TARGET g >= domain_min.g && g <= domain_max.g && b >= domain_min.b && b <= domain_max.b) { - float3 clut_pos = nonlinear * clut_scale + clut_offset; - float3 floor_pos = floor(clut_pos); - - float3 fracRGB = clut_pos - floor_pos; - - float3 uvw0 = (floor_pos + 0.5) * cube_width_i; - float3 uvw3 = (floor_pos + 1.5) * cube_width_i; - - float fracL, fracM, fracS; - float3 uvw1, uvw2; - if (fracRGB.r < fracRGB.g) { - if (fracRGB.r < fracRGB.b) { - if (fracRGB.g < fracRGB.b) { - // f(R) < f(G) < f(B) - fracL = fracRGB.b; - fracM = fracRGB.g; - fracS = fracRGB.r; - uvw1 = float3(uvw0.x, uvw0.y, uvw3.z); - uvw2 = float3(uvw0.x, uvw3.y, uvw3.z); - } else { - // f(R) < f(B) <= f(G) - fracL = fracRGB.g; - fracM = fracRGB.b; - fracS = fracRGB.r; - uvw1 = float3(uvw0.x, uvw3.y, uvw0.z); - uvw2 = float3(uvw0.x, uvw3.y, uvw3.z); - } - } else { - // f(B) <= f(R) < f(G) - fracL = fracRGB.g; - fracM = fracRGB.r; - fracS = fracRGB.b; - uvw1 = float3(uvw0.x, uvw3.y, uvw0.z); - uvw2 = float3(uvw3.x, uvw3.y, uvw0.z); - } - } else if (fracRGB.r < fracRGB.b) { - // f(G) <= f(R) < f(B) - fracL = fracRGB.b; - fracM = fracRGB.r; - fracS = fracRGB.g; - uvw1 = float3(uvw0.x, uvw0.y, uvw3.z); - uvw2 = float3(uvw3.x, uvw0.y, uvw3.z); - } else if (fracRGB.g < fracRGB.b) { - // f(G) < f(B) <= f(R) - fracL = fracRGB.r; - fracM = fracRGB.b; - fracS = fracRGB.g; - uvw1 = float3(uvw3.x, uvw0.y, uvw0.z); - uvw2 = float3(uvw3.x, uvw0.y, uvw3.z); - } else { - // f(B) <= f(G) <= f(R) - fracL = fracRGB.r; - fracM = fracRGB.g; - fracS = fracRGB.b; - uvw1 = float3(uvw3.x, uvw0.y, uvw0.z); - uvw2 = float3(uvw3.x, uvw3.y, uvw0.z); - } - - /* use filtering to collapse 4 taps to 2 */ - /* use max to kill potential zero-divide NaN */ - - float coeff01 = (1.0 - fracM); - float weight01 = max((fracL - fracM) / coeff01, 0.0); - float3 uvw01 = lerp(uvw0, uvw1, weight01); - float3 sample01 = clut_3d.Sample(textureSampler, uvw01).rgb; - - float coeff23 = fracM; - float weight23 = max(fracS / coeff23, 0.0); - float3 uvw23 = lerp(uvw2, uvw3, weight23); - float3 sample23 = clut_3d.Sample(textureSampler, uvw23).rgb; - - float3 luttedColor = (coeff01 * sample01) + (coeff23 * sample23); + float3 clut_uvw = nonlinear * clut_scale + clut_offset; + float3 luttedColor = clut_3d.Sample(textureSampler, clut_uvw).rgb; textureColor.rgb = lerp(textureColor.rgb, luttedColor, clut_amount); }