0
0
mirror of https://github.com/obsproject/obs-studio.git synced 2024-09-20 21:13:04 +02:00
obs-studio/libobs/data/format_conversion.effect
jp9000 ca8a9fb5a7 libobs: Fix conversion shader D3D display bug
Just for a quick background: D3D's fmod intrinsic is very imprecise.
Naturally floating points aren't precise at all, and when the numbers
you're dealing with become very large, it can often be off by 0.1 or
more.

However, apparently 0.1 isn't enough of an offset to ensure a proper
value when using the fmod intrinsic and then flooring the value.  0.2
seems to fix the issue and make the image display properly.
2014-12-09 14:21:01 -08:00

310 lines
7.7 KiB
Plaintext

/******************************************************************************
Copyright (C) 2014 by Hugh Bailey <obs.jim@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
******************************************************************************/
//#define DEBUGGING
uniform float4x4 ViewProj;
uniform float u_plane_offset;
uniform float v_plane_offset;
uniform float width;
uniform float height;
uniform float width_i;
uniform float height_i;
uniform float width_d2;
uniform float height_d2;
uniform float width_d2_i;
uniform float height_d2_i;
uniform float input_width;
uniform float input_height;
uniform float input_width_i;
uniform float input_height_i;
uniform float input_width_i_d2;
uniform float input_height_i_d2;
uniform texture2d image;
sampler_state def_sampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
};
struct VertInOut {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
VertInOut VSDefault(VertInOut vert_in)
{
VertInOut vert_out;
vert_out.pos = mul(float4(vert_in.pos.xyz, 1.0), ViewProj);
vert_out.uv = vert_in.uv;
return vert_out;
}
/* used to prevent internal GPU precision issues width fmod in particular */
#define PRECISION_OFFSET 0.2
float4 PSNV12(VertInOut vert_in) : TARGET
{
#ifdef _OPENGL
float v_mul = floor((1.0 - vert_in.uv.y) * input_height);
#else
float v_mul = floor(vert_in.uv.y * input_height);
#endif
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
return transpose(out_val)[1];
} else {
#ifdef DEBUGGING
return float4(0.5, 0.2, 0.5, 0.2);
#endif
float new_offset = byte_offset - u_plane_offset;
float ch_u = floor(fmod(new_offset, width)) * width_i;
float ch_v = floor(new_offset * width_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u + width_i2, ch_v);
return float4(
image.Sample(def_sampler, sample_pos[0]).rb,
image.Sample(def_sampler, sample_pos[1]).rb
);
}
}
float4 PSPlanar420(VertInOut vert_in) : TARGET
{
#ifdef _OPENGL
float v_mul = floor((1.0 - vert_in.uv.y) * input_height);
#else
float v_mul = floor(vert_in.uv.y * input_height);
#endif
float byte_offset = floor((v_mul + vert_in.uv.x) * width) * 4.0;
byte_offset += PRECISION_OFFSET;
float2 sample_pos[4];
if (byte_offset < u_plane_offset) {
#ifdef DEBUGGING
return float4(1.0, 1.0, 1.0, 1.0);
#endif
float lum_u = floor(fmod(byte_offset, width)) * width_i;
float lum_v = floor(byte_offset * width_i) * height_i;
/* move to texel centers to sample the 4 pixels properly */
lum_u += width_i * 0.5;
lum_v += height_i * 0.5;
sample_pos[0] = float2(lum_u, lum_v);
sample_pos[1] = float2(lum_u += width_i, lum_v);
sample_pos[2] = float2(lum_u += width_i, lum_v);
sample_pos[3] = float2(lum_u + width_i, lum_v);
} else {
#ifdef DEBUGGING
return ((byte_offset < v_plane_offset) ?
float4(0.5, 0.5, 0.5, 0.5) :
float4(0.2, 0.2, 0.2, 0.2));
#endif
float new_offset = byte_offset -
((byte_offset < v_plane_offset) ?
u_plane_offset : v_plane_offset);
float ch_u = floor(fmod(new_offset, width_d2)) * width_d2_i;
float ch_v = floor(new_offset * width_d2_i) * height_d2_i;
float width_i2 = width_i*2.0;
/* move to the borders of each set of 4 pixels to force it
* to do bilinear averaging */
ch_u += width_i;
ch_v += height_i;
sample_pos[0] = float2(ch_u, ch_v);
sample_pos[1] = float2(ch_u += width_i2, ch_v);
sample_pos[2] = float2(ch_u += width_i2, ch_v);
sample_pos[3] = float2(ch_u + width_i2, ch_v);
}
float4x4 out_val = float4x4(
image.Sample(def_sampler, sample_pos[0]),
image.Sample(def_sampler, sample_pos[1]),
image.Sample(def_sampler, sample_pos[2]),
image.Sample(def_sampler, sample_pos[3])
);
out_val = transpose(out_val);
if (byte_offset < u_plane_offset)
return out_val[1];
else if (byte_offset < v_plane_offset)
return out_val[0];
else
return out_val[2];
}
float4 PSPacked422_Reverse(VertInOut vert_in, int u_pos, int v_pos,
int y0_pos, int y1_pos) : TARGET
{
float y = vert_in.uv.y;
#ifdef _OPENGL
y = 1. - y;
#endif
float odd = floor(fmod(width * vert_in.uv.x + PRECISION_OFFSET, 2.0));
float x = floor(width_d2 * vert_in.uv.x + PRECISION_OFFSET) *
width_d2_i;
x += input_width_i_d2;
float4 texel = image.Sample(def_sampler, float2(x, y));
return float4(odd > 0.5 ? texel[y1_pos] : texel[y0_pos],
texel[u_pos], texel[v_pos], 1.0);
}
float GetOffsetColor(float offset)
{
float2 uv;
offset += PRECISION_OFFSET;
uv.x = floor(fmod(offset, input_width)) * input_width_i;
uv.y = floor(offset * input_width_i) * input_height_i;
uv.xy += float2(input_width_i_d2, input_height_i_d2);
return image.Sample(def_sampler, uv).r;
}
float4 PSPlanar420_Reverse(VertInOut vert_in) : TARGET
{
float x = vert_in.uv.x;
float y = vert_in.uv.y;
#ifdef _OPENGL
y = 1. - y;
#endif
float x_offset = floor(x * width + PRECISION_OFFSET);
float y_offset = floor(y * height + PRECISION_OFFSET);
float lum_offset = y_offset * width + x_offset + PRECISION_OFFSET;
lum_offset = floor(lum_offset);
float ch_offset = floor(y_offset * 0.5 + PRECISION_OFFSET) * width_d2 +
(x_offset * 0.5) + PRECISION_OFFSET;
ch_offset = floor(ch_offset);
return float4(
GetOffsetColor(lum_offset),
GetOffsetColor(u_plane_offset + ch_offset),
GetOffsetColor(v_plane_offset + ch_offset),
1.0
);
}
technique Planar420
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420(vert_in);
}
}
technique NV12
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSNV12(vert_in);
}
}
technique UYVY_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 2, 0, 1, 3);
}
}
technique YUY2_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 1, 3, 2, 0);
}
}
technique YVYU_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPacked422_Reverse(vert_in, 3, 1, 2, 0);
}
}
technique I420_Reverse
{
pass
{
vertex_shader = VSDefault(vert_in);
pixel_shader = PSPlanar420_Reverse(vert_in);
}
}