0
0
mirror of https://github.com/mpv-player/mpv.git synced 2024-09-20 03:52:22 +02:00

libvo: add vo_gl3

This new vo is heavily based on vo_gl.c. It provides better scale
filters, dithering, and optional color management with LittleCMS2.
It requires OpenGL 3.

Many features are enabled by default, so it will be slower than vo_gl.
However, it can be tuned to behave almost like vo_gl.
This commit is contained in:
wm4 2012-03-31 01:13:38 +02:00
parent b00c1335c8
commit 98052873da
10 changed files with 3112 additions and 1 deletions

2
.gitignore vendored
View File

@ -17,3 +17,5 @@
/TAGS /TAGS
/locale /locale
/po /po
libvo/vo_gl3_shaders.h

View File

@ -451,7 +451,7 @@ SRCS_MPLAYER-$(ESD) += libao2/ao_esd.c
SRCS_MPLAYER-$(FBDEV) += libvo/vo_fbdev.c libvo/vo_fbdev2.c SRCS_MPLAYER-$(FBDEV) += libvo/vo_fbdev.c libvo/vo_fbdev2.c
SRCS_MPLAYER-$(GGI) += libvo/vo_ggi.c SRCS_MPLAYER-$(GGI) += libvo/vo_ggi.c
SRCS_MPLAYER-$(GIF) += libvo/vo_gif89a.c SRCS_MPLAYER-$(GIF) += libvo/vo_gif89a.c
SRCS_MPLAYER-$(GL) += libvo/gl_common.c libvo/vo_gl.c \ SRCS_MPLAYER-$(GL) += libvo/gl_common.c libvo/vo_gl.c libvo/vo_gl3.c \
pnm_loader.c pnm_loader.c
SRCS_MPLAYER-$(GL_COCOA) += libvo/cocoa_common.m SRCS_MPLAYER-$(GL_COCOA) += libvo/cocoa_common.m
SRCS_MPLAYER-$(GL_SDL) += libvo/sdl_common.c SRCS_MPLAYER-$(GL_SDL) += libvo/sdl_common.c
@ -510,6 +510,7 @@ SRCS_MPLAYER = command.c \
libao2/audio_out.c \ libao2/audio_out.c \
libvo/aspect.c \ libvo/aspect.c \
libvo/csputils.c \ libvo/csputils.c \
libvo/filter_kernels.c \
libvo/geometry.c \ libvo/geometry.c \
libvo/old_vo_wrapper.c \ libvo/old_vo_wrapper.c \
libvo/spuenc.c \ libvo/spuenc.c \
@ -605,6 +606,11 @@ codec-cfg$(EXESUF): codec-cfg.c codec-cfg.h
codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf
./$^ > $@ ./$^ > $@
# Generate the shader header from the GLSL source with bin_to_header.py
# ($^ = the .glsl prerequisite, $@ = the header being produced).
libvo/vo_gl3_shaders.h: libvo/vo_gl3_shaders.glsl
python ./bin_to_header.py $^ $@
# vo_gl3.c depends on the generated header, so it must be built first.
libvo/vo_gl3.c: libvo/vo_gl3_shaders.h
# ./configure must be rerun if it changed # ./configure must be rerun if it changed
config.mak: configure config.mak: configure
@echo "############################################################" @echo "############################################################"

21
configure vendored
View File

@ -338,6 +338,7 @@ Optional features:
--enable-smb enable Samba (SMB) input [autodetect] --enable-smb enable Samba (SMB) input [autodetect]
--enable-live enable LIVE555 Streaming Media [disable] --enable-live enable LIVE555 Streaming Media [disable]
--enable-nemesi enable Nemesi Streaming Media [autodetect] --enable-nemesi enable Nemesi Streaming Media [autodetect]
--enable-lcms2 enable LCMS2 support [autodetect]
--disable-vcd disable VCD support [autodetect] --disable-vcd disable VCD support [autodetect]
--disable-bluray disable Blu-ray support [autodetect] --disable-bluray disable Blu-ray support [autodetect]
--disable-dvdnav disable libdvdnav [autodetect] --disable-dvdnav disable libdvdnav [autodetect]
@ -637,6 +638,7 @@ _xanim=auto
_real=auto _real=auto
_live=no _live=no
_nemesi=auto _nemesi=auto
_lcms2=auto
_native_rtsp=yes _native_rtsp=yes
_xinerama=auto _xinerama=auto
_mga=auto _mga=auto
@ -990,6 +992,8 @@ for ac_option do
--disable-live) _live=no ;; --disable-live) _live=no ;;
--enable-nemesi) _nemesi=yes ;; --enable-nemesi) _nemesi=yes ;;
--disable-nemesi) _nemesi=no ;; --disable-nemesi) _nemesi=no ;;
--enable-lcms2) _lcms2=yes ;;
--disable-lcms2) _lcms2=no ;;
--enable-xinerama) _xinerama=yes ;; --enable-xinerama) _xinerama=yes ;;
--disable-xinerama) _xinerama=no ;; --disable-xinerama) _xinerama=no ;;
--enable-mga) _mga=yes ;; --enable-mga) _mga=yes ;;
@ -5726,6 +5730,20 @@ else
fi fi
echores "$_qtx" echores "$_qtx"
# LittleCMS2 detection. With _lcms2=auto the result depends on whether
# pkg_config_add succeeds for "lcms2"; an explicit yes/no from the command
# line is taken as-is without probing.
echocheck "LCMS2 support"
if test "$_lcms2" = auto ; then
_lcms2=no
if pkg_config_add lcms2 ; then
_lcms2=yes
fi
fi
# Translate the result into the line that is emitted into the config header.
if test "$_lcms2" = yes; then
def_lcms2="#define CONFIG_LCMS2 1"
else
def_lcms2="#undef CONFIG_LCMS2"
fi
echores "$_lcms2"
echocheck "Nemesi Streaming Media libraries" echocheck "Nemesi Streaming Media libraries"
if test "$_nemesi" = auto && test "$networking" = yes ; then if test "$_nemesi" = auto && test "$networking" = yes ; then
_nemesi=no _nemesi=no
@ -6518,6 +6536,7 @@ LIBDV = $_libdv
LIBDVDCSS_INTERNAL = $_libdvdcss_internal LIBDVDCSS_INTERNAL = $_libdvdcss_internal
LIBMAD = $_mad LIBMAD = $_mad
LIBNEMESI = $_nemesi LIBNEMESI = $_nemesi
LCMS2 = $_lcms2
LIBNUT = $_libnut LIBNUT = $_libnut
LIBPOSTPROC = $libpostproc LIBPOSTPROC = $libpostproc
LIBSMBCLIENT = $_smb LIBSMBCLIENT = $_smb
@ -6874,6 +6893,8 @@ $def_smb
$def_socklen_t $def_socklen_t
$def_vstream $def_vstream
$def_lcms2
/* libvo options */ /* libvo options */
$def_3dfx $def_3dfx

279
libvo/filter_kernels.c Normal file
View File

@ -0,0 +1,279 @@
/*
* This file is part of mplayer2.
*
* Most code for computing the weights is taken from Anti-Grain Geometry (AGG)
* (licensed under GPL 2 or later), with modifications.
* Copyright (C) 2002-2006 Maxim Shemanarev
* http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h?view=markup
*
* Also see glumpy (BSD licensed), contains the same code in Python:
* http://code.google.com/p/glumpy/source/browse/glumpy/image/filter.py
*
* Also see: Paul Heckbert's "zoom"
*
* Also see XBMC: ConvolutionKernels.cpp etc.
*
* mplayer2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mplayer2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with mplayer2; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <stddef.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "filter_kernels.h"
// NOTE: all filters are separable, symmetric, and are intended for use with
// a lookup table/texture.
// Look up a kernel in mp_filter_kernels by exact (case-sensitive) name.
// Returns a pointer to the table entry, or NULL if no entry matches.
// The table is scanned until the terminating entry whose name is NULL.
const struct filter_kernel *mp_find_filter_kernel(const char *name)
{
    const struct filter_kernel *entry = mp_filter_kernels;
    while (entry->name) {
        if (!strcmp(entry->name, name))
            return entry;
        entry++;
    }
    return NULL;
}
// Pick the number of taps (filter->size) and the widening factor
// (filter->inv_scale) for a kernel.
//  sizes     = sorted list of available filter sizes, terminated with size 0
//  inv_scale = source_size / dest_size
// Returns true if one of the available sizes is large enough. Returns false
// if the largest available size had to be used; in that case inv_scale is
// recomputed from that size and the kernel radius.
bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                    double inv_scale)
{
    // Only downscaling (inv_scale >= 1) requires widening the filter.
    double widen = 1.0;
    if (inv_scale >= 1.0)
        widen = inv_scale;
    filter->inv_scale = widen;

    int wanted = ceil(2.0 * (filter->radius * filter->inv_scale));
    if (wanted < sizes[0])
        wanted = sizes[0];

    // Advance to the smallest available size that is still large enough,
    // or to the 0 terminator if there is none.
    const int *pick = sizes;
    while (*pick && wanted > *pick)
        pick++;

    if (!*pick) {
        // The filter doesn't fit. Instead of failing completely, fall back
        // to the largest size available. This is incorrect, but better than
        // refusing to do anything.
        filter->size = pick[-1];
        filter->inv_scale = filter->size / 2.0 / filter->radius;
        return false;
    }

    filter->size = *pick;
    return true;
}
// Compute the normalized 1D kernel weights for one subpixel position.
//  filter = kernel set up by mp_init_filter(); filter->size taps are written
//  f      = x0 - floor(x0), the subpixel position in the range [0,1]
//  out_w  = receives filter->size weights, out_w[0] .. out_w[size - 1]
// The raw weights are divided by their sum, so the result adds up to 1.
void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w)
{
    assert(filter->size > 0);

    double total = 0;
    int tap = 0;
    while (tap < filter->size) {
        // Signed distance between the sample position and this tap.
        double dist = f - (tap - filter->size / 2 + 1);
        double weight = filter->weight(filter, fabs(dist) / filter->inv_scale);
        out_w[tap] = weight;
        total += weight;
        tap++;
    }

    // normalize
    for (tap = 0; tap < filter->size; tap++)
        out_w[tap] = out_w[tap] / total;
}
// Fill the given array with weights for the range [0.0, 1.0]. The array is
// interpreted as rectangular array of count * filter->size items; row n holds
// the weights for subpixel position n / (count - 1).
// With count == 1 there is only one row, computed for position 0.0 (the
// generic formula would evaluate 0 / 0 and fill the row with NaN).
void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
{
    for (int n = 0; n < count; n++) {
        double f = count > 1 ? n / (double)(count - 1) : 0.0;
        mp_compute_weights(filter, f, out_array + filter->size * n);
    }
}
typedef struct filter_kernel kernel;

// Linear falloff: 1 at x = 0, reaching 0 at x = 1. The kernel state is unused.
static double bilinear(kernel *k, double x)
{
    const double weight = 1.0 - x;
    return weight;
}
// Raised-cosine weight: 0.5 + 0.5 * cos(pi * x). The kernel state is unused.
static double hanning(kernel *k, double x)
{
    const double c = cos(M_PI * x);
    return 0.5 + 0.5 * c;
}
// Raised-cosine weight with a pedestal: 0.54 + 0.46 * cos(pi * x).
// The kernel state is unused.
static double hamming(kernel *k, double x)
{
    const double c = cos(M_PI * x);
    return 0.54 + 0.46 * c;
}
// Cubic weight 2x^3 - 3x^2 + 1 (1 at x = 0, 0 at x = 1).
// The kernel state is unused.
static double hermite(kernel *k, double x)
{
    const double lead = 2.0 * x - 3.0;
    return lead * x * x + 1.0;
}
// Piecewise quadratic weight with support [0, 1.5); 0 outside.
// The kernel state is unused.
static double quadric(kernel *k, double x)
{
    double weight = 0;
    if (x < 0.5) {
        // NOTE: glumpy uses 0.75, AGG uses 0.5
        weight = 0.75 - x * x;
    } else if (x < 1.5) {
        const double d = x - 1.5;
        weight = 0.5 * d * d;
    }
    return weight;
}
// Cube of x, clamped to 0 for x <= 0 (helper for bicubic()).
static double bc_pow3(double x)
{
    if (x <= 0)
        return 0;
    return x * x * x;
}
// Cubic weight built from four shifted, clamped cubes (see bc_pow3()),
// scaled by 1/6. The kernel state is unused.
static double bicubic(kernel *k, double x)
{
    const double p2 = bc_pow3(x + 2);
    const double p1 = bc_pow3(x + 1);
    const double p0 = bc_pow3(x);
    const double m1 = bc_pow3(x - 1);
    return (1.0/6.0) * (p2 - 4 * p1 + 6 * p0 - 4 * m1);
}
// Power-series evaluation of sum_{k>=0} (x^2/4)^k / (k!)^2.
// Terms are added until the next term is no larger than epsilon.
static double bessel_i0(double epsilon, double x)
{
    const double quarter_sq = x * x / 4;
    double term = quarter_sq;
    double total = 1;
    int k = 2;
    while (term > epsilon) {
        total += term;
        // Next term: multiply by (x^2/4) / k^2.
        term *= quarter_sq / (k * k);
        k++;
    }
    return total;
}
// Weight bessel_i0(a * sqrt(1 - x^2)) / bessel_i0(b), with a = params[0] and
// b = params[1] taken from the kernel.
static double kaiser(kernel *k, double x)
{
    const double epsilon = 1e-12;
    const double a = k->params[0];
    const double b = k->params[1];
    const double norm = 1 / bessel_i0(epsilon, b);
    const double arg = a * sqrt(1 - x * x);
    return bessel_i0(epsilon, arg) * norm;
}
// Piecewise cubic weight with support [0, 2); 0 outside.
// The kernel state is unused.
static double catmull_rom(kernel *k, double x)
{
    double weight = 0;
    if (x < 1.0) {
        weight = 0.5 * (2.0 + x * x * (-5.0 + x * 3.0));
    } else if (x < 2.0) {
        weight = 0.5 * (4.0 + x * (-8.0 + x * (5.0 - x)));
    }
    return weight;
}
// Mitchell-Netravali
// Piecewise cubic with support [0, 2); the two shape parameters are read from
// the kernel: B = params[0], C = params[1].
static double mitchell(kernel *k, double x)
{
    const double b = k->params[0];
    const double c = k->params[1];

    if (x < 1.0) {
        // Inner segment: p0 + p2 * x^2 + p3 * x^3
        const double p0 = (6.0 - 2.0 * b) / 6.0;
        const double p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0;
        const double p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0;
        return p0 + x * x * (p2 + x * p3);
    }
    if (x < 2.0) {
        // Outer segment: q0 + q1 * x + q2 * x^2 + q3 * x^3
        const double q0 = (8.0 * b + 24.0 * c) / 6.0;
        const double q1 = (-12.0 * b - 48.0 * c) / 6.0;
        const double q2 = (6.0 * b + 30.0 * c) / 6.0;
        const double q3 = (-b - 6.0 * c) / 6.0;
        return q0 + x * (q1 + x * (q2 + x * q3));
    }
    return 0;
}
// Two-segment cubic spline weight: one polynomial for x < 1, another
// (expressed in x - 1) for everything else. There is no cutoff for large x.
// The kernel state is unused.
static double spline16(kernel *k, double x)
{
    if (x < 1.0)
        return ((x - 9.0/5.0) * x - 1.0/5.0) * x + 1.0;

    const double t = x - 1;
    return ((-1.0/3.0 * t + 4.0/5.0) * t - 7.0/15.0) * t;
}
// Three-segment cubic spline weight: x < 1, x < 2 (in terms of x - 1), and
// everything else (in terms of x - 2). There is no cutoff for large x.
// The kernel state is unused.
static double spline36(kernel *k, double x)
{
    if (x < 1.0)
        return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0;

    if (x < 2.0) {
        const double t = x - 1;
        return ((-6.0/11.0 * t + 270.0/209.0) * t - 156.0/209.0) * t;
    }

    const double u = x - 2;
    return ((1.0/11.0 * u - 45.0/209.0) * u + 26.0/209.0) * u;
}
// Gaussian weight exp(-2 x^2) scaled by sqrt(2 / pi).
// The kernel state is unused.
static double gaussian(kernel *k, double x)
{
    const double norm = sqrt(2.0 / M_PI);
    return exp(-2.0 * x * x) * norm;
}
// Normalized sinc: sin(pi x) / (pi x), with the removable singularity at
// x = 0 defined as 1. The kernel state is unused.
static double sinc(kernel *k, double x)
{
    if (x != 0.0) {
        const double pix = M_PI * x;
        return sin(pix) / pix;
    }
    return 1.0;
}
// Sinc windowed by a wider sinc: radius * sin(pi x) * sin(pi x / radius)
// / (pi x)^2, 1 at x = 0 and 0 outside [-radius, radius].
// NOTE(review): the window radius is derived from k->size / 2 (integer
// division of the tap count), not from k->radius. The two differ when
// mp_init_filter() rounded the size up or widened it for downscaling;
// confirm that this is intended.
static double lanczos(kernel *k, double x)
{
    const double radius = k->size / 2;
    const int outside = x < -radius || x > radius;
    if (outside)
        return 0;
    if (x == 0)
        return 1;

    const double pix = M_PI * x;
    const double numer = radius * sin(pix) * sin(pix / radius);
    return numer / (pix * pix);
}
// Sinc multiplied by the window 0.42 + 0.5 cos(pi x / r) + 0.08 cos(2 pi x / r);
// 1 at x = 0 and 0 for x > r.
// NOTE(review): r is derived from k->size / 2 (integer division of the tap
// count), not from k->radius; confirm that this is intended when the size
// was rounded up or widened by mp_init_filter().
static double blackman(kernel *k, double x)
{
    const double radius = k->size / 2;
    if (x == 0.0)
        return 1.0;
    if (x > radius)
        return 0.0;

    const double pix = x * M_PI;
    const double ratio = pix / radius;
    const double window = 0.42 + 0.5 * cos(ratio) + 0.08 * cos(2 * ratio);
    return (sin(pix) / pix) * window;
}
// Table of all built-in kernels: name, radius and weight function, plus
// default parameters for the kernels that read k->params. The list ends with
// an all-zero entry (name == NULL), which mp_find_filter_kernel() relies on.
const struct filter_kernel mp_filter_kernels[] = {
    { .name = "bilinear_slow", .radius = 1,   .weight = bilinear },
    { .name = "hanning",       .radius = 1,   .weight = hanning },
    { .name = "hamming",       .radius = 1,   .weight = hamming },
    { .name = "hermite",       .radius = 1,   .weight = hermite },
    { .name = "quadric",       .radius = 1.5, .weight = quadric },
    { .name = "bicubic",       .radius = 2,   .weight = bicubic },
    { .name = "kaiser",        .radius = 1,   .weight = kaiser,
      .params = {6.33, 6.33} },
    { .name = "catmull_rom",   .radius = 2,   .weight = catmull_rom },
    { .name = "mitchell",      .radius = 2,   .weight = mitchell,
      .params = {1.0/3.0, 1.0/3.0} },
    { .name = "spline16",      .radius = 2,   .weight = spline16 },
    { .name = "spline36",      .radius = 3,   .weight = spline36 },
    { .name = "gaussian",      .radius = 2,   .weight = gaussian },
    { .name = "sinc2",         .radius = 2,   .weight = sinc },
    { .name = "sinc3",         .radius = 3,   .weight = sinc },
    { .name = "sinc4",         .radius = 4,   .weight = sinc },
    { .name = "lanczos2",      .radius = 2,   .weight = lanczos },
    { .name = "lanczos3",      .radius = 3,   .weight = lanczos },
    { .name = "lanczos4",      .radius = 4,   .weight = lanczos },
    { .name = "blackman2",     .radius = 2,   .weight = blackman },
    { .name = "blackman3",     .radius = 3,   .weight = blackman },
    { .name = "blackman4",     .radius = 4,   .weight = blackman },
    {0}
};

45
libvo/filter_kernels.h Normal file
View File

@ -0,0 +1,45 @@
/*
* This file is part of mplayer2.
*
* mplayer2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mplayer2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with mplayer2; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MPLAYER_FILTER_KERNELS_H
#define MPLAYER_FILTER_KERNELS_H
#include <stdbool.h>
// Description of one scaling kernel plus the per-use state that
// mp_init_filter() fills in.
struct filter_kernel {
// Identifier used for lookup by mp_find_filter_kernel().
const char *name;
// Kernel radius; mp_init_filter() derives the tap count from it.
double radius;
// Weight function; mp_compute_weights() calls it with a non-negative x
// (absolute tap distance divided by inv_scale).
double (*weight)(struct filter_kernel *kernel, double x);
// The filter params can be changed at runtime. Only used by some filters.
float params[2];
// The following values are set by mp_init_filter() at runtime.
// Number of coefficients: one of the sizes passed to mp_init_filter(),
// normally the smallest one that is >= ceil(2 * radius * inv_scale).
int size;
// Widening factor applied to the kernel (>= 1 unless the fallback to the
// largest available size recomputed it).
double inv_scale;
};
// Table of all built-in kernels, terminated by an entry with name == NULL.
extern const struct filter_kernel mp_filter_kernels[];

// Return the table entry with the given name, or NULL if there is none.
const struct filter_kernel *mp_find_filter_kernel(const char *name);

// Choose filter->size and filter->inv_scale.
//  sizes     = sorted list of available filter sizes, terminated with size 0
//  inv_scale = source_size / dest_size (NOT dest / source)
// Returns false if no available size was large enough and the largest one
// was used instead.
bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                    double inv_scale);

// Write filter->size normalized weights for subpixel position f to out_w.
void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w);

// Fill out_array (count rows of filter->size floats) with the weights for
// subpixel positions evenly spaced over [0.0, 1.0].
void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array);
#endif /* MPLAYER_FILTER_KERNELS_H */

View File

@ -2587,3 +2587,19 @@ void uninit_mpglcontext(MPGLContext *ctx)
} }
talloc_free(ctx); talloc_free(ctx);
} }
// Print a multi-line string via mp_msg(), one message per line, each prefixed
// with its 1-based line number (e.g. for shader sources).
//  mod, lev = module and log level, as in mp_msg()
//  src      = NUL-terminated text; NULL is accepted and prints nothing
void mp_log_source(int mod, int lev, const char *src)
{
    if (!src)
        return;

    int line = 1;
    while (*src) {
        const char *end = strchr(src, '\n');
        const char *next;
        if (end) {
            next = end + 1;
        } else {
            // Last line without trailing newline: stop at the terminator.
            // (Only form end + 1 when end is a valid pointer.)
            end = src + strlen(src);
            next = end;
        }
        mp_msg(mod, lev, "[%3d] %.*s\n", line, (int)(end - src), src);
        line++;
        src = next;
    }
}

View File

@ -431,6 +431,10 @@ void uninit_mpglcontext(MPGLContext *ctx);
int create_mpglcontext(struct MPGLContext *ctx, int gl_flags, int gl_version, int create_mpglcontext(struct MPGLContext *ctx, int gl_flags, int gl_version,
uint32_t d_width, uint32_t d_height, uint32_t flags); uint32_t d_width, uint32_t d_height, uint32_t flags);
// print a multi line string with line numbers (e.g. for shader sources)
// mod, lev: module and log level, as in mp_msg()
void mp_log_source(int mod, int lev, const char *src);
//function pointers loaded from the OpenGL library //function pointers loaded from the OpenGL library
struct GL { struct GL {
void (GLAPIENTRY *Begin)(GLenum); void (GLAPIENTRY *Begin)(GLenum);

View File

@ -84,6 +84,7 @@ extern struct vo_driver video_out_vdpau;
extern struct vo_driver video_out_xv; extern struct vo_driver video_out_xv;
extern struct vo_driver video_out_gl_nosw; extern struct vo_driver video_out_gl_nosw;
extern struct vo_driver video_out_gl; extern struct vo_driver video_out_gl;
extern struct vo_driver video_out_gl3;
extern struct vo_driver video_out_dga; extern struct vo_driver video_out_dga;
extern struct vo_driver video_out_sdl; extern struct vo_driver video_out_sdl;
extern struct vo_driver video_out_3dfx; extern struct vo_driver video_out_3dfx;
@ -169,6 +170,9 @@ const struct vo_driver *video_out_drivers[] =
#ifdef CONFIG_XV #ifdef CONFIG_XV
&video_out_xv, &video_out_xv,
#endif #endif
#ifdef CONFIG_GL
&video_out_gl3,
#endif
#ifdef CONFIG_X11 #ifdef CONFIG_X11
#ifdef CONFIG_GL #ifdef CONFIG_GL
&video_out_gl_nosw, &video_out_gl_nosw,

2418
libvo/vo_gl3.c Normal file

File diff suppressed because it is too large Load Diff

316
libvo/vo_gl3_shaders.glsl Normal file
View File

@ -0,0 +1,316 @@
/*
* This file is part of mplayer2.
*
* mplayer2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* mplayer2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with mplayer2; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
// Note that this file is not directly passed as shader, but run through some
// text processing functions, and in fact contains multiple vertex and fragment
// shaders.
// inserted at the beginning of all shaders
#!section prelude
#!section vertex_all
uniform mat3 transform;
uniform sampler3D lut_3d;
in vec2 vertex_position;
in vec4 vertex_color;
out vec4 color;
in vec2 vertex_texcoord;
out vec2 texcoord;
// Vertex shader shared by all programs: optionally transforms the position,
// forwards the texture coordinate, and forwards the vertex color (run through
// the 3D LUT when USE_3DLUT is defined).
void main() {
    texcoord = vertex_texcoord;

    vec3 pos = vec3(vertex_position, 1);
#ifndef FIXED_SCALE
    pos = transform * pos;
#endif
    gl_Position = vec4(pos, 1);

#ifdef USE_3DLUT
    color = vec4(texture(lut_3d, vertex_color.rgb).rgb, vertex_color.a);
#else
    color = vertex_color;
#endif
}
#!section frag_eosd
uniform sampler2D texture1;
in vec2 texcoord;
in vec4 color;
out vec4 out_color;
// EOSD fragment shader: the red channel of the texture scales the alpha of
// the interpolated vertex color; RGB comes from the vertex color alone.
void main() {
    float coverage = texture(texture1, texcoord).r;
    out_color = vec4(color.rgb, color.a * coverage);
}
#!section frag_osd
uniform sampler2D texture1;
in vec2 texcoord;
in vec4 color;
out vec4 out_color;
// OSD fragment shader: the texture's red channel is replicated to RGB and its
// green channel is used as alpha; the result is modulated by the vertex color.
void main() {
    vec4 texel = texture(texture1, texcoord);
    out_color = texel.rrrg * color;
}
#!section frag_video
uniform sampler2D texture1;
uniform sampler2D texture2;
uniform sampler2D texture3;
uniform sampler1D lut_c_1d;
uniform sampler1D lut_l_1d;
uniform sampler2D lut_c_2d;
uniform sampler2D lut_l_2d;
uniform sampler3D lut_3d;
uniform sampler2D dither;
uniform mat4x3 colormatrix;
uniform vec3 inv_gamma;
uniform float conv_gamma;
uniform float dither_quantization;
uniform float dither_multiply;
uniform float filter_param1;
in vec2 texcoord;
out vec4 out_color;
// Plain texture lookup; any filtering is whatever the sampler is configured
// to do.
vec4 sample_bilinear(sampler2D tex, vec2 texcoord) {
    vec4 texel = texture(tex, texcoord);
    return texel;
}
// Explanation how bicubic scaling with only 4 texel fetches is done:
// http://www.mate.tue.nl/mate/pdfs/10318.pdf
// 'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
// Explanation why this algorithm normally always blurs, even with unit scaling:
// http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
// 'GPU Prefilter for Accurate Cubic B-spline Interpolation'
//
// Input: s = fractional sample position along one axis.
// Output, as consumed by sample_bicubic_fast(): .x and .y are the two fetch
// offsets (in texels, the caller applies the sign and texel size), .z is the
// blend factor between the two fetches. .w is not read by that caller.
vec4 calcweights(float s) {
// Three Horner steps evaluate four cubic polynomials in s at once, one per
// vector component.
vec4 t = vec4(-0.5, 0.1666, 0.3333, -0.3333) * s + vec4(1, 0, -0.5, 0.5);
t = t * s + vec4(0, 0, -0.5, 0.5);
t = t * s + vec4(-0.6666, 0, 0.8333, 0.1666);
// Divide x/y by z/w and shift by one texel.
vec2 a = vec2(1 / t.z, 1 / t.w);
t.xy = t.xy * a + vec2(1, 1);
t.x = t.x + s;
t.y = t.y - s;
return t;
}
// Bicubic sampling using four texture fetches whose positions and blend
// factors come from calcweights().
vec4 sample_bicubic_fast(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 pt = 1 / texsize;
    vec2 fcoord = fract(texcoord * texsize + vec2(0.5, 0.5));
    vec4 parmx = calcweights(fcoord.x);
    vec4 parmy = calcweights(fcoord.y);

    // Fetch offsets per axis: .x is the negative-side offset, .y the
    // positive-side one.
    vec2 dx = parmx.rg * vec2(-pt.x, pt.x);
    vec2 dy = parmy.rg * vec2(-pt.y, pt.y);

    // first y-interpolation (column at dx.x)
    vec4 near_a = texture(tex, texcoord + vec2(dx.x, dy.x));
    vec4 far_a = texture(tex, texcoord + vec2(dx.x, dy.y));
    vec4 col_a = mix(far_a, near_a, parmy.b);

    // second y-interpolation (column at dx.y)
    vec4 near_b = texture(tex, texcoord + vec2(dx.y, dy.x));
    vec4 far_b = texture(tex, texcoord + vec2(dx.y, dy.y));
    vec4 col_b = mix(far_b, near_b, parmy.b);

    // x-interpolation
    return mix(col_b, col_a, parmx.b);
}
// Read 2 filter weights for subpixel position f from a 1D lookup texture
// (first two components of one texel).
float[2] weights2(sampler1D lookup, float f) {
    vec4 texel = texture(lookup, f);
    return float[2](texel.x, texel.y);
}
// Read 4 filter weights for subpixel position f from a 1D lookup texture
// (all four components of one texel).
float[4] weights4(sampler1D lookup, float f) {
    vec4 texel = texture(lookup, f);
    return float[4](texel.x, texel.y, texel.z, texel.w);
}
// Read 6 filter weights for subpixel position f from a 2D lookup texture:
// three components each from two lookups at x = 0.25 and x = 0.75
// (presumably the centres of a two-texel-wide row; confirm against the LUT
// upload code).
float[6] weights6(sampler2D lookup, float f) {
    vec4 lo = texture(lookup, vec2(0.25, f));
    vec4 hi = texture(lookup, vec2(0.75, f));
    return float[6](lo.x, lo.y, lo.z, hi.x, hi.y, hi.z);
}
// Read 8 filter weights for subpixel position f from a 2D lookup texture:
// four components each from two lookups at x = 0.25 and x = 0.75.
float[8] weights8(sampler2D lookup, float f) {
    vec4 lo = texture(lookup, vec2(0.25, f));
    vec4 hi = texture(lookup, vec2(0.75, f));
    return float[8](lo.x, lo.y, lo.z, lo.w, hi.x, hi.y, hi.z, hi.w);
}
// Read 12 filter weights for subpixel position f from a 2D lookup texture:
// four components each from three lookups at x = 1/6, 1/2 and 5/6.
float[12] weights12(sampler2D lookup, float f) {
    vec4 lo = texture(lookup, vec2(1.0/6.0, f));
    vec4 mid = texture(lookup, vec2(0.5, f));
    vec4 hi = texture(lookup, vec2(5.0/6.0, f));
    return float[12](lo.x, lo.y, lo.z, lo.w,
                     mid.x, mid.y, mid.z, mid.w,
                     hi.x, hi.y, hi.z, hi.w);
}
// Read 16 filter weights for subpixel position f from a 2D lookup texture:
// four components each from four lookups at x = 0.125, 0.375, 0.625, 0.875.
float[16] weights16(sampler2D lookup, float f) {
    vec4 q1 = texture(lookup, vec2(0.125, f));
    vec4 q2 = texture(lookup, vec2(0.375, f));
    vec4 q3 = texture(lookup, vec2(0.625, f));
    vec4 q4 = texture(lookup, vec2(0.875, f));
    return float[16](q1.x, q1.y, q1.z, q1.w, q2.x, q2.y, q2.z, q2.w,
                     q3.x, q3.y, q3.z, q3.w, q4.x, q4.y, q4.z, q4.w);
}
// Generates a function NAME that performs one 1D convolution pass with N taps:
// it sums weights[n] * texture(tex, texcoord + pt * n) for n = 0 .. N-1.
// texcoord is the position of the first tap and pt the step between taps
// (the caller decides the direction by the vector it passes as pt).
#define CONVOLUTION_SEP_N(NAME, N) \
vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float weights[N]) { \
vec4 res = vec4(0); \
for (int n = 0; n < N; n++) { \
res += weights[n] * texture(tex, texcoord + pt * n); \
} \
return res; \
}
// One instantiation per supported tap count.
CONVOLUTION_SEP_N(convolution_sep2, 2)
CONVOLUTION_SEP_N(convolution_sep4, 4)
CONVOLUTION_SEP_N(convolution_sep6, 6)
CONVOLUTION_SEP_N(convolution_sep8, 8)
CONVOLUTION_SEP_N(convolution_sep12, 12)
CONVOLUTION_SEP_N(convolution_sep16, 16)
// Generates the entry point NAME for one separable convolution pass.
// NAME computes the texel size along dir, the fractional position fcoord of
// texcoord within its texel along dir, fetches the N weights for fcoord via
// WEIGHTS_FUNC(lookup, fcoord), and calls CONV_FUNC starting N / 2 - 1 texels
// before the texel-aligned base position.
// SAMPLERT is the sampler type of the weights lookup texture.
// The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to
// remove all the redundant multiplications and additions.
#define SAMPLE_CONVOLUTION_SEP_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC)\
vec4 NAME(vec2 dir, SAMPLERT lookup, sampler2D tex, vec2 texcoord) { \
vec2 texsize = textureSize(tex, 0); \
vec2 pt = (1 / texsize) * dir; \
float fcoord = dot(fract(texcoord * texsize - 0.5), dir); \
vec2 base = texcoord - fcoord * pt; \
return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt, \
WEIGHTS_FUNC(lookup, fcoord)); \
}
// Sizes 2 and 4 read their weights from a 1D texture, larger sizes from a 2D
// texture (matching the weightsN() functions they are paired with).
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, sampler1D, convolution_sep2, weights2)
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, sampler1D, convolution_sep4, weights4)
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, sampler2D, convolution_sep6, weights6)
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, sampler2D, convolution_sep8, weights8)
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, sampler2D, convolution_sep12, weights12)
SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, sampler2D, convolution_sep16, weights16)
// Generates a function NAME that performs a full 2D convolution over an
// N x N grid of texels in a single pass (N * N texture fetches).
// texcoord is the position of the first (x = 0, y = 0) tap, pt the texel size.
// Each row is weighted with taps_x and accumulated, then the row sums are
// weighted with taps_y.
#define CONVOLUTION_N(NAME, N) \
vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float taps_x[N], \
float taps_y[N]) { \
vec4 res = vec4(0); \
for (int y = 0; y < N; y++) { \
vec4 line = vec4(0); \
for (int x = 0; x < N; x++) \
line += taps_x[x] * texture(tex, texcoord + pt * vec2(x, y));\
res += taps_y[y] * line; \
} \
return res; \
}
// One instantiation per supported tap count.
CONVOLUTION_N(convolution2, 2)
CONVOLUTION_N(convolution4, 4)
CONVOLUTION_N(convolution6, 6)
CONVOLUTION_N(convolution8, 8)
CONVOLUTION_N(convolution12, 12)
CONVOLUTION_N(convolution16, 16)
// Generates the entry point NAME for single-pass 2D convolution.
// NAME computes the fractional position of texcoord within its texel on both
// axes, fetches one set of N weights per axis via WEIGHTS_FUNC, and calls
// CONV_FUNC starting N / 2 - 1 texels before the texel-aligned base position.
// SAMPLERT is the sampler type of the weights lookup texture.
#define SAMPLE_CONVOLUTION_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC) \
vec4 NAME(SAMPLERT lookup, sampler2D tex, vec2 texcoord) { \
vec2 texsize = textureSize(tex, 0); \
vec2 pt = 1 / texsize; \
vec2 fcoord = fract(texcoord * texsize - 0.5); \
vec2 base = texcoord - fcoord * pt; \
return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt, \
WEIGHTS_FUNC(lookup, fcoord.x), \
WEIGHTS_FUNC(lookup, fcoord.y)); \
}
// Sizes 2 and 4 read their weights from a 1D texture, larger sizes from a 2D
// texture (matching the weightsN() functions they are paired with).
SAMPLE_CONVOLUTION_N(sample_convolution2, 2, sampler1D, convolution2, weights2)
SAMPLE_CONVOLUTION_N(sample_convolution4, 4, sampler1D, convolution4, weights4)
SAMPLE_CONVOLUTION_N(sample_convolution6, 6, sampler2D, convolution6, weights6)
SAMPLE_CONVOLUTION_N(sample_convolution8, 8, sampler2D, convolution8, weights8)
SAMPLE_CONVOLUTION_N(sample_convolution12, 12, sampler2D, convolution12, weights12)
SAMPLE_CONVOLUTION_N(sample_convolution16, 16, sampler2D, convolution16, weights16)
// Unsharp masking
// Compares the center sample with the average of four diagonal samples taken
// half a texel away, and adds the difference scaled by filter_param1.
vec4 sample_sharpen3(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 pt = 1 / texsize;
    vec2 half_texel = pt * 0.5;
    vec4 center = texture(tex, texcoord);
    vec4 corners = texture(tex, texcoord + half_texel * vec2(+1, +1))
                 + texture(tex, texcoord + half_texel * vec2(+1, -1))
                 + texture(tex, texcoord + half_texel * vec2(-1, +1))
                 + texture(tex, texcoord + half_texel * vec2(-1, -1));
    vec4 detail = center - 0.25 * corners;
    return center + detail * filter_param1;
}
// Larger sharpening filter: combines the center sample with four diagonal
// samples (1.2 texels away) and four axis-aligned samples (1.5 texels away)
// using fixed coefficients, then adds the result scaled by filter_param1.
vec4 sample_sharpen5(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 pt = 1 / texsize;
    vec4 center = texture(tex, texcoord);

    vec2 diag_step = pt * 1.2;
    vec4 diagonals = texture(tex, texcoord + diag_step * vec2(+1, +1))
                   + texture(tex, texcoord + diag_step * vec2(+1, -1))
                   + texture(tex, texcoord + diag_step * vec2(-1, +1))
                   + texture(tex, texcoord + diag_step * vec2(-1, -1));

    vec2 axis_step = pt * 1.5;
    vec4 axes = texture(tex, texcoord + axis_step * vec2(+1, 0))
              + texture(tex, texcoord + axis_step * vec2( 0, +1))
              + texture(tex, texcoord + axis_step * vec2(-1, 0))
              + texture(tex, texcoord + axis_step * vec2( 0, -1));

    vec4 detail = center * 0.859375 + axes * -0.1171875
                + diagonals * -0.09765625;
    return center + detail * filter_param1;
}
// Video fragment shader. Each stage is compiled in only when the matching
// USE_* macro is defined; SAMPLE_L / SAMPLE_C are supplied from outside this
// file and select the scaler used for the first plane and the other planes.
void main() {
    // Fetch the source color, either from three separate planes or from one
    // packed texture.
#ifdef USE_PLANAR
    vec3 color = vec3(SAMPLE_L(texture1, texcoord).r,
                      SAMPLE_C(texture2, texcoord).r,
                      SAMPLE_C(texture3, texcoord).r);
#else
    vec3 color = SAMPLE_L(texture1, texcoord).rgb;
#endif
#ifdef USE_GBRP
    // Planes are stored in G, B, R order.
    color.gbr = color;
#endif
#ifdef USE_YGRAY
    // NOTE: actually slightly wrong for 16 bit input video, and completely
    // wrong for 9/10 bit input
    color.gb = vec2(128.0/255.0);
#endif
#ifdef USE_COLORMATRIX
    // 3x3 matrix in the first three columns, constant offset in the fourth.
    color = mat3(colormatrix) * color + colormatrix[3];
#endif
#ifdef USE_LINEAR_CONV
    color = pow(color, vec3(2.2));
#endif
#ifdef USE_LINEAR_CONV_INV
    // Convert from linear RGB to gamma RGB before putting it through the 3D-LUT
    // in the final stage.
    color = pow(color, vec3(1.0/2.2));
#endif
#ifdef USE_GAMMA_POW
    color = pow(color, inv_gamma);
#endif
#ifdef USE_3DLUT
    color = texture(lut_3d, color).rgb;
#endif
#ifdef USE_DITHER
    // The dither texture is addressed by window position divided by its size.
    // The local is deliberately not called "dither" so it does not shadow the
    // sampler uniform of that name.
    vec2 dither_pos = gl_FragCoord.xy / textureSize(dither, 0);
    float dither_value = texture(dither, dither_pos).r;
    color = floor(color * dither_multiply + dither_value) / dither_quantization;
#endif
    out_color = vec4(color, 1);
}