libvo: add vo_gl3

This new vo is heavily based on vo_gl.c. It provides better scale filters, dithering, and optional color management with LittleCMS2. It requires OpenGL 3. Many features are enabled by default, so it will be slower than vo_gl. However, it can be tuned to behave almost as vo_gl.
2024-09-20 03:52:22 +02:00 · 2012-03-31 01:13:38 +02:00 · 2012-03-31 01:13:38 +02:00 · 98052873da
commit 98052873da
parent b00c1335c8
10 changed files with 3112 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@ -17,3 +17,5 @@
 /TAGS
 /locale
 /po
 libvo/vo_gl3_shaders.h
--- a/Makefile
+++ b/Makefile
@ -451,7 +451,7 @@ SRCS_MPLAYER-$(ESD)          += libao2/ao_esd.c
 SRCS_MPLAYER-$(FBDEV)        += libvo/vo_fbdev.c libvo/vo_fbdev2.c
 SRCS_MPLAYER-$(GGI)          += libvo/vo_ggi.c
 SRCS_MPLAYER-$(GIF)          += libvo/vo_gif89a.c
-SRCS_MPLAYER-$(GL)           += libvo/gl_common.c libvo/vo_gl.c \
+SRCS_MPLAYER-$(GL)           += libvo/gl_common.c libvo/vo_gl.c libvo/vo_gl3.c \
                                pnm_loader.c
 SRCS_MPLAYER-$(GL_COCOA)     += libvo/cocoa_common.m
 SRCS_MPLAYER-$(GL_SDL)       += libvo/sdl_common.c
@ -510,6 +510,7 @@ SRCS_MPLAYER = command.c \
               libao2/audio_out.c \
               libvo/aspect.c \
               libvo/csputils.c \
               libvo/filter_kernels.c \
               libvo/geometry.c \
               libvo/old_vo_wrapper.c \
               libvo/spuenc.c \
@ -605,6 +606,11 @@ codec-cfg$(EXESUF): codec-cfg.c codec-cfg.h
 codecs.conf.h: codec-cfg$(EXESUF) etc/codecs.conf
 	./$^ > $@
 # Run bin_to_header.py on the GLSL source to produce the header (presumably
 # the shader text wrapped for inclusion from C - confirm against the script).
 # vo_gl3.c is declared to depend on the generated header.
 libvo/vo_gl3_shaders.h: libvo/vo_gl3_shaders.glsl
 	python ./bin_to_header.py $^ $@
 libvo/vo_gl3.c: libvo/vo_gl3_shaders.h
 # ./configure must be rerun if it changed
 config.mak: configure
 	@echo "############################################################"
--- a/configure
+++ b/configure
@ -338,6 +338,7 @@ Optional features:
  --enable-smb           enable Samba (SMB) input [autodetect]
  --enable-live          enable LIVE555 Streaming Media [disable]
  --enable-nemesi        enable Nemesi Streaming Media [autodetect]
  --enable-lcms2         enable LCMS2 support [autodetect]
  --disable-vcd          disable VCD support [autodetect]
  --disable-bluray       disable Blu-ray support [autodetect]
  --disable-dvdnav       disable libdvdnav [autodetect]
@ -637,6 +638,7 @@ _xanim=auto
 _real=auto
 _live=no
 _nemesi=auto
 _lcms2=auto
 _native_rtsp=yes
 _xinerama=auto
 _mga=auto
@ -990,6 +992,8 @@ for ac_option do
  --disable-live)       _live=no        ;;
  --enable-nemesi)      _nemesi=yes     ;;
  --disable-nemesi)     _nemesi=no      ;;
  --enable-lcms2)       _lcms2=yes      ;;
  --disable-lcms2)      _lcms2=no       ;;
  --enable-xinerama)    _xinerama=yes   ;;
  --disable-xinerama)   _xinerama=no    ;;
  --enable-mga)         _mga=yes        ;;
@ -5726,6 +5730,20 @@ else
 fi
 echores "$_qtx"
 # LittleCMS 2 detection. With "auto", the result is whatever the
 # pkg_config_add helper reports for the lcms2 package.
 # NOTE(review): with an explicit --enable-lcms2 the helper is never called;
 # if it is also what adds the compiler/linker flags, they would be missing in
 # that case - confirm against pkg_config_add.
 echocheck "LCMS2 support"
 if test "$_lcms2" = auto ; then
    _lcms2=no
    if pkg_config_add lcms2 ; then
        _lcms2=yes
    fi
 fi
 # Turn the final yes/no into the config.h line for CONFIG_LCMS2.
 if test "$_lcms2" = yes; then
    def_lcms2="#define CONFIG_LCMS2 1"
 else
    def_lcms2="#undef CONFIG_LCMS2"
 fi
 echores "$_lcms2"
 echocheck "Nemesi Streaming Media libraries"
 if test "$_nemesi" = auto  && test "$networking" = yes ; then
    _nemesi=no
@ -6518,6 +6536,7 @@ LIBDV = $_libdv
 LIBDVDCSS_INTERNAL = $_libdvdcss_internal
 LIBMAD = $_mad
 LIBNEMESI = $_nemesi
 LCMS2 = $_lcms2
 LIBNUT = $_libnut
 LIBPOSTPROC = $libpostproc
 LIBSMBCLIENT = $_smb
@ -6874,6 +6893,8 @@ $def_smb
 $def_socklen_t
 $def_vstream
 $def_lcms2
 /* libvo options */
 $def_3dfx
--- a/libvo/filter_kernels.c
+++ b/libvo/filter_kernels.c
@ -0,0 +1,279 @@
 /*
 * This file is part of mplayer2.
 *
 * Most code for computing the weights is taken from Anti-Grain Geometry (AGG)
 * (licensed under GPL 2 or later), with modifications.
 * Copyright (C) 2002-2006 Maxim Shemanarev
 * http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h?view=markup
 *
 * Also see glumpy (BSD licensed), contains the same code in Python:
 * http://code.google.com/p/glumpy/source/browse/glumpy/image/filter.py
 *
 * Also see: Paul Heckbert's "zoom"
 *
 * Also see XBMC: ConvolutionKernels.cpp etc.
 *
 * mplayer2 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mplayer2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with mplayer2; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #include <stddef.h>
 #include <string.h>
 #include <math.h>
 #include <assert.h>
 #include "filter_kernels.h"
 // NOTE: all filters are separable, symmetric, and are intended for use with
 //       a lookup table/texture.
 // Look up a built-in kernel by exact (case-sensitive) name.
 // Returns a pointer into mp_filter_kernels[], or NULL if nothing matches.
 const struct filter_kernel *mp_find_filter_kernel(const char *name)
 {
    const struct filter_kernel *entry = mp_filter_kernels;
    // The table is terminated by an entry whose name is NULL.
    while (entry->name) {
        if (!strcmp(entry->name, name))
            return entry;
        entry++;
    }
    return NULL;
 }
 // Choose the number of taps for a kernel and store it, together with the
 // effective inverse scale, in *filter.
 // sizes: sorted list of available filter sizes, terminated with size 0
 // inv_scale: source_size / dest_size
 // Returns true if an available size fits the requested support. Otherwise the
 // last (largest) size is used, inv_scale is reduced to match it, and false is
 // returned.
 bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                     double inv_scale)
 {
    // Only downscaling widens the kernel; everything else uses a factor of 1.
    double widen = 1.0;
    if (inv_scale >= 1.0)
        widen = inv_scale;
    filter->inv_scale = widen;

    int wanted = ceil(2.0 * (filter->radius * filter->inv_scale));
    if (wanted < sizes[0])
        wanted = sizes[0];

    // Advance to the first available size that is at least as large.
    const int *entry = sizes;
    while (*entry && wanted > *entry)
        entry++;

    if (!*entry) {
        // Ran off the end of the list: nothing is big enough. Rather than
        // failing, settle for the largest size and shrink the scale so the
        // kernel still spans exactly that many taps. Not correct, but it
        // keeps the output going.
        filter->size = entry[-1];
        filter->inv_scale = filter->size / 2.0 / filter->radius;
        return false;
    }

    filter->size = *entry;
    return true;
 }
 // Compute the normalized 1D kernel for one sub-pixel position.
 // filter: kernel with size and inv_scale already set (see mp_init_filter())
 // f: fractional position of the sampling point, in the range [0,1) or [0,1]
 // out_w: receives filter->size weights, out_w[0] to out_w[size - 1], scaled
 //        so that they sum to 1
 void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w)
 {
    assert(filter->size > 0);
    const int taps = filter->size;
    // Tap i sits (i - lead) texels away from the base texel.
    const int lead = taps / 2 - 1;
    double total = 0;
    for (int i = 0; i < taps; i++) {
        double dist = f - (i - lead);
        double weight = filter->weight(filter, fabs(dist) / filter->inv_scale);
        out_w[i] = weight;
        total += weight;
    }
    // Normalize so the taps add up to 1.
    for (int i = 0; i < taps; i++)
        out_w[i] /= total;
 }
 // Fill out_array with weights for the range [0.0, 1.0]. The array is
 // interpreted as a rectangular array of count * filter->size items: row n
 // holds the weights for position n / (count - 1). A table with a single row
 // uses position 0. Nothing is written if count <= 0.
 void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array)
 {
    for (int n = 0; n < count; n++) {
        // With count == 1 the plain division would be 0/0 and poison every
        // weight with NaN, so pin the only row to position 0.
        double f = count > 1 ? n / (double)(count - 1) : 0.0;
        mp_compute_weights(filter, f, out_array + filter->size * n);
    }
 }
 // Short alias used in the signatures of the weight functions below.
 typedef struct filter_kernel kernel;
 // Linear falloff: 1 at x = 0, 0 at x = 1. The kernel argument is not used.
 static double bilinear(kernel *k, double x)
 {
    double weight = 1.0 - x;
    return weight;
 }
 // Raised cosine with coefficients 0.5/0.5; reaches 0 at x = 1.
 static double hanning(kernel *k, double x)
 {
    double phase = M_PI * x;
    return 0.5 + 0.5 * cos(phase);
 }
 // Raised cosine with coefficients 0.54/0.46; stays at 0.08 at x = 1.
 static double hamming(kernel *k, double x)
 {
    double phase = M_PI * x;
    return 0.54 + 0.46 * cos(phase);
 }
 // Cubic 2x^3 - 3x^2 + 1: 1 at x = 0, 0 at x = 1.
 static double hermite(kernel *k, double x)
 {
    double lead = 2.0 * x - 3.0;
    return lead * x * x + 1.0;
 }
 // Piecewise quadratic with support 1.5.
 static double quadric(kernel *k, double x)
 {
    double weight = 0;
    if (x < 0.5) {
        // NOTE: glumpy uses 0.75, AGG uses 0.5
        weight = 0.75 - x * x;
    } else if (x < 1.5) {
        double d = x - 1.5;
        weight = 0.5 * d * d;
    }
    return weight;
 }
 // Cube of x, clamped to 0 for x <= 0.
 static double bc_pow3(double x)
 {
    if (x <= 0)
        return 0;
    return x * x * x;
 }
 // Cubic built from four clamped cubes (see bc_pow3()) at offsets +2, +1, 0
 // and -1, weighted 1, -4, 6, -4 and scaled by 1/6.
 static double bicubic(kernel *k, double x)
 {
    double c_p2 = bc_pow3(x + 2);
    double c_p1 = bc_pow3(x + 1);
    double c_0 = bc_pow3(x);
    double c_m1 = bc_pow3(x - 1);
    return (1.0/6.0) * (c_p2 - 4 * c_p1 + 6 * c_0 - 4 * c_m1);
 }
 // Sum the series 1 + y + y^2/(2!)^2 + y^3/(3!)^2 + ... with y = x^2 / 4
 // (the power series of the modified Bessel function I0). Terms are added
 // until one is no larger than epsilon.
 static double bessel_i0(double epsilon, double x)
 {
    const double y = x * x / 4;
    double term = y;
    double total = 1;
    int k = 2;
    while (term > epsilon) {
        total += term;
        // Next term: multiply by y / k^2.
        term *= y / (k * k);
        k++;
    }
    return total;
 }
 // Kaiser window: I0(params[0] * sqrt(1 - x^2)) / I0(params[1]), with both
 // series cut off at 1e-12.
 static double kaiser(kernel *k, double x)
 {
    const double tolerance = 1e-12;
    double shape = k->params[0];
    double norm_arg = k->params[1];
    double norm = 1 / bessel_i0(tolerance, norm_arg);
    double arg = shape * sqrt(1 - x * x);
    return bessel_i0(tolerance, arg) * norm;
 }
 // Piecewise cubic with support 2: one polynomial on [0, 1), another on
 // [1, 2), zero beyond.
 static double catmull_rom(kernel *k, double x)
 {
    double weight = 0;
    if (x < 1.0) {
        weight = 0.5 * (2.0 + x * x * (-5.0 + x * 3.0));
    } else if (x < 2.0) {
        weight = 0.5 * (4.0 + x * (-8.0 + x * (5.0 - x)));
    }
    return weight;
 }
 // Mitchell-Netravali cubic. params[0] is B and params[1] is C; the
 // polynomial coefficients of both segments are derived from them.
 static double mitchell(kernel *k, double x)
 {
    const double b = k->params[0];
    const double c = k->params[1];
    // Inner segment, x in [0, 1): in0 + in2 * x^2 + in3 * x^3
    const double in0 = (6.0 - 2.0 * b) / 6.0;
    const double in2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0;
    const double in3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0;
    // Outer segment, x in [1, 2): out0 + out1 * x + out2 * x^2 + out3 * x^3
    const double out0 = (8.0 * b + 24.0 * c) / 6.0;
    const double out1 = (-12.0 * b - 48.0 * c) / 6.0;
    const double out2 = (6.0 * b + 30.0 * c) / 6.0;
    const double out3 = (-b - 6.0 * c) / 6.0;

    double weight = 0;
    if (x < 1.0) {
        weight = in0 + x * x * (in2 + x * in3);
    } else if (x < 2.0) {
        weight = out0 + x * (out1 + x * (out2 + x * out3));
    }
    return weight;
 }
 // Two-segment cubic spline: one polynomial for x < 1, and a second one in
 // (x - 1) for everything else. There is no cutoff at the outer edge.
 static double spline16(kernel *k, double x)
 {
    if (x < 1.0)
        return ((x - 9.0/5.0 ) * x - 1.0/5.0 ) * x + 1.0;
    double t = x - 1;
    return ((-1.0/3.0 * t + 4.0/5.0) * t - 7.0/15.0 ) * t;
 }
 // Three-segment cubic spline: polynomials in x, (x - 1) and (x - 2) for
 // x < 1, x < 2 and the rest. There is no cutoff at the outer edge.
 static double spline36(kernel *k, double x)
 {
    if (x < 1.0)
        return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0;
    if (x < 2.0) {
        double t = x - 1;
        return ((-6.0/11.0 * t + 270.0/209.0) * t - 156.0/209.0) * t;
    }
    double t = x - 2;
    return ((1.0/11.0 * t - 45.0/209.0) * t +  26.0/209.0) * t;
 }
 // exp(-2 x^2) scaled by sqrt(2 / pi).
 static double gaussian(kernel *k, double x)
 {
    double falloff = exp(-2.0 * x * x);
    double scale = sqrt(2.0 / M_PI);
    return falloff * scale;
 }
 // Normalized sinc, sin(pi x) / (pi x), with the x == 0 case defined as 1.
 static double sinc(kernel *k, double x)
 {
    if (x == 0.0)
        return 1.0;
    const double phase = M_PI * x;
    return sin(phase) / phase;
 }
 // Lanczos: sinc(x) * sinc(x / radius), zero outside [-radius, radius].
 // NOTE(review): the radius is k->size / 2 (integer division), not
 // k->radius. The two differ whenever mp_init_filter() settled on a size
 // other than 2 * radius - confirm that this is intended.
 static double lanczos(kernel *k, double x)
 {
    const double window = k->size / 2;
    if (x < -window || x > window)
        return 0;
    if (x == 0)
        return 1;
    const double phase = M_PI * x;
    return window * sin(phase) * sin(phase / window) / (phase * phase);
 }
 // sinc(x) multiplied by a three-term cosine window (0.42, 0.5, 0.08), zero
 // for x > radius.
 // NOTE(review): as in lanczos(), the radius is k->size / 2 (integer
 // division) rather than k->radius - confirm that this is intended.
 static double blackman(kernel *k, double x)
 {
    const double window = k->size / 2;
    if (x == 0.0)
        return 1.0;
    if (x > window)
        return 0.0;
    const double phase = x * M_PI;
    const double rel = phase / window;
    return (sin(phase) / phase) * (0.42 + 0.5 * cos(rel) + 0.08 * cos(2 * rel));
 }
 // Built-in kernels. Columns are positional and follow struct filter_kernel:
 // name, radius, weight function; kaiser and mitchell additionally preset
 // params[]. The all-zero last entry (name == NULL) terminates the table and
 // is what mp_find_filter_kernel() stops on.
 const struct filter_kernel mp_filter_kernels[] = {
    {"bilinear_slow",  1,   bilinear},
    {"hanning",        1,   hanning},
    {"hamming",        1,   hamming},
    {"hermite",        1,   hermite},
    {"quadric",        1.5, quadric},
    {"bicubic",        2,   bicubic},
    {"kaiser",         1,   kaiser, .params = {6.33, 6.33} },
    {"catmull_rom",    2,   catmull_rom},
    {"mitchell",       2,   mitchell, .params = {1.0/3.0, 1.0/3.0} },
    {"spline16",       2,   spline16},
    {"spline36",       3,   spline36},
    {"gaussian",       2,   gaussian},
    // The same function serves several radii; only the radius column differs.
    {"sinc2",          2,   sinc},
    {"sinc3",          3,   sinc},
    {"sinc4",          4,   sinc},
    {"lanczos2",       2,   lanczos},
    {"lanczos3",       3,   lanczos},
    {"lanczos4",       4,   lanczos},
    {"blackman2",      2,   blackman},
    {"blackman3",      3,   blackman},
    {"blackman4",      4,   blackman},
    {0}
 };
--- a/libvo/filter_kernels.h
+++ b/libvo/filter_kernels.h
@ -0,0 +1,45 @@
 /*
 * This file is part of mplayer2.
 *
 * mplayer2 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mplayer2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with mplayer2; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 #ifndef MPLAYER_FILTER_KERNELS_H
 #define MPLAYER_FILTER_KERNELS_H
 #include <stdbool.h>
 // Description of one separable scaling kernel. The first three members are
 // filled in positionally by the mp_filter_kernels[] table, so their order
 // must not change.
 struct filter_kernel {
    // Name used for lookup with mp_find_filter_kernel().
    const char *name;
    // Kernel radius; mp_init_filter() derives the number of taps from it.
    double radius;
    // Weight function. mp_compute_weights() calls it with a non-negative x
    // (a distance already divided by inv_scale).
    double (*weight)(struct filter_kernel *kernel, double x);
    // The filter params can be changed at runtime. Only used by some filters.
    float params[2];
    // The following values are set by mp_init_filter() at runtime.
    // Number of coefficients per output sample, picked from the list of
    // available sizes passed to mp_init_filter().
    int size;
    // Widening factor applied to the kernel; at least 1 when a fitting size
    // was found.
    double inv_scale;
 };
 // Table of built-in kernels, terminated by an entry with name == NULL.
 extern const struct filter_kernel mp_filter_kernels[];
 // Return the table entry with the given name, or NULL if there is none.
 const struct filter_kernel *mp_find_filter_kernel(const char *name);
 // Set filter->size and filter->inv_scale.
 // sizes: sorted list of available filter sizes, terminated with size 0
 // inv_scale: source_size / dest_size
 // Returns false if no available size was large enough and the last entry of
 // sizes was used instead.
 bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
                     double inv_scale);
 // Write filter->size weights, normalized to sum 1, for sub-pixel position f
 // to out_w.
 void mp_compute_weights(struct filter_kernel *filter, double f, float *out_w);
 // Fill out_array with count rows of filter->size weights covering the
 // position range [0.0, 1.0].
 void mp_compute_lut(struct filter_kernel *filter, int count, float *out_array);
 #endif /* MPLAYER_FILTER_KERNELS_H */
--- a/libvo/gl_common.c
+++ b/libvo/gl_common.c
@ -2587,3 +2587,19 @@ void uninit_mpglcontext(MPGLContext *ctx)
    }
    talloc_free(ctx);
 }
 // Print a multi-line string through mp_msg(), one call per line, each line
 // prefixed with its 1-based line number (e.g. for shader sources).
 // mod, lev: module and log level, passed on to mp_msg()
 // src: NUL-terminated text; NULL prints nothing
 void mp_log_source(int mod, int lev, const char *src)
 {
    if (!src)
        return;
    int line = 1;
    while (*src) {
        const char *end = strchr(src, '\n');
        const char *next;
        if (end) {
            next = end + 1;     // continue after the newline
        } else {
            // Last line without a trailing newline: stop at the terminator.
            // Decided before any arithmetic on end, which is NULL here.
            end = next = src + strlen(src);
        }
        mp_msg(mod, lev, "[%3d] %.*s\n", line, (int)(end - src), src);
        line++;
        src = next;
    }
 }
--- a/libvo/gl_common.h
+++ b/libvo/gl_common.h
@ -431,6 +431,10 @@ void uninit_mpglcontext(MPGLContext *ctx);
 int create_mpglcontext(struct MPGLContext *ctx, int gl_flags, int gl_version,
                       uint32_t d_width, uint32_t d_height, uint32_t flags);
 // print a multi line string with line numbers (e.g. for shader sources)
 // mod, lev: module and log level, as in mp_msg()
 // src: NUL-terminated text, logged one line per mp_msg() call; NULL is
 //      accepted and logs nothing
 void mp_log_source(int mod, int lev, const char *src);
 //function pointers loaded from the OpenGL library
 struct GL {
    void (GLAPIENTRY *Begin)(GLenum);
--- a/libvo/video_out.c
+++ b/libvo/video_out.c
@ -84,6 +84,7 @@ extern struct vo_driver video_out_vdpau;
 extern struct vo_driver video_out_xv;
 extern struct vo_driver video_out_gl_nosw;
 extern struct vo_driver video_out_gl;
 extern struct vo_driver video_out_gl3;
 extern struct vo_driver video_out_dga;
 extern struct vo_driver video_out_sdl;
 extern struct vo_driver video_out_3dfx;
@ -169,6 +170,9 @@ const struct vo_driver *video_out_drivers[] =
 #ifdef CONFIG_XV
        &video_out_xv,
 #endif
 #ifdef CONFIG_GL
        &video_out_gl3,
 #endif
 #ifdef CONFIG_X11
 #ifdef CONFIG_GL
        &video_out_gl_nosw,
--- a/libvo/vo_gl3.c
+++ b/libvo/vo_gl3.c
--- a/libvo/vo_gl3_shaders.glsl
+++ b/libvo/vo_gl3_shaders.glsl
@ -0,0 +1,316 @@
 /*
 * This file is part of mplayer2.
 *
 * mplayer2 is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mplayer2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with mplayer2; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 // Note that this file is not directly passed as shader, but run through some
 // text processing functions, and in fact contains multiple vertex and fragment
 // shaders.
 // inserted at the beginning of all shaders
 #!section prelude
 #!section vertex_all
 // Vertex stage: positions the vertex and forwards color and texture
 // coordinates to the fragment stage.
 uniform mat3 transform;
 uniform sampler3D lut_3d;
 in vec2 vertex_position;
 in vec4 vertex_color;
 out vec4 color;
 in vec2 vertex_texcoord;
 out vec2 texcoord;
 void main() {
    // Homogeneous 2D position; FIXED_SCALE bypasses the transform matrix.
 #ifdef FIXED_SCALE
    vec3 position = vec3(vertex_position, 1);
 #else
    vec3 position = transform * vec3(vertex_position, 1);
 #endif
    gl_Position = vec4(position, 1);
 #ifdef USE_3DLUT
    // Map the vertex color through the 3D LUT; alpha is passed through.
    color = vec4(texture(lut_3d, vertex_color.rgb).rgb, vertex_color.a);
 #else
    color = vertex_color;
 #endif
    texcoord = vertex_texcoord;
 }
 #!section frag_eosd
 // EOSD fragment stage: the red channel of texture1 scales the alpha of the
 // interpolated vertex color; the RGB part is taken from the color as is.
 uniform sampler2D texture1;
 in vec2 texcoord;
 in vec4 color;
 out vec4 out_color;
 void main() {
    float coverage = texture(texture1, texcoord).r;
    out_color = vec4(color.rgb, color.a * coverage);
 }
 #!section frag_osd
 // OSD fragment stage: texture1's red channel is replicated to RGB and its
 // green channel becomes alpha; the result is modulated by the vertex color.
 uniform sampler2D texture1;
 in vec2 texcoord;
 in vec4 color;
 out vec4 out_color;
 void main() {
    vec4 texel = texture(texture1, texcoord).rrrg;
    out_color = texel * color;
 }
 #!section frag_video
 // Video input: main() always samples texture1; texture2 and texture3 are
 // only sampled when USE_PLANAR is defined.
 uniform sampler2D texture1;
 uniform sampler2D texture2;
 uniform sampler2D texture3;
 // Lookup textures for filter weights, in 1D and 2D variants. Nothing in this
 // section names them directly; presumably they are passed in through the
 // SAMPLE_L/SAMPLE_C macros (_l/_c likely luma/chroma) - confirm against the
 // code that defines those macros.
 uniform sampler1D lut_c_1d;
 uniform sampler1D lut_l_1d;
 uniform sampler2D lut_c_2d;
 uniform sampler2D lut_l_2d;
 // Sampled by main() when USE_3DLUT is defined.
 uniform sampler3D lut_3d;
 // Sampled by main() when USE_DITHER is defined.
 uniform sampler2D dither;
 // Applied by main() when USE_COLORMATRIX is defined: the first three
 // columns multiply the color, the fourth is added.
 uniform mat4x3 colormatrix;
 // Per-channel exponent applied when USE_GAMMA_POW is defined.
 uniform vec3 inv_gamma;
 // Not read by any code in this section.
 uniform float conv_gamma;
 // Used by the USE_DITHER step: color is multiplied by dither_multiply, the
 // dither value is added, and the floor of that is divided by
 // dither_quantization.
 uniform float dither_quantization;
 uniform float dither_multiply;
 // Strength factor of sample_sharpen3() and sample_sharpen5().
 uniform float filter_param1;
 in vec2 texcoord;
 out vec4 out_color;
 // Plain texture fetch; any filtering is whatever the sampler is set up for.
 vec4 sample_bilinear(sampler2D tex, vec2 texcoord) {
    vec4 texel = texture(tex, texcoord);
    return texel;
 }
 // Explanation how bicubic scaling with only 4 texel fetches is done:
 //   http://www.mate.tue.nl/mate/pdfs/10318.pdf
 //   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
 // Explanation why this algorithm normally always blurs, even with unit scaling:
 //   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
 //   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
 //
 // s is the fractional sample position along one axis. The result packs the
 // per-axis parameters consumed by sample_bicubic_fast(): .x and .y are the
 // offsets (in texels) of the two fetches, .z is the mix() factor between
 // them. .w is only needed inside this function.
 vec4 calcweights(float s) {
    // Three Horner steps evaluate one cubic polynomial in s per component.
    vec4 t = vec4(-0.5, 0.1666, 0.3333, -0.3333) * s + vec4(1, 0, -0.5, 0.5);
    t = t * s + vec4(0, 0, -0.5, 0.5);
    t = t * s + vec4(-0.6666, 0, 0.8333, 0.1666);
    // Divide x by z and y by w, then shift by one texel ...
    vec2 a = vec2(1 / t.z, 1 / t.w);
    t.xy = t.xy * a + vec2(1, 1);
    // ... and move both by the fractional position, in opposite directions.
    t.x = t.x + s;
    t.y = t.y - s;
    return t;
 }
 // Bicubic sampling from four texture fetches: two fetch positions per axis
 // come from calcweights(), and the four results are blended with mix().
 vec4 sample_bicubic_fast(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 pt = 1 / texsize;
    vec2 frac = fract(texcoord * texsize + vec2(0.5, 0.5));
    vec4 wx = calcweights(frac.x);
    vec4 wy = calcweights(frac.y);
    // Per axis: .x is the offset towards negative coordinates, .y the offset
    // towards positive coordinates.
    vec2 ofs_x = wx.rg * vec2(-pt.x, pt.x);
    vec2 ofs_y = wy.rg * vec2(-pt.y, pt.y);
    // Blend along y at the negative x offset ...
    vec4 neg_a = texture(tex, texcoord + vec2(ofs_x.x, ofs_y.x));
    vec4 neg_b = texture(tex, texcoord + vec2(ofs_x.x, ofs_y.y));
    vec4 col_neg = mix(neg_b, neg_a, wy.b);
    // ... and at the positive x offset ...
    vec4 pos_a = texture(tex, texcoord + vec2(ofs_x.y, ofs_y.x));
    vec4 pos_b = texture(tex, texcoord + vec2(ofs_x.y, ofs_y.y));
    vec4 col_pos = mix(pos_b, pos_a, wy.b);
    // ... then blend the two columns along x.
    return mix(col_pos, col_neg, wx.b);
 }
 // weightsN(): fetch the N filter coefficients for sub-pixel position f from
 // a lookup texture and return them as an array. Up to 4 coefficients come
 // from one fetch of a 1D texture; larger kernels read several texels along
 // x of a 2D texture (at the listed x coordinates) with f as y.
 float[2] weights2(sampler1D lookup, float f) {
    vec2 w = texture(lookup, f).rg;
    return float[2](w.x, w.y);
 }
 float[4] weights4(sampler1D lookup, float f) {
    vec4 w = texture(lookup, f);
    return float[4](w.x, w.y, w.z, w.w);
 }
 // 6 taps: two fetches, three channels each (alpha is ignored).
 float[6] weights6(sampler2D lookup, float f) {
    vec3 lo = texture(lookup, vec2(0.25, f)).rgb;
    vec3 hi = texture(lookup, vec2(0.75, f)).rgb;
    return float[6](lo.x, lo.y, lo.z, hi.x, hi.y, hi.z);
 }
 float[8] weights8(sampler2D lookup, float f) {
    vec4 lo = texture(lookup, vec2(0.25, f));
    vec4 hi = texture(lookup, vec2(0.75, f));
    return float[8](lo.x, lo.y, lo.z, lo.w, hi.x, hi.y, hi.z, hi.w);
 }
 float[12] weights12(sampler2D lookup, float f) {
    vec4 w0 = texture(lookup, vec2(1.0/6.0, f));
    vec4 w1 = texture(lookup, vec2(0.5, f));
    vec4 w2 = texture(lookup, vec2(5.0/6.0, f));
    return float[12](w0.x, w0.y, w0.z, w0.w,
                     w1.x, w1.y, w1.z, w1.w,
                     w2.x, w2.y, w2.z, w2.w);
 }
 float[16] weights16(sampler2D lookup, float f) {
    vec4 w0 = texture(lookup, vec2(0.125, f));
    vec4 w1 = texture(lookup, vec2(0.375, f));
    vec4 w2 = texture(lookup, vec2(0.625, f));
    vec4 w3 = texture(lookup, vec2(0.875, f));
    return float[16](w0.x, w0.y, w0.z, w0.w, w1.x, w1.y, w1.z, w1.w,
                     w2.x, w2.y, w2.z, w2.w, w3.x, w3.y, w3.z, w3.w);
 }
 // Defines NAME(): a 1D convolution with N taps. Tap n is fetched at
 // texcoord + pt * n and scaled by weights[n]; pt is the step between taps.
 #define CONVOLUTION_SEP_N(NAME, N)                                           \
    vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float weights[N]) {     \
        vec4 res = vec4(0);                                                  \
        for (int n = 0; n < N; n++) {                                        \
            res += weights[n] * texture(tex, texcoord + pt * n);             \
        }                                                                    \
        return res;                                                          \
    }
 // One instance per supported kernel size.
 CONVOLUTION_SEP_N(convolution_sep2, 2)
 CONVOLUTION_SEP_N(convolution_sep4, 4)
 CONVOLUTION_SEP_N(convolution_sep6, 6)
 CONVOLUTION_SEP_N(convolution_sep8, 8)
 CONVOLUTION_SEP_N(convolution_sep12, 12)
 CONVOLUTION_SEP_N(convolution_sep16, 16)
 // Defines NAME(): one pass of a separable N-tap filter along dir.
 // fcoord is the fractional position inside the texel along dir, base is
 // texcoord moved back by that fraction, and the first tap lies N / 2 - 1
 // texels before base. The weights for fcoord come from WEIGHTS_FUNC.
 // The dir parameter is (0, 1) or (1, 0), and we expect the shader compiler to
 // remove all the redundant multiplications and additions.
 #define SAMPLE_CONVOLUTION_SEP_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC)\
    vec4 NAME(vec2 dir, SAMPLERT lookup, sampler2D tex, vec2 texcoord) {    \
        vec2 texsize = textureSize(tex, 0);                                 \
        vec2 pt = (1 / texsize) * dir;                                      \
        float fcoord = dot(fract(texcoord * texsize - 0.5), dir);           \
        vec2 base = texcoord - fcoord * pt;                                 \
        return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt,                  \
                         WEIGHTS_FUNC(lookup, fcoord));                     \
    }
 // Sizes 2 and 4 read their weights from a 1D texture, larger ones from 2D.
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep2, 2, sampler1D, convolution_sep2, weights2)
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep4, 4, sampler1D, convolution_sep4, weights4)
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep6, 6, sampler2D, convolution_sep6, weights6)
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep8, 8, sampler2D, convolution_sep8, weights8)
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep12, 12, sampler2D, convolution_sep12, weights12)
 SAMPLE_CONVOLUTION_SEP_N(sample_convolution_sep16, 16, sampler2D, convolution_sep16, weights16)
 // Defines NAME(): a full N x N convolution in a single pass. Each row y is
 // summed with the taps_x weights, then the rows are combined with taps_y.
 // Tap (x, y) is fetched at texcoord + pt * vec2(x, y).
 #define CONVOLUTION_N(NAME, N)                                               \
    vec4 NAME(sampler2D tex, vec2 texcoord, vec2 pt, float taps_x[N],        \
              float taps_y[N]) {                                             \
        vec4 res = vec4(0);                                                  \
        for (int y = 0; y < N; y++) {                                        \
            vec4 line = vec4(0);                                             \
            for (int x = 0; x < N; x++)                                      \
                line += taps_x[x] * texture(tex, texcoord + pt * vec2(x, y));\
            res += taps_y[y] * line;                                         \
        }                                                                    \
        return res;                                                          \
    }
 // One instance per supported kernel size.
 CONVOLUTION_N(convolution2, 2)
 CONVOLUTION_N(convolution4, 4)
 CONVOLUTION_N(convolution6, 6)
 CONVOLUTION_N(convolution8, 8)
 CONVOLUTION_N(convolution12, 12)
 CONVOLUTION_N(convolution16, 16)
 // Defines NAME(): single-pass 2D filtering with an N x N kernel. fcoord is
 // the fractional position inside the texel on both axes, base is texcoord
 // moved back by it, and the first tap lies N / 2 - 1 texels before base on
 // each axis. The same lookup texture provides the x and the y weights.
 #define SAMPLE_CONVOLUTION_N(NAME, N, SAMPLERT, CONV_FUNC, WEIGHTS_FUNC)    \
    vec4 NAME(SAMPLERT lookup, sampler2D tex, vec2 texcoord) {              \
        vec2 texsize = textureSize(tex, 0);                                 \
        vec2 pt = 1 / texsize;                                              \
        vec2 fcoord = fract(texcoord * texsize - 0.5);                      \
        vec2 base = texcoord - fcoord * pt;                                 \
        return CONV_FUNC(tex, base - pt * (N / 2 - 1), pt,                  \
                         WEIGHTS_FUNC(lookup, fcoord.x),                    \
                         WEIGHTS_FUNC(lookup, fcoord.y));                   \
    }
 // Sizes 2 and 4 read their weights from a 1D texture, larger ones from 2D.
 SAMPLE_CONVOLUTION_N(sample_convolution2, 2, sampler1D, convolution2, weights2)
 SAMPLE_CONVOLUTION_N(sample_convolution4, 4, sampler1D, convolution4, weights4)
 SAMPLE_CONVOLUTION_N(sample_convolution6, 6, sampler2D, convolution6, weights6)
 SAMPLE_CONVOLUTION_N(sample_convolution8, 8, sampler2D, convolution8, weights8)
 SAMPLE_CONVOLUTION_N(sample_convolution12, 12, sampler2D, convolution12, weights12)
 SAMPLE_CONVOLUTION_N(sample_convolution16, 16, sampler2D, convolution16, weights16)
 // Unsharp masking: the difference between the center texel and the average
 // of four diagonal fetches half a texel away is added back, scaled by
 // filter_param1.
 vec4 sample_sharpen3(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 ofs = (1 / texsize) * 0.5;
    vec4 center = texture(tex, texcoord);
    vec4 diag = texture(tex, texcoord + ofs * vec2(+1, +1));
    diag += texture(tex, texcoord + ofs * vec2(+1, -1));
    diag += texture(tex, texcoord + ofs * vec2(-1, +1));
    diag += texture(tex, texcoord + ofs * vec2(-1, -1));
    return center + (center - 0.25 * diag) * filter_param1;
 }
 // Larger sharpening kernel: four diagonal fetches at 1.2 texels and four
 // axis-aligned fetches at 1.5 texels are weighted against the center texel;
 // the result is added back scaled by filter_param1.
 vec4 sample_sharpen5(sampler2D tex, vec2 texcoord) {
    vec2 texsize = textureSize(tex, 0);
    vec2 pt = 1 / texsize;
    vec4 center = texture(tex, texcoord);
    // Diagonal ring.
    vec2 d_ofs = pt * 1.2;
    vec4 diag = texture(tex, texcoord + d_ofs * vec2(+1, +1));
    diag += texture(tex, texcoord + d_ofs * vec2(+1, -1));
    diag += texture(tex, texcoord + d_ofs * vec2(-1, +1));
    diag += texture(tex, texcoord + d_ofs * vec2(-1, -1));
    // Axis-aligned ring.
    vec2 a_ofs = pt * 1.5;
    vec4 axis = texture(tex, texcoord + a_ofs * vec2(+1,  0));
    axis += texture(tex, texcoord + a_ofs * vec2( 0, +1));
    axis += texture(tex, texcoord + a_ofs * vec2(-1,  0));
    axis += texture(tex, texcoord + a_ofs * vec2( 0, -1));
    vec4 detail = center * 0.859375 + axis * -0.1171875 + diag * -0.09765625;
    return center + detail * filter_param1;
 }
 // Video fragment stage. Every step after sampling is optional and selected
 // with a USE_* define; the steps run in the order written here.
 void main() {
    // Sampling: three single-channel planes, or one packed texture.
 #ifdef USE_PLANAR
    vec3 color = vec3(SAMPLE_L(texture1, texcoord).r,
                      SAMPLE_C(texture2, texcoord).r,
                      SAMPLE_C(texture3, texcoord).r);
 #else
    vec3 color = SAMPLE_L(texture1, texcoord).rgb;
 #endif
 #ifdef USE_GBRP
    // Input channel order is G, B, R.
    color.gbr = color;
 #endif
 #ifdef USE_YGRAY
    // Overwrite the second and third channel with a constant mid value.
    // NOTE: actually slightly wrong for 16 bit input video, and completely
    //       wrong for 9/10 bit input
    color.gb = vec2(128.0/255.0);
 #endif
 #ifdef USE_COLORMATRIX
    // 3x3 part multiplies the color, the fourth column is the offset.
    color = mat3(colormatrix) * color + colormatrix[3];
 #endif
 #ifdef USE_LINEAR_CONV
    color = pow(color, vec3(2.2));
 #endif
 #ifdef USE_LINEAR_CONV_INV
    // Convert from linear RGB to gamma RGB before putting it through the 3D-LUT
    // in the final stage.
    color = pow(color, vec3(1.0/2.2));
 #endif
 #ifdef USE_GAMMA_POW
    color = pow(color, inv_gamma);
 #endif
 #ifdef USE_3DLUT
    color = texture(lut_3d, color).rgb;
 #endif
 #ifdef USE_DITHER
    // The dither texture is addressed by window position and repeats with its
    // own size; the value is added before the color is quantized.
    float dither_value = texture(dither, gl_FragCoord.xy / textureSize(dither, 0)).r;
    color = floor(color * dither_multiply + dither_value ) / dither_quantization;
 #endif
    out_color = vec4(color, 1);
 }