diff --git a/DOCS/man/vf.rst b/DOCS/man/vf.rst index 5c576e6bad..6465d305ff 100644 --- a/DOCS/man/vf.rst +++ b/DOCS/man/vf.rst @@ -546,3 +546,74 @@ Available mpv-only filters are: which algorithm is actually selected. ``none`` always falls back. On most if not all hardware, this option will probably do nothing, because a video processor usually supports all modes or none. + +``fingerprint=...`` + Compute video frame fingerprints and provide them as metadata. Actually, it + currently barely deserves to be called ``fingerprint``, because it does not + compute "proper" fingerprints, only tiny downscaled images (but which can be + used to compute image hashes or for similarity matching). + + The main purpose of this filter is to support the ``skip-logo.lua`` script. + If this script is dropped, or mpv ever gains a way to load user-defined + filters (other than VapourSynth), this filter will be removed. Due to the + "special" nature of this filter, it will be removed without warning. + + The intended way to read from the filter is using ``vf-metadata`` (also + see ``clear-on-query`` filter parameter). The property will return a list + of key/value pairs as follows: + + :: + + fp0.pts = 1.2345 + fp0.hex = 1234abcdef...bcde + fp1.pts = 1.4567 + fp1.hex = abcdef1234...6789 + ... + fpN.pts = ... + fpN.hex = ... + type = gray-hex-16x16 + + Each ``fp`` entry is for a frame. The ``pts`` entry specifies the + timestamp of the frame (within the filter chain; in simple cases this is + the same as the display timestamp). The ``hex`` field is the hex encoded + fingerprint, whose size and meaning depend on the ``type`` filter option. + The ``type`` field has the same value as the option the filter was created + with. + + This returns the frames that were filtered since the last query of the + property. If ``clear-on-query=no`` was set, a query doesn't reset the list + of frames. In both cases, a maximum of 10 frames is returned. 
If there are + more frames, the oldest frames are discarded. Frames are returned in filter + order. + + (This doesn't return a structured list for the per-frame details because the + internals of the ``vf-metadata`` mechanism suck. The returned format may + change in the future.) + + This filter uses zimg for speed and profit. However, it will fall back to + libswscale in a number of situations: lesser pixel formats, unaligned data + pointers or strides, or if zimg fails to initialize for unknown reasons. In + these cases, the filter will use more CPU. Also, it will output different + fingerprints, because libswscale cannot perform the full range expansion we + normally request from zimg. As a consequence, the filter may be slower and + not work correctly in random situations. + + ``type=...`` + What fingerprint to compute. Available types are: + + :gray-hex-8x8: grayscale, 8 bit, 8x8 size + :gray-hex-16x16: grayscale, 8 bit, 16x16 size (default) + + Both types simply remove all colors, downscale the image, concatenate + all pixel values to a byte array, and convert the array to a hex string. + + ``clear-on-query=yes|no`` + Clear the list of frame fingerprints if the ``vf-metadata`` property for + this filter is queried (default: yes). This requires some care by the + user. Some types of accesses might query the filter multiple times, + which leads to lost frames. + + ``print=yes|no`` + Print computed fingerprints to the terminal (default: no). This is + mostly for testing and such. Scripts should use ``vf-metadata`` to + read information from this filter instead. diff --git a/TOOLS/lua/skip-logo.lua b/TOOLS/lua/skip-logo.lua new file mode 100644 index 0000000000..2c624a04e8 --- /dev/null +++ b/TOOLS/lua/skip-logo.lua @@ -0,0 +1,245 @@ +--[[ + +Automatically skip in files if video frames with pre-supplied fingerprints are +detected. This will skip ahead by a pre-configured amount of time if a matching +video frame is detected. 
+ +This requires the vf_fingerprint video filter to be compiled in. Read the +documentation of this filter for caveats (which will automatically apply to +this script as well), such as no support for zero-copy hardware decoding. + +You need to manually gather and provide fingerprints for video frames and add +them to a configuration file in script-opts/skip-logo.conf (the "script-opts" +directory must be in the mpv configuration directory, typically ~/.config/mpv/). + +Example script-opts/skip-logo.conf: + + + cases = { + { + -- Skip ahead 10 seconds if a black frame was detected + -- Note: this is dangerous non-sense. It's just for demonstration. + name = "black frame", -- print if matched + skip = 10, -- number of seconds to skip forward + score = 0.3, -- required score + fingerprint = "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + }, + { + -- Skip ahead 20 seconds if a white frame was detected + -- Note: this is dangerous non-sense. It's just for demonstration. 
+ name = "fun2", + skip = 20, + fingerprint = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + }, + } + +This is actually a lua file. Lua was chosen because it seemed less of a pain to +parse. Future versions of this script may change the format. + +The fingerprint is a video frame, converted to "gray" (8 bit per pixels), full +range, each pixel concatenated into an array, converted to a hex string. You +can produce these fingerprints by running this manually: + + mpv --vf=fingerprint:print yourfile.mkv + +This will log the fingerprint of each video frame to the console, along with its +timestamp. You find the fingerprint of a unique-enough looking frame, and add +it as entry to skip-logo.conf. + +You can provide a score for "fuzziness". If no score is provided, a default +value of 0.3 is used. The score is inverse: 0 means exactly the same, while a +higher score means a higher difference. Currently, the score is computed as +euclidean distance between the video frame and the pre-provided fingerprint, +thus the highest score is 16. You probably want a score lower than 1 at least. +(This algorithm is very primitive, but also simple and fast to compute.) + +There's always the danger of false positives, which might be quite annoying. +It's up to you what you hate more, the logo, or random skips if false positives +are detected. Also, it's always active, and might eat too much CPU with files +that have a high resolution or framerate. 
To temporarily disable the script, +having a keybind like this in your input.conf will be helpful: + + ctrl+k vf toggle @skip-logo + +This will disable/enable the fingerprint filter, which the script automatically +adds at start. + +Another important caveat is that the script currently disables matching during +seeking or playback initialization, which means it cannot match the first few +frames of a video. This could be fixed, but the author was too lazy to do so. + +--]] + +local utils = require "mp.utils" +local msg = require "mp.msg" + +local label = "skip-logo" +local meta_property = string.format("vf-metadata/%s", label) + +local config = {} +local cases = {} +local cur_bmp + +-- Convert a hex string to an array. Convert each byte to a [0,1] float by +-- interpreting it as normalized uint8_t. +-- The data parameter, if not nil, may be used as storage (avoiding garbage). +local function hex_to_norm8(hex, data) + local size = math.floor(#hex / 2) + if #hex ~= size * 2 then + return nil + end + local res + if (data ~= nil) and (#data == size) then + res = data + else + res = {} + end + for i = 1, size do + local num = tonumber(hex:sub(i * 2 - 1, i * 2), 16) -- byte i = chars 2i-1..2i (1-based) + if num == nil then + return nil + end + res[i] = num / 255.0 + end + return res +end + +local function compare_bmp(a, b) + if #a ~= #b then + return nil -- can't compare + end + local sum = 0 + for i = 1, #a do + local diff = a[i] - b[i] + sum = sum + diff * diff + end + return math.sqrt(sum) +end + +local function load_config() + local conf_file = mp.find_config_file("script-opts/skip-logo.conf") + local conf_fn + local err = nil + if conf_file then + if setfenv then + conf_fn, err = loadfile(conf_file) + if conf_fn then + setfenv(conf_fn, config) + end + else + msg.warn("Lua 5.2 was not tested, this might go wrong.") + conf_fn, err = loadfile(conf_file, "t", config) + end + else + err = "config file not found" + end + + if conf_fn and (not err) then + local ok, err2 = pcall(conf_fn) + err = err2 + end + + 
if err then + msg.error("Failed to load config file:", err) + end + + if config.cases then + for n, case in ipairs(config.cases) do + local err = nil + case.bitmap = (type(case.fingerprint) == "string") and hex_to_norm8(case.fingerprint) or nil -- non-string would raise on #hex + if case.bitmap == nil then + err = "invalid or missing fingerprint field" + end + if case.score == nil then + case.score = 0.3 + end + if type(case.score) ~= "number" then + err = "score field is not a number" + end + if type(case.skip) ~= "number" then + err = "skip field is not a number or missing" + end + if case.name == nil then + case.name = ("Entry %d"):format(n) + end + if err == nil then + cases[#cases + 1] = case + else + msg.error(("Entry %s: %s, ignoring."):format(case.name, err)) + end + end + end +end + +load_config() + +-- Returns true on match and if something was done. +local function check_fingerprint(hex, pts) + local bmp = hex_to_norm8(hex, cur_bmp) + cur_bmp = bmp + + -- If parsing the filter's result failed (well, it shouldn't). + assert(bmp ~= nil, "filter returned nonsense") + + for _, case in ipairs(cases) do + local score = compare_bmp(case.bitmap, bmp) + if (score ~= nil) and (score <= case.score) then + msg.warn(("Matching %s: score=%f (required: %f) at %s, skipping %f seconds"): + format(case.name, score, case.score, mp.format_time(pts), case.skip)) + mp.commandv("seek", pts + case.skip, "absolute+exact") + return true + end + end + + return false +end + +mp.observe_property(meta_property, "none", function() + local result = mp.get_property_native(meta_property) + if result == nil then + return + end + + -- Disable matching while seeking. This is not always ideal. For example, + -- the filter chain may filter frames ahead of where it will resume + -- playback (if something prefetches frames). On the other hand, the + -- skipping logic shouldn't activate when the user is trying to seek past + -- the skip frame anyway. 
You could be more fancy here, and store all + -- seen frames, then apply the skipping when it's actually displayed (by + -- observing the playback time). But for now, the naive and not-always- + -- correct way seems to suffice. + if mp.get_property_bool("seeking", false) then + return + end + + -- Try to get all entries. Out of laziness, assume that there are at most + -- 100 entries. (In fact, vf_fingerprint limits it to 10.) + for i = 0, 99 do + local prefix = string.format("fp%d.", i) + local hex = result[prefix .. "hex"] + + local pts = tonumber(result[prefix .. "pts"]) + if (hex == nil) or (pts == nil) then + break + end + + if check_fingerprint(hex, pts) then + break + end + end +end) + +local filters = mp.get_property_native("option-info/vf/choices", {}) +local found = false +for _, f in ipairs(filters) do + if f == "fingerprint" then + found = true + break + end +end + +if found then + mp.command(("no-osd vf add @%s:fingerprint"):format(label)) +else + msg.warn("vf_fingerprint not found") +end diff --git a/filters/user_filters.c b/filters/user_filters.c index e1b7a8bce1..1a4cf3b122 100644 --- a/filters/user_filters.c +++ b/filters/user_filters.c @@ -64,6 +64,9 @@ const struct mp_user_filter_entry *vf_list[] = { &vf_lavfi, &vf_lavfi_bridge, &vf_sub, +#if HAVE_ZIMG + &vf_fingerprint, +#endif #if HAVE_VAPOURSYNTH &vf_vapoursynth, #endif diff --git a/filters/user_filters.h b/filters/user_filters.h index 88cb859a71..9bf40e248b 100644 --- a/filters/user_filters.h +++ b/filters/user_filters.h @@ -33,3 +33,4 @@ extern const struct mp_user_filter_entry vf_format; extern const struct mp_user_filter_entry vf_vdpaupp; extern const struct mp_user_filter_entry vf_vavpp; extern const struct mp_user_filter_entry vf_d3d11vpp; +extern const struct mp_user_filter_entry vf_fingerprint; diff --git a/video/filter/vf_fingerprint.c b/video/filter/vf_fingerprint.c new file mode 100644 index 0000000000..3fa2ed6770 --- /dev/null +++ b/video/filter/vf_fingerprint.c @@ -0,0 
+1,310 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <zimg.h> + +#include "common/common.h" +#include "common/tags.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" +#include "video/img_format.h" +#include "video/sws_utils.h" + +#include "osdep/timer.h" + +#define ZIMG_ALIGN 32 + +#define PRINT_ENTRY_NUM 10 + +struct f_opts { + int type; + int clear; + int print; +}; + +const struct m_opt_choice_alternatives type_names[] = { + {"gray-hex-8x8", 8}, + {"gray-hex-16x16", 16}, + {0} +}; + +#define OPT_BASE_STRUCT struct f_opts +static const struct m_option f_opts_list[] = { + OPT_CHOICE_C("type", type, 0, type_names), + OPT_FLAG("clear-on-query", clear, 0), + OPT_FLAG("print", print, 0), + {0} +}; + +static const struct f_opts f_opts_def = { + .type = 16, + .clear = 1, +}; + +struct print_entry { + double pts; + char *print; +}; + +struct priv { + struct f_opts *opts; + struct mp_image *scaled; + struct mp_sws_context *sws; + struct print_entry entries[PRINT_ENTRY_NUM]; + int num_entries; + int last_imgfmt, last_w, last_h; + int gray_plane_imgfmt; + zimg_filter_graph *zimg_graph; + void *zimg_tmp; +}; + +// (Other code internal to this filter also calls this to reset the frame list.) 
+static void f_reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + for (int n = 0; n < p->num_entries; n++) + talloc_free(p->entries[n].print); + p->num_entries = 0; +} + +// Rebuild the zimg scaling graph if the input format or size changed. On any +// failure zimg_graph stays NULL and f_process() falls back to libswscale. +static void reinit_fmt(struct mp_filter *f, struct mp_image *mpi) +{ + struct priv *p = f->priv; + + if (mpi->imgfmt == p->last_imgfmt && + mpi->w == p->last_w && + mpi->h == p->last_h) + return; + + p->last_imgfmt = mpi->imgfmt; + p->last_w = mpi->w; + p->last_h = mpi->h; + + free(p->zimg_tmp); + p->zimg_tmp = NULL; + zimg_filter_graph_free(p->zimg_graph); + p->zimg_graph = NULL; + + if (!(mpi->fmt.flags & (MP_IMGFLAG_YUV_NV | MP_IMGFLAG_YUV_P))) + return; + + zimg_image_format src_fmt, dst_fmt; + + // Note: we try to pass only the first plane. Formats which do not have + // such a luma plane are excluded above. + zimg_image_format_default(&src_fmt, ZIMG_API_VERSION); + src_fmt.width = mpi->w; + src_fmt.height = mpi->h; + src_fmt.color_family = ZIMG_COLOR_GREY; + src_fmt.pixel_type = ZIMG_PIXEL_BYTE; + src_fmt.depth = mpi->fmt.component_bits; + src_fmt.pixel_range = mpi->params.color.levels == MP_CSP_LEVELS_PC ? 
+ ZIMG_RANGE_FULL : ZIMG_RANGE_LIMITED; + + zimg_image_format_default(&dst_fmt, ZIMG_API_VERSION); + dst_fmt.width = p->scaled->w; + dst_fmt.height = p->scaled->h; + dst_fmt.color_family = ZIMG_COLOR_GREY; + dst_fmt.pixel_type = ZIMG_PIXEL_BYTE; + dst_fmt.depth = 8; + dst_fmt.pixel_range = ZIMG_RANGE_FULL; + + zimg_graph_builder_params params; + zimg_graph_builder_params_default(&params, ZIMG_API_VERSION); + params.resample_filter = ZIMG_RESIZE_BILINEAR; + + p->zimg_graph = zimg_filter_graph_build(&src_fmt, &dst_fmt, &params); + if (!p->zimg_graph) + return; + + size_t tmp_size; + if (!zimg_filter_graph_get_tmp_size(p->zimg_graph, &tmp_size)) { + if (posix_memalign(&p->zimg_tmp, ZIMG_ALIGN, tmp_size)) + p->zimg_tmp = NULL; + } + + if (!p->zimg_tmp) { + zimg_filter_graph_free(p->zimg_graph); + p->zimg_graph = NULL; + } +} + +// Downscale each video frame into the gray p->scaled image, append its hex +// dump to the entry list (dropping the oldest if full), and pass the frame on. +static void f_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0])) + return; + + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + if (mp_frame_is_signaling(frame)) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + + if (frame.type != MP_FRAME_VIDEO) + goto error; + + struct mp_image *mpi = frame.data; + + reinit_fmt(f, mpi); + + if (p->zimg_graph && + !((uintptr_t)mpi->planes[0] % ZIMG_ALIGN) && + !(mpi->stride[0] % ZIMG_ALIGN)) + { + zimg_image_buffer_const src_buf = {ZIMG_API_VERSION}; + src_buf.plane[0].data = mpi->planes[0]; + src_buf.plane[0].stride = mpi->stride[0]; + src_buf.plane[0].mask = ZIMG_BUFFER_MAX; + zimg_image_buffer dst_buf = {ZIMG_API_VERSION}; + dst_buf.plane[0].data = p->scaled->planes[0]; + dst_buf.plane[0].stride = p->scaled->stride[0]; + dst_buf.plane[0].mask = ZIMG_BUFFER_MAX; + // (The API promises to succeed if no user callbacks fail, so no need + // to check the return value.) 
+ zimg_filter_graph_process(p->zimg_graph, &src_buf, &dst_buf, + p->zimg_tmp, NULL, NULL, NULL, NULL); + } else { + if (mp_sws_scale(p->sws, p->scaled, mpi) < 0) + goto error; + } + + if (p->num_entries >= PRINT_ENTRY_NUM) { + talloc_free(p->entries[0].print); + MP_TARRAY_REMOVE_AT(p->entries, p->num_entries, 0); + } + + int size = p->scaled->w; + + struct print_entry *e = &p->entries[p->num_entries++]; + e->pts = mpi->pts; + e->print = talloc_array(p, char, size * size * 2 + 1); + + for (int y = 0; y < size; y++) { + for (int x = 0; x < size; x++) { + char *offs = &e->print[(y * size + x) * 2]; + uint8_t v = p->scaled->planes[0][y * p->scaled->stride[0] + x]; + snprintf(offs, 3, "%02x", v); + } + } + + if (p->opts->print) + MP_INFO(f, "%f: %s\n", e->pts, e->print); + + mp_pin_in_write(f->ppins[1], frame); + return; + +error: + MP_ERR(f, "unsupported video format\n"); + mp_pin_in_write(f->ppins[1], frame); + mp_filter_internal_mark_failed(f); +} + +// Handle MP_FILTER_COMMAND_GET_META: export all queued entries as tags, then +// clear the queue if clear-on-query is set. +static bool f_command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + switch (cmd->type) { + case MP_FILTER_COMMAND_GET_META: { + struct mp_tags *t = talloc_zero(NULL, struct mp_tags); + + for (int n = 0; n < p->num_entries; n++) { + struct print_entry *e = &p->entries[n]; + + if (e->pts != MP_NOPTS_VALUE) { + mp_tags_set_str(t, mp_tprintf(80, "fp%d.pts", n), + mp_tprintf(80, "%f", e->pts)); + } + mp_tags_set_str(t, mp_tprintf(80, "fp%d.hex", n), e->print); + } + + mp_tags_set_str(t, "type", m_opt_choice_str(type_names, p->opts->type)); + + if (p->opts->clear) + f_reset(f); + + *(struct mp_tags **)cmd->res = t; + return true; + } + default: + return false; + } +} + +// Release the zimg resources, which are not talloc-managed. 
+static void f_destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + + free(p->zimg_tmp); + zimg_filter_graph_free(p->zimg_graph); +} + +static const struct mp_filter_info filter = { + .name = "fingerprint", + .process = f_process, + .command = f_command, + .reset = f_reset, + .destroy = f_destroy, + .priv_size = sizeof(struct priv), +}; + +// Create the filter instance; takes ownership of the options struct. +static struct mp_filter *f_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &filter); + 
if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + int size = p->opts->type; + p->scaled = mp_image_alloc(IMGFMT_Y8, size, size); + MP_HANDLE_OOM(p->scaled); + talloc_steal(p, p->scaled); + p->scaled->params.color.levels = MP_CSP_LEVELS_PC; + p->sws = mp_sws_alloc(p); + MP_HANDLE_OOM(p->sws); + return f; +} + +const struct mp_user_filter_entry vf_fingerprint = { + .desc = { + .description = "Compute video frame fingerprints", + .name = "fingerprint", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &f_opts_def, + .options = f_opts_list, + }, + .create = f_create, +}; diff --git a/wscript b/wscript index 736844662a..bcd658e6e2 100644 --- a/wscript +++ b/wscript @@ -384,6 +384,10 @@ iconv support use --disable-iconv.", 'desc': 'librubberband support', 'deps': 'libaf', 'func': check_pkg_config('rubberband', '>= 1.8.0'), + }, { + 'name': '--zimg', + 'desc': 'libzimg support (for vf_fingerprint)', + 'func': check_pkg_config('zimg', '>= 2.9'), }, { 'name': '--lcms2', 'desc': 'LCMS2 support', diff --git a/wscript_build.py b/wscript_build.py index 56668f1a09..63cd7538ec 100644 --- a/wscript_build.py +++ b/wscript_build.py @@ -389,6 +389,7 @@ def build(ctx): ( "video/decode/vd_lavc.c" ), ( "video/filter/refqueue.c" ), ( "video/filter/vf_d3d11vpp.c", "d3d-hwaccel" ), + ( "video/filter/vf_fingerprint.c", "zimg" ), ( "video/filter/vf_format.c" ), ( "video/filter/vf_sub.c" ), ( "video/filter/vf_vapoursynth.c", "vapoursynth" ),