0
0
mirror of https://github.com/mpv-player/mpv.git synced 2024-09-20 03:52:22 +02:00

filter_sdh: add full width parentheses to the enclosures string

Since these are technically parentheses, we'll treat them the same way
as normal parentheses. Fixes #11155.
This commit is contained in:
Dudemanguy 2023-11-05 15:31:47 -06:00
parent ce958b7742
commit 443c2487d7
3 changed files with 12 additions and 9 deletions

View File

@ -2901,8 +2901,9 @@ Subtitles
It removes speaker labels (like MAN:) and any text enclosed within symbols like
parentheses or brackets as specified by the ``--sub-filter-sdh-enclosures`` option.
Note that parentheses are a special case and only upper case text is removed. For
more filtering, you can use the ``--sub-filter-sdh-harder`` option.
Note that parentheses (both the full-width and the normal variants) are a special
case and only upper case text is removed. For more filtering, you can use the
``--sub-filter-sdh-harder`` option.
Default: ``no``.
@ -2920,7 +2921,7 @@ Subtitles
will be mapped internally to their matching right hand character, so you only need
to specify left hand characters.
Default: ``([``.
Default: ``([（``.
``--sub-filter-regex-...=...``
Set a list of regular expressions to match on text subtitles, and remove any

View File

@ -272,7 +272,7 @@ const struct m_sub_options mp_sub_filter_opts = {
},
.size = sizeof(OPT_BASE_STRUCT),
.defaults = &(OPT_BASE_STRUCT){
.sub_filter_SDH_enclosures = "([",
.sub_filter_SDH_enclosures = "([\uFF08",
.rf_enable = true,
},
.change_flags = UPDATE_SUB_FILT,

View File

@ -34,8 +34,9 @@
// It is for filtering ASS encoded subtitles
// Table of enclosure pairs. Each row is {left, right}: an opening string and
// the closing string that matches it. The trailing {0} row is all-NULL,
// presumably the end-of-table sentinel (the lookup code is not visible here).
// NOTE(review): the "(" and "[" rows appear twice. This looks like the old and
// new sides of a diff hunk shown together; confirm against the real file.
static const char *const enclosure_pair[][2] = {
{"(", ")"},
{"[", "]"},
{"(", ")"},
{"[", "]"},
{"\uFF08", "\uFF09"}, // U+FF08 / U+FF09: full-width left/right parenthesis
{0},
};
@ -249,15 +250,16 @@ static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
bool filter_harder = sd->opts->sub_filter_SDH_harder;
char *rp = *rpp;
int old_pos = buf->pos;
bool parenthesis = strcmp(left, "(") == 0 || strcmp(left, "\uFF08") == 0;
// skip past the left character
rp += get_char_bytes(rp);
// skip past valid data searching for the right character
bool only_digits = strcmp(left, "(") == 0;
bool only_digits = parenthesis;
while (*rp && rp[0] != right[0]) {
if (rp[0] == '{') {
copy_ass(sd, &rp, buf);
} else if (strcmp(left, "(") == 0 && ((mp_isalpha(rp[0]) &&
} else if (parenthesis && ((mp_isalpha(rp[0]) &&
(filter_harder || mp_isupper(rp[0]) || rp[0] == 'l')) ||
mp_isdigit(rp[0]) ||
rp[0] == ' ' || rp[0] == '\'' || rp[0] == '#' ||
@ -266,7 +268,7 @@ static bool skip_enclosed(struct sd_filter *sd, char **rpp, struct buffer *buf,
if (!mp_isdigit(rp[0]))
only_digits = false;
rp++;
} else if (strcmp(left, "(") == 0) {
} else if (parenthesis) {
buf->pos = old_pos;
return false;
} else {