// Reviewer summary of the patch below. The patch text itself is unchanged; it is
// stored with its original line breaks collapsed into spaces.
//
// Purpose: move filename-based language guessing out of player/external_files.c
// and into misc/language.c, exposing a single bstr-based entry point.
//
// misc/language.c
//   Adds `bstr mp_guess_lang_from_filename(bstr name, int *lang_start)`.
//   - The function now applies bstr_strip_ext() and bstr_strip() to `name` itself
//     (the removed wrapper in external_files.c only called bstr_strip_ext()).
//   - It scans backwards from the end of the stripped name for runs of letters
//     (mp_isalpha) separated by '-'. Every run after the first must be 1 to 8
//     letters; the first (primary) run must be 2 or 3 letters.
//   - The tag must be preceded by '.', or by '(' / '[' when the stripped name ends
//     in ')' / ']'.
//   - On success it returns a bstr that points into `name` (no copy is made) and
//     covers the tag including its '-' separators; otherwise it returns an empty
//     bstr.
//   - `lang_start` may be NULL. When non-NULL it receives `i`, the index of the
//     delimiter counted from the start of the stripped name.
//   - The final check uses `i <= 0` where the removed static version used
//     `i == 0`, so a negative `i` is rejected before `name.start[i]` is read.
//   - Swaps the "misc/bstr.h" include for "misc/ctype.h".
//
// misc/language.h
//   Includes "misc/bstr.h" and declares the new function.
//
// player/command.c
//   cmd_track_reload() now wraps the filename with bstr0(), passes NULL for
//   `lang_start`, and copies the result onto `nt` with bstrto0().
//
// player/external_files.c, player/external_files.h
//   Removes the static guess_lang_from_filename() and the
//   `char *mp_guess_lang_from_filename(void *ctx, const char *filename)` wrapper
//   together with its prototype. append_dir_subtitles() calls the new function
//   with `dename` instead of `tmp_fname_trim`.
//
// NOTE(review): the ')' and ']' checks are two independent `if`s rather than
//   if / else-if, so a name ending in "])" takes both branches and the expected
//   delimiter ends up as '['. This is carried over unchanged from the removed
//   version; confirm whether it is intended.
// NOTE(review): append_dir_subtitles() still compares `start` against
//   `f_fname_trim.len`, but `start` is now an index into the stripped `dename`
//   rather than into `tmp_fname_trim`. How those two strings relate is not
//   visible in this patch; confirm the offsets agree.
// NOTE(review): cmd_track_reload() calls bstrto0() even when no language was
//   found (empty bstr). What bstrto0() returns for an empty bstr is not visible
//   here; confirm nt->lang ends up with the intended value in that case.
// NOTE(review): the first context line of the first hunk reads `#include` with
//   no header name; the name appears to have been lost when this text was
//   extracted. Confirm against the tree.
diff --git a/misc/language.c b/misc/language.c index 0d9d0689da..028437f1b6 100644 --- a/misc/language.c +++ b/misc/language.c @@ -21,7 +21,7 @@ #include #include "common/common.h" -#include "misc/bstr.h" +#include "misc/ctype.h" #define L(s) { #s, sizeof(#s) - 1 } @@ -296,3 +296,56 @@ done: talloc_free(ta_ctx); return best_score; } + +bstr mp_guess_lang_from_filename(bstr name, int *lang_start) +{ + name = bstr_strip(bstr_strip_ext(name)); + + if (name.len < 2) + return (bstr){0}; + + int lang_length = 0; + int i = name.len - 1; + int suffixes_length = 0; + + char delimiter = '.'; + if (name.start[i] == ')') { + delimiter = '('; + i--; + } + if (name.start[i] == ']') { + delimiter = '['; + i--; + } + + while (true) { + while (i >= 0 && mp_isalpha(name.start[i])) { + lang_length++; + i--; + } + + // According to + // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags + // subtags after the first are composed of 1 to 8 letters. + if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8) + return (bstr){0}; + + if (i >= 0 && name.start[i] == '-') { + lang_length++; + i--; + suffixes_length = lang_length; + } else { + break; + } + } + + // The primary subtag can have 2 or 3 letters. + if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 || + i <= 0 || name.start[i] != delimiter) + return (bstr){0}; + + if (lang_start) + *lang_start = i; + + return (bstr){name.start + i + 1, lang_length}; +} diff --git a/misc/language.h b/misc/language.h index d765e6614a..ed57e75d4d 100644 --- a/misc/language.h +++ b/misc/language.h @@ -20,8 +20,11 @@ #ifndef MP_LANGUAGE_H #define MP_LANGUAGE_H +#include "misc/bstr.h" + // Result numerically higher => better match. 0 == no match. 
int mp_match_lang(char **langs, const char *lang); char **mp_get_user_langs(void); +bstr mp_guess_lang_from_filename(bstr name, int *lang_start); #endif /* MP_LANGUAGE_H */ diff --git a/player/command.c b/player/command.c index 4c7ca24db9..d16b510ed4 100644 --- a/player/command.c +++ b/player/command.c @@ -67,6 +67,7 @@ #include "options/path.h" #include "screenshot.h" #include "misc/dispatch.h" +#include "misc/language.h" #include "misc/node.h" #include "misc/thread_pool.h" #include "misc/thread_tools.h" @@ -6005,7 +6006,7 @@ static void cmd_track_reload(void *p) struct track *nt = mpctx->tracks[nt_num]; if (!nt->lang) - nt->lang = mp_guess_lang_from_filename(nt, nt->external_filename); + nt->lang = bstrto0(nt, mp_guess_lang_from_filename(bstr0(nt->external_filename), NULL)); mp_switch_track(mpctx, nt->type, nt, 0); print_track_list(mpctx, "Reloaded:"); diff --git a/player/external_files.c b/player/external_files.c index fd19d4fea9..9e14aba01b 100644 --- a/player/external_files.c +++ b/player/external_files.c @@ -24,8 +24,8 @@ #include "common/common.h" #include "common/global.h" #include "common/msg.h" -#include "misc/ctype.h" #include "misc/charset_conv.h" +#include "misc/language.h" #include "options/options.h" #include "options/path.h" #include "external_files.h" @@ -108,63 +108,6 @@ static int compare_sub_priority(const void *a, const void *b) return strcoll(s1->fname, s2->fname); } -static struct bstr guess_lang_from_filename(struct bstr name, int *lang_start) -{ - if (name.len < 2) - return (struct bstr){NULL, 0}; - - int lang_length = 0; - int i = name.len - 1; - int suffixes_length = 0; - - char delimiter = '.'; - if (name.start[i] == ')') { - delimiter = '('; - i--; - } - if (name.start[i] == ']') { - delimiter = '['; - i--; - } - - while (true) { - while (i >= 0 && mp_isalpha(name.start[i])) { - lang_length++; - i--; - } - - // According to - // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags - // subtags after the first are 
composed of 1 to 8 letters. - if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8) - return (struct bstr){0}; - - if (i >= 0 && name.start[i] == '-') { - lang_length++; - i--; - suffixes_length = lang_length; - } else { - break; - } - } - - // The primary subtag can have 2 or 3 letters. - if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 || - i == 0 || name.start[i] != delimiter) - return (struct bstr){0}; - - *lang_start = i; - return (struct bstr){name.start + i + 1, lang_length}; -} - -char *mp_guess_lang_from_filename(void* ctx, const char *filename) -{ - bstr filename_no_ext = bstr_strip_ext(bstr0(filename)); - int start = 0; // only used in append_dir_subtitles() - char *lang = bstrto0(ctx, guess_lang_from_filename(filename_no_ext, &start)); - return lang; -} - static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts, struct subfn **slist, int *nsub, struct bstr path, const char *fname, @@ -235,7 +178,7 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts, bstr lang = {0}; int start = 0; - lang = guess_lang_from_filename(tmp_fname_trim, &start); + lang = mp_guess_lang_from_filename(dename, &start); if (bstr_case_startswith(tmp_fname_trim, f_fname_trim)) { if (lang.len && start == f_fname_trim.len) prio |= 16; // exact movie name + followed by lang diff --git a/player/external_files.h b/player/external_files.h index 5d42c551ae..20b37c3705 100644 --- a/player/external_files.h +++ b/player/external_files.h @@ -34,6 +34,5 @@ struct subfn *find_external_files(struct mpv_global *global, const char *fname, bool mp_might_be_subtitle_file(const char *filename); void mp_update_subtitle_exts(struct MPOpts *opts); -char *mp_guess_lang_from_filename(void *talloc_ctx, const char *filename); #endif /* MPLAYER_FINDFILES_H */