0
0
mirror of https://github.com/mpv-player/mpv.git synced 2024-09-19 19:42:24 +02:00

misc/language: move mp_guess_lang_from_filename() here

This commit is contained in:
Guido Cella 2024-05-09 21:37:53 +02:00 committed by Kacper Michajłow
parent dffc37dcfa
commit 20b8fe05bf
5 changed files with 61 additions and 62 deletions

View File

@ -21,7 +21,7 @@
#include <stdint.h>
#include "common/common.h"
#include "misc/bstr.h"
#include "misc/ctype.h"
#define L(s) { #s, sizeof(#s) - 1 }
@ -296,3 +296,56 @@ done:
talloc_free(ta_ctx);
return best_score;
}
// Guess a language tag from the tail of a file name.
//
// The extension is removed with bstr_strip_ext() and the result is passed
// through bstr_strip() before scanning. The scan then walks backwards from the
// end of the name and accepts a tag written in one of three forms:
//     name.TAG    name(TAG)    name[TAG]
// TAG is a primary subtag of 2 or 3 letters, optionally followed by
// '-'-separated subtags of 1 to 8 letters each (e.g. "en", "pt-BR").
//
// name:       file name to inspect.
// lang_start: optional, may be NULL. On success it receives the index of the
//             opening delimiter ('.', '(' or '[') relative to the stripped
//             name. It is left untouched when no tag is found.
//
// Returns the tag as a bstr pointing into name's memory (nothing is copied),
// or an empty bstr if no tag was recognized.
bstr mp_guess_lang_from_filename(bstr name, int *lang_start)
{
    name = bstr_strip(bstr_strip_ext(name));

    if (name.len < 2)
        return (bstr){0};

    int lang_length = 0;
    int i = name.len - 1;
    int suffixes_length = 0;

    // A closing bracket selects the matching opening bracket as the delimiter.
    // Only one closing bracket is consumed, so a mismatched tail such as
    // "[en])" is rejected instead of being treated as "[en]".
    char delimiter = '.';
    if (name.start[i] == ')') {
        delimiter = '(';
        i--;
    } else if (name.start[i] == ']') {
        delimiter = '[';
        i--;
    }

    // Walk backwards over the subtags. lang_length counts every character
    // accepted so far (letters and hyphens); suffixes_length is its value
    // right after the most recent hyphen, so the difference is the length of
    // the subtag currently being scanned.
    while (true) {
        while (i >= 0 && mp_isalpha(name.start[i])) {
            lang_length++;
            i--;
        }

        // According to
        // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
        // subtags after the first are composed of 1 to 8 letters.
        if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8)
            return (bstr){0};

        if (i >= 0 && name.start[i] == '-') {
            lang_length++;
            i--;
            suffixes_length = lang_length;
        } else {
            break;
        }
    }

    // The primary subtag can have 2 or 3 letters.
    // i <= 0 rejects both a tag that reaches the start of the name (i == -1,
    // which must not be used as an index) and a delimiter at index 0, i.e. a
    // name that consists of nothing but the tag.
    if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 ||
        i <= 0 || name.start[i] != delimiter)
        return (bstr){0};

    if (lang_start)
        *lang_start = i;

    return (bstr){name.start + i + 1, lang_length};
}

View File

@ -20,8 +20,11 @@
#ifndef MP_LANGUAGE_H
#define MP_LANGUAGE_H
#include "misc/bstr.h"
// Result numerically higher => better match. 0 == no match.
int mp_match_lang(char **langs, const char *lang);
char **mp_get_user_langs(void);
// Guess a language tag (2-3 letter primary subtag, optionally followed by
// '-'-separated subtags of 1-8 letters) from the end of a file name, written
// as "name.TAG", "name(TAG)" or "name[TAG]" before the extension.
// Returns an empty bstr if no tag is found; otherwise the returned bstr points
// into name's memory. If lang_start is not NULL, it is set on success to the
// index of the opening delimiter within the extension-stripped name.
bstr mp_guess_lang_from_filename(bstr name, int *lang_start);
#endif /* MP_LANGUAGE_H */

View File

@ -67,6 +67,7 @@
#include "options/path.h"
#include "screenshot.h"
#include "misc/dispatch.h"
#include "misc/language.h"
#include "misc/node.h"
#include "misc/thread_pool.h"
#include "misc/thread_tools.h"
@ -6005,7 +6006,7 @@ static void cmd_track_reload(void *p)
struct track *nt = mpctx->tracks[nt_num];
if (!nt->lang)
nt->lang = mp_guess_lang_from_filename(nt, nt->external_filename);
nt->lang = bstrto0(nt, mp_guess_lang_from_filename(bstr0(nt->external_filename), NULL));
mp_switch_track(mpctx, nt->type, nt, 0);
print_track_list(mpctx, "Reloaded:");

View File

@ -24,8 +24,8 @@
#include "common/common.h"
#include "common/global.h"
#include "common/msg.h"
#include "misc/ctype.h"
#include "misc/charset_conv.h"
#include "misc/language.h"
#include "options/options.h"
#include "options/path.h"
#include "external_files.h"
@ -108,63 +108,6 @@ static int compare_sub_priority(const void *a, const void *b)
return strcoll(s1->fname, s2->fname);
}
// Guess a language tag from the tail of a file name whose extension has
// already been removed by the caller.
//
// Accepted forms are "name.TAG", "name(TAG)" and "name[TAG]", where TAG is a
// primary subtag of 2 or 3 letters optionally followed by '-'-separated
// subtags of 1 to 8 letters each.
//
// name:       file name without extension.
// lang_start: optional, may be NULL. On success it receives the index of the
//             opening delimiter within name. It is left untouched otherwise.
//
// Returns the tag as a bstr pointing into name's memory, or an empty bstr if
// no tag was recognized.
static struct bstr guess_lang_from_filename(struct bstr name, int *lang_start)
{
    if (name.len < 2)
        return (struct bstr){NULL, 0};

    int lang_length = 0;
    int i = name.len - 1;
    int suffixes_length = 0;

    // A closing bracket selects the matching opening bracket as the delimiter.
    // Only one closing bracket is consumed, so a mismatched tail such as
    // "[en])" is rejected.
    char delimiter = '.';
    if (name.start[i] == ')') {
        delimiter = '(';
        i--;
    } else if (name.start[i] == ']') {
        delimiter = '[';
        i--;
    }

    // lang_length counts every accepted character (letters and hyphens);
    // suffixes_length is its value right after the most recent hyphen.
    while (true) {
        while (i >= 0 && mp_isalpha(name.start[i])) {
            lang_length++;
            i--;
        }

        // According to
        // https://en.wikipedia.org/wiki/IETF_language_tag#Syntax_of_language_tags
        // subtags after the first are composed of 1 to 8 letters.
        if (lang_length < suffixes_length + 1 || lang_length > suffixes_length + 8)
            return (struct bstr){0};

        if (i >= 0 && name.start[i] == '-') {
            lang_length++;
            i--;
            suffixes_length = lang_length;
        } else {
            break;
        }
    }

    // The primary subtag can have 2 or 3 letters.
    // i <= 0 (rather than i == 0) also covers i == -1, reached when the
    // letters run all the way to the start of the name; indexing name.start
    // with -1 would read before the buffer.
    if (lang_length < suffixes_length + 2 || lang_length > suffixes_length + 3 ||
        i <= 0 || name.start[i] != delimiter)
        return (struct bstr){0};

    if (lang_start)
        *lang_start = i;

    return (struct bstr){name.start + i + 1, lang_length};
}
// Convenience wrapper: strip the extension from filename, guess the language
// tag, and return it as a C string produced by bstrto0() with ctx as its
// first argument.
char *mp_guess_lang_from_filename(void* ctx, const char *filename)
{
    // The helper reports the delimiter position through this slot; that value
    // only matters to append_dir_subtitles(), so it is discarded here.
    int delimiter_pos = 0;
    bstr guessed = guess_lang_from_filename(bstr_strip_ext(bstr0(filename)),
                                            &delimiter_pos);
    return bstrto0(ctx, guessed);
}
static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
struct subfn **slist, int *nsub,
struct bstr path, const char *fname,
@ -235,7 +178,7 @@ static void append_dir_subtitles(struct mpv_global *global, struct MPOpts *opts,
bstr lang = {0};
int start = 0;
lang = guess_lang_from_filename(tmp_fname_trim, &start);
lang = mp_guess_lang_from_filename(dename, &start);
if (bstr_case_startswith(tmp_fname_trim, f_fname_trim)) {
if (lang.len && start == f_fname_trim.len)
prio |= 16; // exact movie name + followed by lang

View File

@ -34,6 +34,5 @@ struct subfn *find_external_files(struct mpv_global *global, const char *fname,
bool mp_might_be_subtitle_file(const char *filename);
void mp_update_subtitle_exts(struct MPOpts *opts);
char *mp_guess_lang_from_filename(void *talloc_ctx, const char *filename);
#endif /* MPLAYER_FINDFILES_H */