mirror of
https://github.com/obsproject/obs-studio.git
synced 2024-09-20 04:42:18 +02:00
2844 lines
72 KiB
C
2844 lines
72 KiB
C
/******************************************************************************
|
|
Copyright (C) 2024 by Dennis Sädtler <dennis@obsproject.com>
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
******************************************************************************/
|
|
|
|
#include "mp4-mux-internal.h"
|
|
|
|
#include "rtmp-hevc.h"
|
|
#include "rtmp-av1.h"
|
|
|
|
#include <obs-avc.h>
|
|
#include <obs-hevc.h>
|
|
#include <obs-module.h>
|
|
#include <util/dstr.h>
|
|
#include <util/platform.h>
|
|
#include <util/array-serializer.h>
|
|
|
|
#include <time.h>
|
|
|
|
/*
|
|
* (Mostly) compliant MP4 muxer for fun and profit.
|
|
* Based on ISO/IEC 14496-12 and FFmpeg's libavformat/movenc.c ([L]GPL)
|
|
*
|
|
* Specification section numbers are noted where applicable.
|
|
* Standard identifier is included if not referring to ISO/IEC 14496-12.
|
|
*/
|
|
|
|
/* Logging helpers. Each message is prefixed with the name of the output
 * reached through `mux->output`, so a variable named `mux` must be in scope
 * wherever these macros are used. */
#define do_log(lvl, fmt, ...)               \
	blog(lvl, "[mp4 muxer: '%s'] " fmt, \
	     obs_output_get_name(mux->output), ##__VA_ARGS__)

#define warn(fmt, ...) do_log(LOG_WARNING, fmt, ##__VA_ARGS__)
#define info(fmt, ...) do_log(LOG_INFO, fmt, ##__VA_ARGS__)
|
|
|
|
/* Helper to overwrite placeholder size and return total size. */
|
|
static inline size_t write_box_size(struct serializer *s, int64_t start)
|
|
{
|
|
int64_t end = serializer_get_pos(s);
|
|
size_t size = end - start;
|
|
|
|
serializer_seek(s, start, SERIALIZE_SEEK_START);
|
|
s_wb32(s, (uint32_t)size);
|
|
serializer_seek(s, end, SERIALIZE_SEEK_START);
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 4.2 Box header with size and char[4] name
///
/// Sizes that fit into 32 bits are written directly. Anything larger is
/// written as size == 1 followed, after the type, by a 64-bit "largesize".
static inline void write_box(struct serializer *s, const size_t size,
			     const char name[4])
{
	const bool large = size > UINT32_MAX;

	s_wb32(s, large ? 1 : (uint32_t)size); // size (1 = largesize follows)
	s_write(s, name, 4);                   // boxtype

	if (large)
		s_wb64(s, size); // largesize
}
|
|
|
|
/// 4.2 FullBox extended header with u8 version and u24 flags
///
/// Emits a regular box header (see write_box) followed by the one-byte
/// version and the three-byte flags field.
static inline void write_fullbox(struct serializer *s, const size_t size,
				 const char name[4], uint8_t version,
				 uint32_t flags)
{
	/* Plain box header first ... */
	write_box(s, size, name);

	/* ... then version and flags. */
	s_w8(s, version);
	s_wb24(s, flags);
}
|
|
|
|
/// 4.3 File Type Box
|
|
static size_t mp4_write_ftyp(struct mp4_mux *mux, bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "ftyp");
|
|
|
|
const char *major_brand = "isom";
|
|
/* Following FFmpeg's example, when using negative CTS the major brand
|
|
* needs to be either iso4 or iso6 depending on whether the file is
|
|
* currently fragmented. */
|
|
if (mux->flags & MP4_USE_NEGATIVE_CTS)
|
|
major_brand = fragmented ? "iso6" : "iso4";
|
|
|
|
s_write(s, major_brand, 4); // major brand
|
|
s_wb32(s, 512); // minor version
|
|
|
|
// minor brands (first one matches major brand)
|
|
s_write(s, major_brand, 4);
|
|
|
|
/* Write isom base brand if it's not the major brand */
|
|
if (strcmp(major_brand, "isom") != 0)
|
|
s_write(s, "isom", 4);
|
|
|
|
/* Avoid adding newer brand (iso6) unless necessary, use "obs1" brand
|
|
* as a placeholder to maintain ftyp box size. */
|
|
if (fragmented && strcmp(major_brand, "iso6") != 0)
|
|
s_write(s, "iso6", 4);
|
|
else
|
|
s_write(s, "obs1", 4);
|
|
|
|
s_write(s, "iso2", 4);
|
|
|
|
/* Include H.264 brand if used */
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
if (track->type == TRACK_VIDEO) {
|
|
if (track->codec == CODEC_H264)
|
|
s_write(s, "avc1", 4);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* General MP4 brannd */
|
|
s_write(s, "mp41", 4);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.1.2 Free Space Box
|
|
static size_t mp4_write_free(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* Write a 16-byte free box, so it can be replaced with a 64-bit size
|
|
* box header (u32 + char[4] + u64) */
|
|
s_wb32(s, 16);
|
|
s_write(s, "free", 4);
|
|
s_wb64(s, 0);
|
|
|
|
return 16;
|
|
}
|
|
|
|
/// 8.2.2 Movie Header Box
|
|
static size_t mp4_write_mvhd(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
size_t start = serializer_get_pos(s);
|
|
|
|
/* Use primary video track as the baseline for duration */
|
|
uint64_t duration = 0;
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
if (track->type == TRACK_VIDEO) {
|
|
duration = util_mul_div64(track->duration, 1000,
|
|
track->timebase_den);
|
|
break;
|
|
}
|
|
}
|
|
|
|
write_fullbox(s, 0, "mvhd", 0, 0);
|
|
|
|
if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
|
|
s_wb64(s, mux->creation_time); // creation time
|
|
s_wb64(s, mux->creation_time); // modification time
|
|
s_wb32(s, 1000); // timescale
|
|
s_wb64(s, duration); // duration (0 for fragmented)
|
|
} else {
|
|
s_wb32(s, (uint32_t)mux->creation_time); // creation time
|
|
s_wb32(s, (uint32_t)mux->creation_time); // modification time
|
|
s_wb32(s, 1000); // timescale
|
|
s_wb32(s, (uint32_t)duration); // duration (0 for fragmented)
|
|
}
|
|
|
|
s_wb32(s, 0x00010000); // rate, 16.16 fixed float (1 << 16)
|
|
s_wb16(s, 0x0100); // volume
|
|
|
|
s_wb16(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
|
|
// Matrix
|
|
for (int i = 0; i < 9; i++)
|
|
s_wb32(s, UNITY_MATRIX[i]);
|
|
|
|
// pre_defined
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
|
|
s_wb32(s, mux->track_ctr + 1); // next_track_ID
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.3.2 Track Header Box
|
|
static size_t mp4_write_tkhd(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
size_t start = serializer_get_pos(s);
|
|
|
|
uint64_t duration =
|
|
util_mul_div64(track->duration, 1000, track->timebase_den);
|
|
|
|
/* Flags are 0x1 (enabled) | 0x2 (in movie) */
|
|
static const uint32_t flags = 0x1 | 0x2;
|
|
write_fullbox(s, 0, "tkhd", 0, flags);
|
|
|
|
if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
|
|
s_wb64(s, mux->creation_time); // creation time
|
|
s_wb64(s, mux->creation_time); // modification time
|
|
s_wb32(s, track->track_id); // track_id
|
|
s_wb32(s, 0); // reserved
|
|
s_wb64(s, duration); // duration in movie timescale
|
|
} else {
|
|
s_wb32(s, (uint32_t)mux->creation_time); // creation time
|
|
s_wb32(s, (uint32_t)mux->creation_time); // modification time
|
|
s_wb32(s, track->track_id); // track_id
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, (uint32_t)duration); // duration in movie timescale
|
|
}
|
|
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
s_wb16(s, 0); // layer
|
|
s_wb16(s, track->type == TRACK_AUDIO ? 1 : 0); // alternate group
|
|
s_wb16(s, track->type == TRACK_AUDIO ? 0x100 : 0); // volume
|
|
s_wb16(s, 0); // reserved
|
|
|
|
// Matrix (predefined)
|
|
for (int i = 0; i < 9; i++)
|
|
s_wb32(s, UNITY_MATRIX[i]);
|
|
|
|
if (track->type == TRACK_AUDIO) {
|
|
s_wb32(s, 0); // width
|
|
s_wb32(s, 0); // height
|
|
} else {
|
|
/* width/height are fixed point 16.16, so we just shift the
|
|
* integer to the upper 16 bits */
|
|
uint32_t width = obs_encoder_get_width(track->encoder);
|
|
s_wb32(s, width << 16);
|
|
uint32_t height = obs_encoder_get_height(track->encoder);
|
|
s_wb32(s, height << 16);
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.4.2 Media Header Box
|
|
static size_t mp4_write_mdhd(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
size_t size = 32;
|
|
uint8_t version = 0;
|
|
uint64_t duration = track->duration;
|
|
uint32_t timescale = track->timescale;
|
|
|
|
if (track->type == TRACK_VIDEO) {
|
|
/* Update to track timescale */
|
|
duration = util_mul_div64(duration, track->timescale,
|
|
track->timebase_den);
|
|
}
|
|
|
|
/* use 64-bit duration if necessary */
|
|
if (duration > UINT32_MAX || mux->creation_time > UINT32_MAX) {
|
|
size = 44;
|
|
version = 1;
|
|
}
|
|
|
|
write_fullbox(s, size, "mdhd", version, 0);
|
|
|
|
if (version == 1) {
|
|
s_wb64(s, mux->creation_time); // creation time
|
|
s_wb64(s, mux->creation_time); // modification time
|
|
s_wb32(s, timescale); // timescale
|
|
s_wb64(s, (uint32_t)duration); // duration
|
|
} else {
|
|
s_wb32(s, (uint32_t)mux->creation_time); // creation time
|
|
s_wb32(s, (uint32_t)mux->creation_time); // modification time
|
|
s_wb32(s, timescale); // timescale
|
|
s_wb32(s, (uint32_t)duration); // duration
|
|
}
|
|
|
|
s_wb16(s, 21956); // language (undefined)
|
|
s_wb16(s, 0); // pre_defined
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 8.4.3 Handler Reference Box
|
|
static size_t mp4_write_hdlr(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "hdlr", 0, 0);
|
|
|
|
s_wb32(s, 0); // pre_defined
|
|
|
|
// handler_type
|
|
if (track->type == TRACK_VIDEO)
|
|
s_write(s, "vide", 4);
|
|
else if (track->type == TRACK_CHAPTERS)
|
|
s_write(s, "text", 4);
|
|
else
|
|
s_write(s, "soun", 4);
|
|
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
|
|
// name (utf-8 string, null terminated)
|
|
if (track->type == TRACK_VIDEO)
|
|
s_write(s, "OBS Video Handler", 18);
|
|
else if (track->type == TRACK_CHAPTERS)
|
|
s_write(s, "OBS Chapter Handler", 20);
|
|
else
|
|
s_write(s, "OBS Audio Handler", 18);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 12.1.2 Video media header
|
|
static size_t mp4_write_vmhd(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* Flags is always 1 */
|
|
write_fullbox(s, 20, "vmhd", 0, 1);
|
|
|
|
s_wb16(s, 0); // graphicsmode
|
|
s_wb16(s, 0); // opcolor r
|
|
s_wb16(s, 0); // opcolor g
|
|
s_wb16(s, 0); // opcolor b
|
|
|
|
return 16;
|
|
}
|
|
|
|
/// 12.2.2 Sound media header
|
|
static size_t mp4_write_smhd(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 16, "smhd", 0, 0);
|
|
|
|
s_wb16(s, 0); // balance
|
|
s_wb16(s, 0); // reserved
|
|
|
|
return 16;
|
|
}
|
|
|
|
/// (QTFF/Apple) Text media information atom
|
|
static size_t mp4_write_qt_text(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "text");
|
|
|
|
/* Identity matrix, note that it's not fixed point 16.16 */
|
|
s_wb16(s, 0x01);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x01);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x00);
|
|
s_wb32(s, 0x00004000);
|
|
/* Seemingly undocumented */
|
|
s_wb16(s, 0x0000);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Base media info atom
|
|
static size_t mp4_write_gmin(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "gmin", 0, 0);
|
|
|
|
s_wb16(s, 0x40); // graphics mode
|
|
s_wb16(s, 0x8000); // opColor r
|
|
s_wb16(s, 0x8000); // opColor g
|
|
s_wb16(s, 0x8000); // opColor b
|
|
s_wb16(s, 0); // balance
|
|
s_wb16(s, 0); // reserved
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Base media information header atom
|
|
static size_t mp4_write_gmhd(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "gmhd");
|
|
|
|
// gmin
|
|
mp4_write_gmin(mux);
|
|
// text (QuickTime)
|
|
mp4_write_qt_text(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// ISO/IEC 14496-15 5.4.2.1 AVCConfigurationBox
|
|
static size_t mp4_write_avcC(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* For AVC this is the parsed extra data. */
|
|
uint8_t *header;
|
|
size_t size;
|
|
|
|
struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
|
|
.timebase_den = 1,
|
|
.keyframe = true};
|
|
|
|
if (!obs_encoder_get_extra_data(enc, &header, &size))
|
|
return 0;
|
|
|
|
packet.size = obs_parse_avc_header(&packet.data, header, size);
|
|
|
|
size_t box_size = packet.size + 8;
|
|
write_box(s, box_size, "avcC");
|
|
s_write(s, packet.data, packet.size);
|
|
|
|
bfree(packet.data);
|
|
return box_size;
|
|
}
|
|
|
|
/// ISO/IEC 14496-15 8.4.1.1 HEVCConfigurationBox
|
|
static size_t mp4_write_hvcC(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* For HEVC this is the parsed extra data. */
|
|
uint8_t *header;
|
|
size_t size;
|
|
|
|
struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
|
|
.timebase_den = 1,
|
|
.keyframe = true};
|
|
|
|
if (!obs_encoder_get_extra_data(enc, &header, &size))
|
|
return 0;
|
|
|
|
packet.size = obs_parse_hevc_header(&packet.data, header, size);
|
|
|
|
size_t box_size = packet.size + 8;
|
|
write_box(s, box_size, "hvcC");
|
|
s_write(s, packet.data, packet.size);
|
|
|
|
bfree(packet.data);
|
|
return box_size;
|
|
}
|
|
|
|
/// AV1 ISOBMFF 2.3. AV1 Codec Configuration Box
|
|
static size_t mp4_write_av1C(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* For AV1 this is just the parsed extra data. */
|
|
uint8_t *header;
|
|
size_t size;
|
|
|
|
struct encoder_packet packet = {.type = OBS_ENCODER_VIDEO,
|
|
.timebase_den = 1,
|
|
.keyframe = true};
|
|
|
|
if (!obs_encoder_get_extra_data(enc, &header, &size))
|
|
return 0;
|
|
|
|
packet.size = obs_parse_av1_header(&packet.data, header, size);
|
|
|
|
size_t box_size = packet.size + 8;
|
|
write_box(s, box_size, "av1C");
|
|
s_write(s, packet.data, packet.size);
|
|
|
|
bfree(packet.data);
|
|
return box_size;
|
|
}
|
|
|
|
/// 12.1.5 Colour information
|
|
static size_t mp4_write_colr(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
UNUSED_PARAMETER(enc);
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_box(s, 19, "colr");
|
|
|
|
uint8_t full_range = 0;
|
|
uint16_t pri, trc, spc;
|
|
pri = trc = spc = 0;
|
|
get_colour_information(enc, &pri, &trc, &spc, &full_range);
|
|
|
|
s_write(s, "nclx", 4); // colour_type
|
|
s_wb16(s, pri); // colour_primaries
|
|
s_wb16(s, trc); // transfer_characteristics
|
|
s_wb16(s, spc); // matrix_coefficiencts
|
|
s_w8(s, full_range << 7); // full range flag + 7 reserved bits (0)
|
|
|
|
return 19;
|
|
}
|
|
|
|
/// 12.1.4 Pixel Aspect Ratio
|
|
static size_t mp4_write_pasp(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_box(s, 16, "pasp");
|
|
|
|
s_wb32(s, 1); // hSpacing
|
|
s_wb32(s, 1); // vSpacing
|
|
|
|
return 16;
|
|
}
|
|
|
|
/// 12.1.3 Visual Sample Entry
|
|
static inline void mp4_write_visual_sample_entry(struct mp4_mux *mux,
|
|
obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
// SampleEntry Box
|
|
s_w8(s, 0); // reserved
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
|
|
s_wb16(s, 1); // data_reference_index
|
|
|
|
// VisualSampleEntry Box
|
|
s_wb16(s, 0); // pre_defined
|
|
s_wb16(s, 0); // reserved
|
|
s_wb32(s, 0); // pre_defined
|
|
s_wb32(s, 0); // pre_defined
|
|
s_wb32(s, 0); // pre_defined
|
|
|
|
s_wb16(s, (uint16_t)obs_encoder_get_width(enc)); // width
|
|
s_wb16(s, (uint16_t)obs_encoder_get_height(enc)); // height
|
|
|
|
s_wb32(s, 0x00480000); // horizresolution (predefined)
|
|
s_wb32(s, 0x00480000); // vertresolution (predefined)
|
|
|
|
s_wb32(s, 0); // reserved
|
|
s_wb16(s, 1); // frame_count
|
|
|
|
/* Name is fixed 32-bytes and needs to be padded to that length.
|
|
* First byte is the length, rest is a string sans NULL terminator. */
|
|
char compressor_name[32] = {0};
|
|
const char *enc_id = obs_encoder_get_id(enc);
|
|
if (enc_id) {
|
|
size_t len = strlen(enc_id);
|
|
if (len > 31)
|
|
len = 31;
|
|
|
|
compressor_name[0] = (char)len;
|
|
memcpy(compressor_name + 1, enc_id, len);
|
|
}
|
|
s_write(s, compressor_name, sizeof(compressor_name)); // compressorname
|
|
|
|
s_wb16(s, 0x0018); // depth
|
|
s_wb16(s, -1); // pre_defined
|
|
}
|
|
|
|
/// 12.1.6 Content light level
|
|
static size_t mp4_write_clli(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
video_t *video = obs_encoder_video(enc);
|
|
const struct video_output_info *info = video_output_get_info(video);
|
|
|
|
/* Only write box for HDR video */
|
|
if (info->colorspace != VIDEO_CS_2100_PQ &&
|
|
info->colorspace != VIDEO_CS_2100_HLG)
|
|
return 0;
|
|
|
|
write_box(s, 12, "clli");
|
|
|
|
float nominal_peak = obs_get_video_hdr_nominal_peak_level();
|
|
|
|
s_wb16(s, (uint16_t)nominal_peak); // max_content_light_level
|
|
s_wb16(s, (uint16_t)nominal_peak); // max_pic_average_light_level
|
|
|
|
return 12;
|
|
}
|
|
|
|
/// 12.1.7 Mastering display colour volume
|
|
static size_t mp4_write_mdcv(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
video_t *video = obs_encoder_video(enc);
|
|
const struct video_output_info *info = video_output_get_info(video);
|
|
|
|
// Only write atom for HDR video
|
|
if (info->colorspace != VIDEO_CS_2100_PQ &&
|
|
info->colorspace != VIDEO_CS_2100_HLG)
|
|
return 0;
|
|
|
|
write_box(s, 32, "mdcv");
|
|
|
|
float nominal_peak = obs_get_video_hdr_nominal_peak_level();
|
|
uint32_t max_lum = (uint32_t)nominal_peak * 10000;
|
|
|
|
/* Note that these values are hardcoded everywhere in OBS, so these are
|
|
* just the same as used in our other muxers/encoders. */
|
|
|
|
// 3 x display_primaries (x, y) pairs
|
|
s_wb16(s, 13250);
|
|
s_wb16(s, 34500);
|
|
s_wb16(s, 7500);
|
|
s_wb16(s, 3000);
|
|
s_wb16(s, 34000);
|
|
s_wb16(s, 16000);
|
|
|
|
s_wb16(s, 15635); // white_point_x
|
|
s_wb16(s, 16450); // white_point_y
|
|
s_wb32(s, max_lum); // max_display_mastering_luminance
|
|
s_wb32(s, 0); // min_display_mastering_luminance
|
|
|
|
return 32;
|
|
}
|
|
|
|
/// ISO/IEC 14496-15 5.4.2.1 AVCSampleEntry
|
|
static size_t mp4_write_avc1(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "avc1");
|
|
|
|
mp4_write_visual_sample_entry(mux, enc);
|
|
|
|
// avcC
|
|
mp4_write_avcC(mux, enc);
|
|
|
|
// colr
|
|
mp4_write_colr(mux, enc);
|
|
|
|
// pasp
|
|
mp4_write_pasp(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// ISO/IEC 14496-15 8.4.1.1 HEVCSampleEntry
|
|
static size_t mp4_write_hvc1(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "hvc1");
|
|
|
|
mp4_write_visual_sample_entry(mux, enc);
|
|
|
|
// avcC
|
|
mp4_write_hvcC(mux, enc);
|
|
|
|
// colr
|
|
mp4_write_colr(mux, enc);
|
|
|
|
// clli
|
|
mp4_write_clli(mux, enc);
|
|
|
|
// mdcv
|
|
mp4_write_mdcv(mux, enc);
|
|
|
|
// pasp
|
|
mp4_write_pasp(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// AV1 ISOBMFF 2.2. AV1 Sample Entry
|
|
static size_t mp4_write_av01(struct mp4_mux *mux, obs_encoder_t *enc)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "av01");
|
|
|
|
mp4_write_visual_sample_entry(mux, enc);
|
|
|
|
// avcC
|
|
mp4_write_av1C(mux, enc);
|
|
|
|
// colr
|
|
mp4_write_colr(mux, enc);
|
|
|
|
// clli
|
|
mp4_write_clli(mux, enc);
|
|
|
|
// mdcv
|
|
mp4_write_mdcv(mux, enc);
|
|
|
|
// pasp
|
|
mp4_write_pasp(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/* Write a descriptor header: the tag byte followed by `size` encoded in
 * exactly four bytes of 7 bits each, most significant group first. The first
 * three bytes have their top bit set, the last one does not. */
static inline void put_descr(struct serializer *s, uint8_t tag, size_t size)
{
	s_w8(s, tag);

	for (int shift = 21; shift >= 7; shift -= 7)
		s_w8(s, (uint8_t)((size >> shift) | 0x80));

	s_w8(s, size & 0x7F);
}
|
|
|
|
/// ISO/IEC 14496-14 5.6 ESDBox
|
|
static size_t mp4_write_esds(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "esds", 0, 0);
|
|
|
|
/* Encoder extradata will be used as DecoderSpecificInfo */
|
|
uint8_t *extradata;
|
|
size_t extradata_size;
|
|
if (!obs_encoder_get_extra_data(track->encoder, &extradata,
|
|
&extradata_size)) {
|
|
extradata_size = 0;
|
|
}
|
|
|
|
/// ISO/IEC 14496-1
|
|
|
|
// ES_Descriptor
|
|
size_t decoder_specific_info_len = extradata_size ? extradata_size + 5
|
|
: 0;
|
|
|
|
put_descr(s, 0x03, 3 + 5 + 13 + decoder_specific_info_len + 5 + 1);
|
|
s_wb16(s, track->track_id);
|
|
s_w8(s, 0x00); // flags
|
|
|
|
// DecoderConfigDescriptor
|
|
put_descr(s, 0x04, 13 + decoder_specific_info_len);
|
|
s_w8(s, 0x40); // codec tag, 0x40 = AAC
|
|
s_w8(s, 0x15); // stream type field (0x15 = audio stream)
|
|
|
|
/* When writing the final MOOV this could theoretically be calculated
|
|
* based on chunks, but it's not really all that important. */
|
|
uint32_t bitrate = 0;
|
|
obs_data_t *settings = obs_encoder_get_settings(track->encoder);
|
|
if (settings) {
|
|
int64_t enc_bitrate = obs_data_get_int(settings, "bitrate");
|
|
if (enc_bitrate)
|
|
bitrate = (uint32_t)(enc_bitrate * 1000);
|
|
|
|
obs_data_release(settings);
|
|
}
|
|
|
|
s_wb24(s, 0); // bufferSizeDB (in bytes)
|
|
s_wb32(s, bitrate); // maxbitrate
|
|
s_wb32(s, bitrate); // avgBitrate
|
|
|
|
// DecoderSpecificInfo
|
|
if (extradata_size) {
|
|
put_descr(s, 0x05, extradata_size);
|
|
s_write(s, extradata, extradata_size);
|
|
}
|
|
|
|
// SLConfigDescriptor descriptor
|
|
put_descr(s, 0x06, 1);
|
|
s_w8(s, 0x02); // 0x2 = reserved for MP4, descriptor is empty
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 12.2.3 Audio Sample Entry
|
|
static inline void mp4_write_audio_sample_entry(struct mp4_mux *mux,
|
|
struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
// SampleEntry Box
|
|
s_w8(s, 0); // reserved
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
s_w8(s, 0);
|
|
|
|
s_wb16(s, 1); // data_reference_index
|
|
|
|
// AudioSampleEntry Box
|
|
if (version == 1) {
|
|
s_wb16(s, 1); // entry_version
|
|
s_wb16(s, 0); // reserved
|
|
s_wb16(s, 0); // reserved
|
|
s_wb16(s, 0); // reserved
|
|
} else {
|
|
s_wb32(s, 0); // reserved
|
|
s_wb32(s, 0); // reserved
|
|
}
|
|
|
|
audio_t *audio = obs_encoder_audio(track->encoder);
|
|
size_t channels = audio_output_get_channels(audio);
|
|
uint32_t sample_rate = track->timescale;
|
|
bool alac = track->codec == CODEC_ALAC;
|
|
|
|
s_wb16(s, (uint32_t)channels); // channelcount
|
|
|
|
/* OBS FLAC is currently always 16 bit, ALAC always 24, this may change
|
|
* in the futrure and should be handled differently then.
|
|
* That being said thoes codecs are self-describing so in most cases it
|
|
* shouldn't matter either way. */
|
|
s_wb16(s, alac ? 24 : 16); // samplesize
|
|
|
|
s_wb16(s, 0); // pre_defined
|
|
s_wb16(s, 0); // reserved
|
|
|
|
s_wb32(s, sample_rate << 16); // samplerate
|
|
}
|
|
|
|
/// 12.2.4 Channel layout
|
|
static size_t mp4_write_chnl(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "chnl", 0, 0);
|
|
|
|
audio_t *audio = obs_encoder_audio(track->encoder);
|
|
const struct audio_output_info *info = audio_output_get_info(audio);
|
|
|
|
s_w8(s, 1); // stream_structure (1 = channels)
|
|
|
|
/* 5.1 and 4.1 do not have a corresponding ISO layout, so we have to
|
|
* write a manually created channel map for those. */
|
|
uint8_t map[8] = {0};
|
|
uint8_t items = 0;
|
|
uint8_t defined_layout = 0;
|
|
|
|
get_speaker_positions(info->speakers, map, &items, &defined_layout);
|
|
|
|
if (!defined_layout) {
|
|
warn("No ISO layout available for speaker layout %d, "
|
|
"this may not be supported by all applications!",
|
|
info->speakers);
|
|
s_w8(s, 0); // definedLayout
|
|
s_write(s, map, items); // uint8_t speaker_position[count]
|
|
} else {
|
|
s_w8(s, defined_layout); // definedLayout
|
|
s_wb64(s, 0); // ommitedChannelMap
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// ISO/IEC 14496-14 5.6 MP4AudioSampleEntry
|
|
static size_t mp4_write_mp4a(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "mp4a");
|
|
|
|
mp4_write_audio_sample_entry(mux, track, version);
|
|
|
|
// esds
|
|
mp4_write_esds(mux, track);
|
|
|
|
/* Write channel layout for version 1 sample entires */
|
|
if (version == 1)
|
|
mp4_write_chnl(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// Encapsulation of FLAC in ISO Base Media File Format 3.3.2 FLAC Specific Box
|
|
static size_t mp4_write_dfLa(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
uint8_t *extradata;
|
|
size_t extradata_size;
|
|
|
|
if (!obs_encoder_get_extra_data(track->encoder, &extradata,
|
|
&extradata_size))
|
|
return 0;
|
|
|
|
write_fullbox(s, 0, "dfLa", 0, 0);
|
|
|
|
/// FLACMetadataBlock
|
|
|
|
// LastMetadataBlockFlag (1) | BlockType (0)
|
|
s_w8(s, 1 << 7 | 0);
|
|
// Length
|
|
s_wb24(s, (uint32_t)extradata_size);
|
|
// BlockData[Length]
|
|
s_write(s, extradata, extradata_size);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// Encapsulation of FLAC in ISO Base Media File Format 3.3.1 FLACSampleEntry
|
|
static size_t mp4_write_fLaC(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "fLaC");
|
|
|
|
mp4_write_audio_sample_entry(mux, track, version);
|
|
|
|
// dfLa
|
|
mp4_write_dfLa(mux, track);
|
|
|
|
if (version == 1)
|
|
mp4_write_chnl(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// Apple Lossless Format "Magic Cookie" Description - MP4/M4A File
|
|
static size_t mp4_write_alac(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
uint8_t *extradata;
|
|
size_t extradata_size;
|
|
|
|
if (!obs_encoder_get_extra_data(track->encoder, &extradata,
|
|
&extradata_size))
|
|
return 0;
|
|
|
|
write_box(s, 0, "alac");
|
|
|
|
mp4_write_audio_sample_entry(mux, track, version);
|
|
|
|
/* Apple Lossless Magic Cookie */
|
|
s_write(s, extradata, extradata_size);
|
|
|
|
if (version == 1)
|
|
mp4_write_chnl(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// ISO/IEC 23003-5 5.1 PCM configuration
|
|
static size_t mp4_write_pcmc(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "pcmC", 0, 0);
|
|
|
|
s_w8(s, 1); // endianness, 1 = little endian
|
|
|
|
// bits per sample
|
|
if (track->codec == CODEC_PCM_I16)
|
|
s_w8(s, 16);
|
|
else if (track->codec == CODEC_PCM_I24)
|
|
s_w8(s, 24);
|
|
else if (track->codec == CODEC_PCM_F32)
|
|
s_w8(s, 32);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// ISO/IEC 23003-5 5.1 PCM configuration
|
|
static size_t mp4_write_xpcm(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
/* Different box types for floating point and integer PCM*/
|
|
write_box(s, 0, track->codec == CODEC_PCM_F32 ? "fpcm" : "ipcm");
|
|
|
|
mp4_write_audio_sample_entry(mux, track, version);
|
|
|
|
/* ChannelLayout (chnl) is required for PCM */
|
|
mp4_write_chnl(mux, track);
|
|
|
|
// pcmc
|
|
mp4_write_pcmc(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Text sample description
|
|
static size_t mp4_write_text(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "text", 0, 0);
|
|
|
|
s_wb32(s, 1); // number of entries
|
|
|
|
/* Preset sample description as used by FFmpeg. */
|
|
s_write(s, &TEXT_STUB_HEADER, sizeof(TEXT_STUB_HEADER));
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/* Read a little-endian 32-bit unsigned integer from the four bytes at `ptr`.
 * Each byte is widened to uint32_t before shifting; shifting the promoted
 * (signed) int left by 24 would be undefined for byte values >= 0x80. */
static inline uint32_t rl32(const uint8_t *ptr)
{
	return ((uint32_t)ptr[3] << 24) | ((uint32_t)ptr[2] << 16) |
	       ((uint32_t)ptr[1] << 8) | (uint32_t)ptr[0];
}
|
|
|
|
/* Read a little-endian 16-bit unsigned integer from the two bytes at `ptr`. */
static inline uint16_t rl16(const uint8_t *ptr)
{
	const unsigned low_byte = ptr[0];
	const unsigned high_byte = ptr[1];

	return (uint16_t)((high_byte << 8) | low_byte);
}
|
|
|
|
/// Encapsulation of Opus in ISO Base Media File Format 4.3.2 Opus Specific Box
|
|
static size_t mp4_write_dOps(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
uint8_t *extradata;
|
|
size_t extradata_size;
|
|
|
|
if (!obs_encoder_get_extra_data(track->encoder, &extradata,
|
|
&extradata_size))
|
|
return 0;
|
|
|
|
write_box(s, 0, "dOps");
|
|
s_w8(s, 0); // version
|
|
|
|
uint8_t channels = *(extradata + 9);
|
|
uint8_t channel_map = *(extradata + 18);
|
|
|
|
s_w8(s, channels); // channel count
|
|
// OpusHead is little-endian, but MP4 is big-endian, so we have to swap them here
|
|
s_wb16(s, rl16(extradata + 10)); // pre-skip
|
|
s_wb32(s, rl32(extradata + 12)); // input sample rate
|
|
s_wb16(s, rl16(extradata + 16)); // output gain
|
|
s_w8(s, channel_map); // channel mapping family
|
|
|
|
if (channel_map)
|
|
s_write(s, extradata + 19, 2 + channels);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// Encapsulation of Opus in ISO Base Media File Format 4.3.1 Sample entry format
|
|
static size_t mp4_write_Opus(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint8_t version)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "Opus");
|
|
|
|
mp4_write_audio_sample_entry(mux, track, version);
|
|
|
|
// dOps
|
|
mp4_write_dOps(mux, track);
|
|
|
|
if (version == 1)
|
|
mp4_write_chnl(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.5.2 Sample Description Box
|
|
static size_t mp4_write_stsd(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
/* Anything but mono or stereo technically requires v1,
|
|
* but in practice that doesn't appear to matter. */
|
|
uint8_t version = 0;
|
|
|
|
if (track->type == TRACK_AUDIO) {
|
|
audio_t *audio = obs_encoder_audio(track->encoder);
|
|
version = audio_output_get_channels(audio) > 2 ? 1 : 0;
|
|
}
|
|
|
|
write_fullbox(s, 0, "stsd", version, 0);
|
|
|
|
s_wb32(s, 1); // entry_count
|
|
|
|
// codec specific boxes
|
|
if (track->type == TRACK_VIDEO) {
|
|
if (track->codec == CODEC_H264)
|
|
mp4_write_avc1(mux, track->encoder);
|
|
else if (track->codec == CODEC_HEVC)
|
|
mp4_write_hvc1(mux, track->encoder);
|
|
else if (track->codec == CODEC_AV1)
|
|
mp4_write_av01(mux, track->encoder);
|
|
} else if (track->type == TRACK_AUDIO) {
|
|
if (track->codec == CODEC_AAC)
|
|
mp4_write_mp4a(mux, track, version);
|
|
else if (track->codec == CODEC_OPUS)
|
|
mp4_write_Opus(mux, track, version);
|
|
else if (track->codec == CODEC_FLAC)
|
|
mp4_write_fLaC(mux, track, version);
|
|
else if (track->codec == CODEC_ALAC)
|
|
mp4_write_alac(mux, track, version);
|
|
else if (track->codec == CODEC_PCM_I16 ||
|
|
track->codec == CODEC_PCM_I24 ||
|
|
track->codec == CODEC_PCM_F32)
|
|
mp4_write_xpcm(mux, track, version);
|
|
} else if (track->type == TRACK_CHAPTERS) {
|
|
mp4_write_text(mux);
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.6.1.2 Decoding Time to Sample Box
|
|
static size_t mp4_write_stts(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
if (fragmented) {
|
|
write_fullbox(s, 16, "stts", 0, 0);
|
|
s_wb32(s, 0); // entry_count
|
|
return 16;
|
|
}
|
|
|
|
int64_t start = serializer_get_pos(s);
|
|
struct sample_delta *arr = track->deltas.array;
|
|
size_t num = track->deltas.num;
|
|
|
|
write_fullbox(s, 0, "stts", 0, 0);
|
|
|
|
s_wb32(s, (uint32_t)num); // entry_count
|
|
|
|
for (size_t idx = 0; idx < num; idx++) {
|
|
struct sample_delta *smp = &arr[idx];
|
|
|
|
uint64_t delta = util_mul_div64(smp->delta, track->timescale,
|
|
track->timebase_den);
|
|
|
|
s_wb32(s, smp->count); // sample_count
|
|
s_wb32(s, (uint32_t)delta); // sample_delta
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.6.2 Sync Sample Box
|
|
static size_t mp4_write_stss(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
uint32_t num = (uint32_t)track->sync_samples.num;
|
|
|
|
if (!num)
|
|
return 0;
|
|
|
|
/* 16 byte FullBox header + 4-bytes (u32) per sync sample */
|
|
uint32_t size = 16 + 4 * num;
|
|
|
|
write_fullbox(s, size, "stss", 0, 0);
|
|
s_wb32(s, num); // entry_count
|
|
|
|
for (size_t idx = 0; idx < num; idx++)
|
|
s_wb32(s, track->sync_samples.array[idx]); // sample_number
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 8.6.1.3 Composition Time to Sample Box
|
|
static size_t mp4_write_ctts(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
uint32_t num = (uint32_t)track->offsets.num;
|
|
|
|
uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
|
|
|
|
/* 16 byte FullBox header + 8-bytes (u32+u32/i32) per offset entry */
|
|
uint32_t size = 16 + 8 * num;
|
|
write_fullbox(s, size, "ctts", version, 0);
|
|
|
|
s_wb32(s, num); // entry_count
|
|
|
|
for (size_t idx = 0; idx < num; idx++) {
|
|
int64_t offset = (int64_t)track->offsets.array[idx].offset *
|
|
(int64_t)track->timescale /
|
|
(int64_t)track->timebase_den;
|
|
|
|
s_wb32(s, track->offsets.array[idx].count); // sample_count
|
|
s_wb32(s, (uint32_t)offset); // sample_offset
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 8.7.4 Sample To Chunk Box
|
|
static size_t mp4_write_stsc(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
if (fragmented) {
|
|
write_fullbox(s, 16, "stsc", 0, 0);
|
|
s_wb32(s, 0); // entry_count
|
|
return 16;
|
|
}
|
|
|
|
struct chunk *arr = track->chunks.array;
|
|
size_t arr_num = track->chunks.num;
|
|
|
|
/* Compress into array with counter for repeating chunk sizes */
|
|
DARRAY(struct chunk_run {
|
|
uint32_t first;
|
|
uint32_t samples;
|
|
}) chunk_runs;
|
|
|
|
da_init(chunk_runs);
|
|
|
|
for (size_t idx = 0; idx < arr_num; idx++) {
|
|
struct chunk *chk = &arr[idx];
|
|
|
|
if (!chunk_runs.num ||
|
|
chunk_runs.array[chunk_runs.num - 1].samples !=
|
|
chk->samples) {
|
|
struct chunk_run *cr = da_push_back_new(chunk_runs);
|
|
cr->samples = chk->samples;
|
|
cr->first = (uint32_t)idx + 1; // ISO-BMFF is 1-indexed
|
|
}
|
|
}
|
|
|
|
uint32_t num = (uint32_t)chunk_runs.num;
|
|
|
|
/* 16 byte FullBox header + 12-bytes (u32+u32+u32) per chunk run */
|
|
uint32_t size = 16 + 12 * num;
|
|
write_fullbox(s, size, "stsc", 0, 0);
|
|
|
|
s_wb32(s, num); // entry_count
|
|
|
|
for (size_t idx = 0; idx < num; idx++) {
|
|
struct chunk_run *cr = &chunk_runs.array[idx];
|
|
s_wb32(s, cr->first); // first_chunk
|
|
s_wb32(s, cr->samples); // samples_per_chunk
|
|
s_wb32(s, 1); // sample_description_index
|
|
}
|
|
|
|
da_free(chunk_runs);
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 8.7.3 Sample Size Boxes
|
|
static size_t mp4_write_stsz(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
if (fragmented) {
|
|
write_fullbox(s, 20, "stsz", 0, 0);
|
|
s_wb32(s, 0); // sample_size
|
|
s_wb32(s, 0); // sample_count
|
|
|
|
return 20;
|
|
}
|
|
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
/* This should only ever happen when recording > 24 hours of
|
|
* 48 kHz PCM audio or 828 days of 60 FPS video. */
|
|
if (track->samples > UINT32_MAX) {
|
|
warn("Track %u has too many samples, its duration may not be "
|
|
"read correctly. Remuxing the file to another format such "
|
|
"as MKV may be required.",
|
|
track->track_id);
|
|
}
|
|
|
|
write_fullbox(s, 0, "stsz", 0, 0);
|
|
|
|
if (track->sample_size) {
|
|
/* Fixed size samples mean we don't need an array */
|
|
s_wb32(s, track->sample_size); // sample_size
|
|
s_wb32(s, (uint32_t)track->samples); // sample_count
|
|
} else {
|
|
s_wb32(s, 0); // sample_size
|
|
s_wb32(s, (uint32_t)track->sample_sizes.num); // sample_count
|
|
|
|
for (size_t idx = 0; idx < track->sample_sizes.num; idx++) {
|
|
s_wb32(s, track->sample_sizes.array[idx]); // entry_size
|
|
}
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.7.5 Chunk Offset Box
|
|
static size_t mp4_write_stco(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
if (fragmented) {
|
|
write_fullbox(s, 16, "stco", 0, 0);
|
|
s_wb32(s, 0); // entry_count
|
|
return 16;
|
|
}
|
|
|
|
struct chunk *arr = track->chunks.array;
|
|
uint32_t num = (uint32_t)track->chunks.num;
|
|
|
|
uint64_t last_off = arr[num - 1].offset;
|
|
uint32_t size;
|
|
bool co64 = last_off > UINT32_MAX;
|
|
|
|
/* When using 64-bit offsets we write 8-bytes (u64) per chunk,
|
|
* otherwise 4-bytes (u32). */
|
|
if (co64) {
|
|
size = 16 + 8 * num;
|
|
write_fullbox(s, size, "co64", 0, 0);
|
|
} else {
|
|
size = 16 + 4 * num;
|
|
write_fullbox(s, size, "stco", 0, 0);
|
|
}
|
|
|
|
s_wb32(s, num); // entry_count
|
|
|
|
for (size_t idx = 0; idx < num; idx++) {
|
|
if (co64)
|
|
s_wb64(s, arr[idx].offset); // chunk_offset
|
|
else
|
|
s_wb32(s, (uint32_t)arr[idx].offset); // chunk_offset
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
/// 8.9.3 Sample Group Description Box
|
|
static size_t mp4_write_sgpd_aac(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
int64_t start = serializer_get_pos(s);
|
|
write_fullbox(s, 0, "sgpd", 1, 0);
|
|
|
|
s_write(s, "roll", 4); // grouping_tpye
|
|
s_wb32(s, 2); // default_length (i16)
|
|
|
|
s_wb32(s, 1); // entry_count
|
|
|
|
// AudioRollRecoveryEntry
|
|
s_wb16(s, -1); // roll_distance
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.9.2 Sample to Group Box
|
|
static size_t mp4_write_sbgp_aac(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
int64_t start = serializer_get_pos(s);
|
|
write_fullbox(s, 0, "sbgp", 0, 0);
|
|
|
|
/// 10.1 AudioRollRecoveryEntry
|
|
s_write(s, "roll", 4); // grouping_tpye
|
|
|
|
s_wb32(s, 1); // entry_count
|
|
|
|
s_wb32(s, (uint32_t)track->samples); // sample_count
|
|
s_wb32(s, 1); // group_description_index
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
static size_t mp4_write_sbgp_sbgp_opus(struct mp4_mux *mux,
|
|
struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
/// 8.9.3 Sample Group Description Box
|
|
write_fullbox(s, 0, "sgpd", 1, 0);
|
|
|
|
s_write(s, "roll", 4); // grouping_tpye
|
|
s_wb32(s, 2); // default_length (i16)
|
|
|
|
/* Opus requires 80 ms of preroll, which at 48 kHz is 3840 PCM samples */
|
|
const int64_t opus_preroll = 3840;
|
|
|
|
/* Compute the preroll samples (should be 4, each being 20 ms) */
|
|
uint16_t preroll_count = 0;
|
|
int64_t preroll_remaining = opus_preroll;
|
|
|
|
for (size_t i = 0; i < track->deltas.num && preroll_remaining > 0;
|
|
i++) {
|
|
for (uint32_t j = 0;
|
|
j < track->deltas.array[i].count && preroll_remaining > 0;
|
|
j++) {
|
|
preroll_remaining -= track->deltas.array[i].delta;
|
|
preroll_count++;
|
|
}
|
|
}
|
|
|
|
s_wb32(s, 1); // entry_count
|
|
/// 10.1 AudioRollRecoveryEntry
|
|
s_wb16(s, -preroll_count); // roll_distance
|
|
|
|
size_t size_sgpd = write_box_size(s, start);
|
|
|
|
/* --------------- */
|
|
|
|
/// 8.9.2 Sample to Group Box
|
|
start = serializer_get_pos(s);
|
|
write_fullbox(s, 0, "sbgp", 0, 0);
|
|
|
|
s_write(s, "roll", 4); // grouping_tpye
|
|
s_wb32(s, 2); // entry_count
|
|
|
|
// entry 0
|
|
s_wb32(s, preroll_count); // sample_count
|
|
s_wb32(s, 0); // group_description_index
|
|
// entry 1
|
|
s_wb32(s, (uint32_t)track->samples - preroll_count); // sample_count
|
|
s_wb32(s, 1); // group_description_index
|
|
|
|
return size_sgpd + write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.5.1 Sample Table Box
|
|
static size_t mp4_write_stbl(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "stbl");
|
|
|
|
// stsd
|
|
mp4_write_stsd(mux, track);
|
|
|
|
// stts
|
|
mp4_write_stts(mux, track, fragmented);
|
|
|
|
// stss (non-fragmented only)
|
|
if (track->type == TRACK_VIDEO && !fragmented)
|
|
mp4_write_stss(mux, track);
|
|
|
|
// ctts (non-fragmented only)
|
|
if (track->needs_ctts && !fragmented)
|
|
mp4_write_ctts(mux, track);
|
|
|
|
// stsc
|
|
mp4_write_stsc(mux, track, fragmented);
|
|
|
|
// stsz
|
|
mp4_write_stsz(mux, track, fragmented);
|
|
|
|
// stco
|
|
mp4_write_stco(mux, track, fragmented);
|
|
|
|
if (!fragmented) {
|
|
/* AAC and Opus require a pre-roll to get correct decoder
|
|
* output, sgpd and sbgp are used to create a "roll" group. */
|
|
if (track->codec == CODEC_AAC) {
|
|
// sgpd
|
|
mp4_write_sgpd_aac(mux);
|
|
// sbgp
|
|
mp4_write_sbgp_aac(mux, track);
|
|
} else if (track->codec == CODEC_OPUS) {
|
|
// sgpd + sbgp
|
|
mp4_write_sbgp_sbgp_opus(mux, track);
|
|
}
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.7.2.2 DataEntryUrlBox
|
|
static size_t mp4_write_url(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "url ", 0, 1);
|
|
|
|
/* empty, flag 1 means data is in this file */
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.7.2 Data Reference Box
|
|
static size_t mp4_write_dref(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "dref ", 0, 0);
|
|
|
|
s_wb32(s, 1); // entry_count
|
|
|
|
mp4_write_url(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.7.1 Data Information Box
|
|
static size_t mp4_write_dinf(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "dinf");
|
|
|
|
mp4_write_dref(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.4.4 Media Information Box
|
|
static size_t mp4_write_minf(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "minf");
|
|
|
|
// vmhd/smhd/gmhd
|
|
if (track->type == TRACK_VIDEO)
|
|
mp4_write_vmhd(mux);
|
|
else if (track->type == TRACK_CHAPTERS)
|
|
mp4_write_gmhd(mux);
|
|
else
|
|
mp4_write_smhd(mux);
|
|
|
|
// dinf, unnecessary but mandatory
|
|
mp4_write_dinf(mux);
|
|
|
|
// stbl
|
|
mp4_write_stbl(mux, track, fragmented);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.4.1 Media Box
|
|
static size_t mp4_write_mdia(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "mdia");
|
|
|
|
// mdhd
|
|
mp4_write_mdhd(mux, track);
|
|
|
|
// hdlr
|
|
mp4_write_hdlr(mux, track);
|
|
|
|
// minf
|
|
mp4_write_minf(mux, track, fragmented);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) User data atom
|
|
static size_t mp4_write_udta_atom(struct mp4_mux *mux, const char tag[4],
|
|
const char *val)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, tag);
|
|
s_write(s, val, strlen(val));
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.10.1 User Data Box
|
|
static size_t mp4_write_track_udta(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "udta");
|
|
|
|
/* Our udta box contains QuickTime format user data atoms, which are
|
|
* simple key-value pairs. Some are prefixed with 0xa9. */
|
|
|
|
const char *name = obs_encoder_get_name(track->encoder);
|
|
if (name)
|
|
mp4_write_udta_atom(mux, "name", name);
|
|
|
|
if (mux->flags & MP4_WRITE_ENCODER_INFO) {
|
|
const char *id = obs_encoder_get_id(track->encoder);
|
|
if (name)
|
|
mp4_write_udta_atom(mux, "\251enc", id);
|
|
|
|
obs_data_t *settings = obs_encoder_get_settings(track->encoder);
|
|
if (settings) {
|
|
const char *json =
|
|
obs_data_get_json_with_defaults(settings);
|
|
mp4_write_udta_atom(mux, "json", json);
|
|
obs_data_release(settings);
|
|
}
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.6.6 Edit List Box
|
|
static size_t mp4_write_elst(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "elst", 0, 0);
|
|
|
|
s_wb32(s, 1); // entry count
|
|
|
|
uint64_t duration =
|
|
util_mul_div64(track->duration, 1000, track->timebase_den);
|
|
uint64_t delay = 0;
|
|
|
|
if (track->type == TRACK_VIDEO &&
|
|
!(mux->flags & MP4_USE_NEGATIVE_CTS)) {
|
|
/* Compensate for frame-reordering delay (for example, when
|
|
* using b-frames). */
|
|
int64_t dts_offset = 0;
|
|
|
|
if (track->offsets.num) {
|
|
struct sample_offset sample = track->offsets.array[0];
|
|
dts_offset = sample.offset;
|
|
} else if (track->packets.size) {
|
|
/* If no offset data exists yet (i.e. when writing the
|
|
* incomplete moov in a fragmented file) use the raw
|
|
* data from the current queued packets instead. */
|
|
struct encoder_packet pkt;
|
|
deque_peek_front(&track->packets, &pkt, sizeof(pkt));
|
|
dts_offset = pkt.pts - pkt.dts;
|
|
}
|
|
|
|
delay = util_mul_div64(dts_offset, track->timescale,
|
|
track->timebase_den);
|
|
} else if (track->type == TRACK_AUDIO && track->first_pts < 0) {
|
|
delay = util_mul_div64(llabs(track->first_pts),
|
|
track->timescale, track->timebase_den);
|
|
/* Subtract priming delay from total duration */
|
|
duration -= util_mul_div64(delay, 1000, track->timescale);
|
|
}
|
|
|
|
s_wb32(s, (uint32_t)duration); // segment_duration (movie timescale)
|
|
s_wb32(s, (uint32_t)delay); // media_time (track timescale)
|
|
s_wb32(s, 1 << 16); // media_rate
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.6.5 Edit Box
|
|
static size_t mp4_write_edts(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "edts");
|
|
|
|
mp4_write_elst(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.3.3.2 TrackReferenceTypeBox
|
|
static size_t mp4_write_chap(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
/// QTFF/Apple chapter track reference
|
|
write_box(s, 0, "chap");
|
|
|
|
s_wb32(s, mux->chapter_track->track_id);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.3.3 Track Reference Box
|
|
static size_t mp4_write_tref(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "tref");
|
|
|
|
mp4_write_chap(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.3.1 Track Box
|
|
static size_t mp4_write_trak(struct mp4_mux *mux, struct mp4_track *track,
|
|
bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "trak");
|
|
|
|
// tkhd
|
|
mp4_write_tkhd(mux, track);
|
|
|
|
// edts
|
|
mp4_write_edts(mux, track);
|
|
|
|
// tref
|
|
if (mux->chapter_track && track->type != TRACK_CHAPTERS)
|
|
mp4_write_tref(mux);
|
|
|
|
// mdia
|
|
mp4_write_mdia(mux, track, fragmented);
|
|
|
|
// udta (audio track name mainly)
|
|
mp4_write_track_udta(mux, track);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.8.3 Track Extends Box
|
|
static size_t mp4_write_trex(struct mp4_mux *mux, uint32_t track_id)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 32, "trex", 0, 0);
|
|
|
|
s_wb32(s, track_id); // track_ID
|
|
s_wb32(s, 1); // default_sample_description_index
|
|
s_wb32(s, 0); // default_sample_duration
|
|
s_wb32(s, 0); // default_sample_size
|
|
s_wb32(s, 0); // default_sample_flags
|
|
|
|
return 32;
|
|
}
|
|
|
|
/// 8.8.1 Movie Extends Box
|
|
static size_t mp4_write_mvex(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "mvex");
|
|
|
|
for (size_t track_id = 0; track_id < mux->tracks.num; track_id++)
|
|
mp4_write_trex(mux, (uint32_t)(track_id + 1));
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Undocumented QuickTime/iTunes metadata handler
|
|
static size_t mp4_write_itunes_hdlr(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 33, "hdlr", 0, 0);
|
|
|
|
s_wb32(s, 0); // pre_defined
|
|
s_write(s, "mdir", 4); // handler_type
|
|
|
|
// reserved
|
|
s_write(s, "appl", 4);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
|
|
s_w8(s, 0); // name (NULL)
|
|
|
|
return 33;
|
|
}
|
|
|
|
/// (QTFF/Apple) Data atom
|
|
static size_t mp4_write_data_atom(struct mp4_mux *mux, const char *data)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
size_t len = strlen(data);
|
|
uint32_t size = 16 + (uint32_t)len;
|
|
|
|
write_box(s, size, "data");
|
|
|
|
s_wb32(s, 1); // type, 1 = utf-8 string
|
|
s_wb32(s, 0); // locale, 0 = default
|
|
s_write(s, data, len);
|
|
|
|
return size;
|
|
}
|
|
|
|
/// (QTFF/Apple) Metadata item atom
|
|
static size_t mp4_write_ilst_item_atom(struct mp4_mux *mux, const char name[4],
|
|
const char *value)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, name);
|
|
|
|
mp4_write_data_atom(mux, value);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Metadata item list atom
|
|
static size_t mp4_write_ilst(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
struct dstr value = {0};
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "ilst");
|
|
|
|
/* Encoder name */
|
|
dstr_cat(&value, "OBS Studio (");
|
|
dstr_cat(&value, obs_get_version_string());
|
|
dstr_cat(&value, ")");
|
|
/* Some QuickTime keys are prefixed with 0xa9 */
|
|
mp4_write_ilst_item_atom(mux, "\251too", value.array);
|
|
|
|
dstr_free(&value);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// (QTFF/Apple) Key value metadata handler
|
|
static size_t mp4_write_mdta_hdlr(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 33, "hdlr", 0, 0);
|
|
|
|
s_wb32(s, 0); // pre_defined
|
|
s_write(s, "mdta", 4); // handler_type
|
|
|
|
// reserved
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
s_wb32(s, 0);
|
|
|
|
s_w8(s, 0); // name (NULL)
|
|
return 33;
|
|
}
|
|
|
|
/// (QTFF/Apple) Metadata item keys atom
|
|
/* Writes the "keys" box: one "mdta" key entry per item in `meta`, using the
 * item name as the key. The entry count is written as a placeholder first
 * and patched once all items have been visited. Returns the box size. */
static size_t mp4_write_mdta_keys(struct mp4_mux *mux, obs_data_t *meta)
{
	struct serializer *out = mux->serializer;
	const int64_t box_start = serializer_get_pos(out);

	write_fullbox(out, 0, "keys", 0, 0);

	/* Remember where the count lives so it can be overwritten later */
	const int64_t count_pos = serializer_get_pos(out);
	s_wb32(out, 0); // count (placeholder)

	uint32_t written = 0;
	obs_data_item_t *item = obs_data_first(meta);

	while (item != NULL) {
		const char *key = obs_data_item_get_name(item);
		const size_t key_len = strlen(key);

		/* name is key type, can be udta or mdta */
		write_box(out, key_len + 8, "mdta");
		s_write(out, key, key_len); // key name

		written++;
		obs_data_item_next(&item);
	}

	const int64_t box_end = serializer_get_pos(out);

	/* Overwrite count with correct value */
	serializer_seek(out, count_pos, SERIALIZE_SEEK_START);
	s_wb32(out, written);
	serializer_seek(out, box_end, SERIALIZE_SEEK_START);

	return write_box_size(out, box_start);
}
|
|
|
|
/// (QTFF/Apple) Metadata item atom, but name is an index instead
|
|
/* Writes one ilst entry whose "name" is the numeric key index `idx`,
 * wrapping a "data" atom with the item's string value. The size field is
 * written as zero and patched afterwards. */
static inline void write_key_entry(struct mp4_mux *mux, obs_data_item_t *item,
				   uint32_t idx)
{
	struct serializer *out = mux->serializer;
	const int64_t entry_start = serializer_get_pos(out);

	s_wb32(out, 0);   // size (placeholder)
	s_wb32(out, idx); // index

	const char *text = obs_data_item_get_string(item);
	mp4_write_data_atom(mux, text);

	write_box_size(out, entry_start);
}
|
|
|
|
/// (QTFF/Apple) Metadata item list atom
|
|
/* Writes the "ilst" box for key/value metadata: one index-named entry per
 * item in `meta`, numbered from 1 in iteration order.
 * Returns the total box size. */
static size_t mp4_write_mdta_ilst(struct mp4_mux *mux, obs_data_t *meta)
{
	struct serializer *out = mux->serializer;
	const int64_t box_start = serializer_get_pos(out);

	write_box(out, 0, "ilst");

	/* indices start with 1 */
	uint32_t next_idx = 1;
	obs_data_item_t *item = obs_data_first(meta);

	while (item != NULL) {
		write_key_entry(mux, item, next_idx++);
		obs_data_item_next(&item);
	}

	return write_box_size(out, box_start);
}
|
|
|
|
static void mp4_write_mdta_kv(struct mp4_mux *mux)
|
|
{
|
|
struct dstr value = {0};
|
|
|
|
obs_data_t *meta = obs_data_create();
|
|
|
|
dstr_cat(&value, "OBS Studio (");
|
|
dstr_cat(&value, obs_get_version_string());
|
|
dstr_cat(&value, ")");
|
|
|
|
// ToDo figure out what else we could put in here for fun and profit :)
|
|
obs_data_set_string(meta, "tool", value.array);
|
|
|
|
/* Write keys */
|
|
mp4_write_mdta_keys(mux, meta);
|
|
/* Write values */
|
|
mp4_write_mdta_ilst(mux, meta);
|
|
|
|
obs_data_release(meta);
|
|
dstr_free(&value);
|
|
}
|
|
|
|
/// 8.11.1 The Meta box
|
|
static size_t mp4_write_meta(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_fullbox(s, 0, "meta", 0, 0);
|
|
|
|
if (mux->flags & MP4_USE_MDTA_KEY_VALUE) {
|
|
mp4_write_mdta_hdlr(mux);
|
|
mp4_write_mdta_kv(mux);
|
|
} else {
|
|
mp4_write_itunes_hdlr(mux);
|
|
mp4_write_ilst(mux);
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.10.1 User Data Box
|
|
static size_t mp4_write_udta(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "udta");
|
|
|
|
/* Normally metadata would be directly in the moov, but since this is
|
|
* Apple/QTFF format metadata it is inside udta. */
|
|
|
|
// meta
|
|
mp4_write_meta(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// Movie Box (8.2.1)
|
|
static size_t mp4_write_moov(struct mp4_mux *mux, bool fragmented)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "moov");
|
|
|
|
mp4_write_mvhd(mux);
|
|
|
|
// trak(s)
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
mp4_write_trak(mux, track, fragmented);
|
|
}
|
|
|
|
if (!fragmented && mux->chapter_track)
|
|
mp4_write_trak(mux, mux->chapter_track, false);
|
|
|
|
// mvex
|
|
if (fragmented)
|
|
mp4_write_mvex(mux);
|
|
|
|
// udta (metadata)
|
|
mp4_write_udta(mux);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/* ========================================================================== */
|
|
/* moof (fragment header) stuff */
|
|
|
|
/// 8.8.5 Movie Fragment Header Box
|
|
static size_t mp4_write_mfhd(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 16, "mfhd", 0, 0);
|
|
|
|
s_wb32(s, mux->fragments_written); // sequence_number
|
|
|
|
return 16;
|
|
}
|
|
|
|
/// 8.8.7 Track Fragment Header Box
|
|
static size_t mp4_write_tfhd(struct mp4_mux *mux, struct mp4_track *track,
|
|
size_t moof_start)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
uint32_t flags = BASE_DATA_OFFSET_PRESENT |
|
|
DEFAULT_SAMPLE_FLAGS_PRESENT;
|
|
|
|
/* Add default size/duration if all samples match. */
|
|
bool durations_match = true;
|
|
bool sizes_match = true;
|
|
uint32_t duration;
|
|
uint32_t sample_size;
|
|
|
|
if (track->sample_size) {
|
|
duration = 1;
|
|
sample_size = track->sample_size;
|
|
} else {
|
|
duration = track->fragment_samples.array[0].duration;
|
|
sample_size = track->fragment_samples.array[0].size;
|
|
|
|
for (size_t idx = 1; idx < track->fragment_samples.num; idx++) {
|
|
uint32_t frag_duration =
|
|
track->fragment_samples.array[idx].duration;
|
|
uint32_t frag_size =
|
|
track->fragment_samples.array[idx].size;
|
|
|
|
durations_match = frag_duration == duration;
|
|
sizes_match = frag_size == sample_size;
|
|
}
|
|
}
|
|
|
|
if (durations_match)
|
|
flags |= DEFAULT_SAMPLE_DURATION_PRESENT;
|
|
if (sizes_match)
|
|
flags |= DEFAULT_SAMPLE_SIZE_PRESENT;
|
|
|
|
write_fullbox(s, 0, "tfhd", 0, flags);
|
|
|
|
s_wb32(s, track->track_id); // track_ID
|
|
s_wb64(s, moof_start); // base_data_offset
|
|
|
|
// default_sample_duration
|
|
if (durations_match) {
|
|
if (track->type == TRACK_VIDEO) {
|
|
/* Convert duration to track timescale */
|
|
duration = (uint32_t)util_mul_div64(
|
|
duration, track->timescale,
|
|
track->timebase_den);
|
|
}
|
|
|
|
s_wb32(s, duration);
|
|
}
|
|
// default_sample_size
|
|
if (sizes_match)
|
|
s_wb32(s, sample_size);
|
|
// default_sample_flags
|
|
if (track->type == TRACK_VIDEO) {
|
|
s_wb32(s, SAMPLE_FLAG_DEPENDS_YES | SAMPLE_FLAG_IS_NON_SYNC);
|
|
} else {
|
|
s_wb32(s, SAMPLE_FLAG_DEPENDS_NO);
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.8.12 Track fragment decode time
|
|
static size_t mp4_write_tfdt(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
write_fullbox(s, 20, "tfdt", 1, 0);
|
|
|
|
/* Subtract samples that are not written yet */
|
|
uint64_t duration_written = track->duration;
|
|
for (size_t i = 0; i < track->fragment_samples.num; i++)
|
|
duration_written -= track->fragment_samples.array[i].duration;
|
|
|
|
if (track->type == TRACK_VIDEO) {
|
|
/* Convert to track timescale */
|
|
duration_written = util_mul_div64(duration_written,
|
|
track->timescale,
|
|
track->timebase_den);
|
|
}
|
|
|
|
s_wb64(s, duration_written); // baseMediaDecodeTime
|
|
|
|
return 20;
|
|
}
|
|
|
|
/// 8.8.8 Track Fragment Run Box
|
|
static size_t mp4_write_trun(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint32_t moof_size, uint64_t *samples_mdat_offset)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
uint32_t flags = DATA_OFFSET_PRESENT;
|
|
|
|
if (!track->sample_size)
|
|
flags |= SAMPLE_SIZE_PRESENT;
|
|
|
|
if (track->type == TRACK_VIDEO) {
|
|
flags |= FIRST_SAMPLE_FLAGS_PRESENT;
|
|
flags |= SAMPLE_COMPOSITION_TIME_OFFSETS_PRESENT;
|
|
}
|
|
|
|
uint8_t version = mux->flags & MP4_USE_NEGATIVE_CTS ? 1 : 0;
|
|
|
|
write_fullbox(s, 0, "trun", version, flags);
|
|
|
|
/* moof_size + 8 bytes for mdat header + offset into mdat box data */
|
|
size_t data_offset = moof_size + 8 + *samples_mdat_offset;
|
|
size_t sample_count = track->fragment_samples.num;
|
|
|
|
if (track->sample_size) {
|
|
/* Update count based on fixed size */
|
|
size_t total_size = 0;
|
|
for (size_t i = 0; i < sample_count; i++)
|
|
total_size += track->fragment_samples.array[i].size;
|
|
|
|
*samples_mdat_offset += total_size;
|
|
sample_count = total_size / track->sample_size;
|
|
}
|
|
|
|
s_wb32(s, (uint32_t)sample_count); // sample_count
|
|
s_wb32(s, (uint32_t)data_offset); // data_offset
|
|
|
|
/* If we have a fixed sample size (PCM audio) we only need to write
|
|
* the sample count and offset. */
|
|
if (track->sample_size)
|
|
return write_box_size(s, start);
|
|
|
|
if (track->type == TRACK_VIDEO)
|
|
s_wb32(s, SAMPLE_FLAG_DEPENDS_NO); // first_sample_flags
|
|
|
|
for (size_t idx = 0; idx < sample_count; idx++) {
|
|
struct fragment_sample *smp =
|
|
&track->fragment_samples.array[idx];
|
|
|
|
s_wb32(s, smp->size); // sample_size
|
|
|
|
if (track->type == TRACK_VIDEO) {
|
|
// sample_composition_time_offset
|
|
int64_t offset = (int64_t)smp->offset *
|
|
(int64_t)track->timescale /
|
|
(int64_t)track->timebase_den;
|
|
s_wb32(s, (uint32_t)offset);
|
|
}
|
|
|
|
*samples_mdat_offset += smp->size;
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.8.6 Track Fragment Box
|
|
static size_t mp4_write_traf(struct mp4_mux *mux, struct mp4_track *track,
|
|
int64_t moof_start, uint32_t moof_size,
|
|
uint64_t *samples_mdat_offset)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "traf");
|
|
|
|
// tfhd
|
|
mp4_write_tfhd(mux, track, moof_start);
|
|
|
|
// tfdt
|
|
mp4_write_tfdt(mux, track);
|
|
|
|
// trun
|
|
mp4_write_trun(mux, track, moof_size, samples_mdat_offset);
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/// 8.8.4 Movie Fragment Box
|
|
static size_t mp4_write_moof(struct mp4_mux *mux, uint32_t moof_size,
|
|
int64_t moof_start)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
int64_t start = serializer_get_pos(s);
|
|
|
|
write_box(s, 0, "moof");
|
|
|
|
mp4_write_mfhd(mux);
|
|
|
|
/* Track current mdat offset across tracks */
|
|
uint64_t samples_mdat_offset = 0;
|
|
|
|
// traf boxes
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
/* Skip tracks that do not have any samples */
|
|
if (!track->fragment_samples.num)
|
|
continue;
|
|
|
|
mp4_write_traf(mux, track, moof_start, moof_size,
|
|
&samples_mdat_offset);
|
|
}
|
|
|
|
return write_box_size(s, start);
|
|
}
|
|
|
|
/* ========================================================================== */
|
|
/* Chapter packets */
|
|
|
|
static void mp4_create_chapter_pkt(struct encoder_packet *pkt, int64_t dts_usec,
|
|
const char *name)
|
|
{
|
|
int64_t dts = dts_usec / 1000; // chapter track uses a ms timebase
|
|
|
|
pkt->pts = dts;
|
|
pkt->dts = dts;
|
|
pkt->dts_usec = dts_usec;
|
|
pkt->timebase_num = 1;
|
|
pkt->timebase_den = 1000;
|
|
|
|
/* Serialize with data with ref count */
|
|
struct serializer s;
|
|
struct array_output_data ao;
|
|
array_output_serializer_init(&s, &ao);
|
|
|
|
size_t len = min(strlen(name), UINT16_MAX);
|
|
long refs = 1;
|
|
|
|
/* encoder_packet refs */
|
|
s_write(&s, &refs, sizeof(refs));
|
|
/* actual packet data */
|
|
s_wb16(&s, (uint16_t)len);
|
|
s_write(&s, name, len);
|
|
s_write(&s, &CHAPTER_PKT_FOOTER, sizeof(CHAPTER_PKT_FOOTER));
|
|
|
|
pkt->data = (void *)(ao.bytes.array + sizeof(long));
|
|
pkt->size = ao.bytes.num - sizeof(long);
|
|
}
|
|
|
|
/* ========================================================================== */
|
|
/* Encoder packet processing and fragment writer */
|
|
|
|
static inline int64_t packet_pts_usec(struct encoder_packet *packet)
|
|
{
|
|
return packet->pts * 1000000 / packet->timebase_den;
|
|
}
|
|
|
|
static inline struct encoder_packet *get_pkt_at(struct deque *dq, size_t idx)
|
|
{
|
|
return deque_data(dq, idx * sizeof(struct encoder_packet));
|
|
}
|
|
|
|
static inline uint64_t get_longest_track_duration(struct mp4_mux *mux)
|
|
{
|
|
uint64_t dur = 0;
|
|
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
uint64_t track_dur = util_mul_div64(track->duration, 1000,
|
|
track->timebase_den);
|
|
|
|
if (track_dur > dur)
|
|
dur = track_dur;
|
|
}
|
|
|
|
return dur;
|
|
}
|
|
|
|
static void process_packets(struct mp4_mux *mux, struct mp4_track *track,
|
|
uint64_t *mdat_size)
|
|
{
|
|
size_t count = track->packets.size / sizeof(struct encoder_packet);
|
|
|
|
if (!count)
|
|
return;
|
|
|
|
/* Only iterate upt to penultimate packet so we can determine duration
|
|
* for all processed packets. */
|
|
for (size_t i = 0; i < count - 1; i++) {
|
|
struct encoder_packet *pkt = get_pkt_at(&track->packets, i);
|
|
|
|
if (mux->next_frag_pts &&
|
|
packet_pts_usec(pkt) >= mux->next_frag_pts)
|
|
break;
|
|
|
|
struct encoder_packet *next =
|
|
get_pkt_at(&track->packets, i + 1);
|
|
|
|
/* Duration is just distance between current and next DTS. */
|
|
uint32_t duration = (uint32_t)(next->dts - pkt->dts);
|
|
uint32_t sample_count = 1;
|
|
uint32_t size = (uint32_t)pkt->size;
|
|
int32_t offset = (int32_t)(pkt->pts - pkt->dts);
|
|
|
|
/* When using negative CTS, subtract DTS-PTS offset. */
|
|
if (track->type == TRACK_VIDEO &&
|
|
mux->flags & MP4_USE_NEGATIVE_CTS) {
|
|
if (!track->offsets.num)
|
|
track->dts_offset = offset;
|
|
|
|
offset -= track->dts_offset;
|
|
}
|
|
|
|
/* Create temporary sample information for moof */
|
|
struct fragment_sample *smp =
|
|
da_push_back_new(track->fragment_samples);
|
|
smp->size = size;
|
|
smp->offset = offset;
|
|
smp->duration = duration;
|
|
|
|
*mdat_size += size;
|
|
|
|
/* Update global sample information for full moov */
|
|
track->duration += duration;
|
|
|
|
if (track->sample_size) {
|
|
/* Adjust duration/count for fixed sample size */
|
|
sample_count = size / track->sample_size;
|
|
duration = 1;
|
|
}
|
|
|
|
if (!track->samples)
|
|
track->first_pts = pkt->pts;
|
|
|
|
track->samples += sample_count;
|
|
|
|
/* If delta (duration) matche sprevious, increment counter,
|
|
* otherwise create a new entry. */
|
|
if (track->deltas.num == 0 ||
|
|
track->deltas.array[track->deltas.num - 1].delta !=
|
|
duration) {
|
|
struct sample_delta *new =
|
|
da_push_back_new(track->deltas);
|
|
new->delta = duration;
|
|
new->count = sample_count;
|
|
} else {
|
|
track->deltas.array[track->deltas.num - 1].count +=
|
|
sample_count;
|
|
}
|
|
|
|
if (!track->sample_size)
|
|
da_push_back(track->sample_sizes, &size);
|
|
|
|
if (track->type != TRACK_VIDEO)
|
|
continue;
|
|
|
|
if (pkt->keyframe)
|
|
da_push_back(track->sync_samples, &track->samples);
|
|
|
|
/* Only require ctts box if offet is non-zero */
|
|
if (offset && !track->needs_ctts)
|
|
track->needs_ctts = true;
|
|
|
|
/* If dts-pts offset matche sprevious, increment counter,
|
|
* otherwise create a new entry. */
|
|
if (track->offsets.num == 0 ||
|
|
track->offsets.array[track->offsets.num - 1].offset !=
|
|
offset) {
|
|
struct sample_offset *new =
|
|
da_push_back_new(track->offsets);
|
|
new->offset = offset;
|
|
new->count = 1;
|
|
} else {
|
|
track->offsets.array[track->offsets.num - 1].count += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Write track data to file */
|
|
static void write_packets(struct mp4_mux *mux, struct mp4_track *track)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
size_t count = track->packets.size / sizeof(struct encoder_packet);
|
|
if (!count)
|
|
return;
|
|
|
|
struct chunk *chk = da_push_back_new(track->chunks);
|
|
chk->offset = serializer_get_pos(s);
|
|
chk->samples = (uint32_t)track->fragment_samples.num;
|
|
|
|
for (size_t i = 0; i < track->fragment_samples.num; i++) {
|
|
struct encoder_packet pkt;
|
|
deque_pop_front(&track->packets, &pkt,
|
|
sizeof(struct encoder_packet));
|
|
s_write(s, pkt.data, pkt.size);
|
|
obs_encoder_packet_release(&pkt);
|
|
}
|
|
|
|
chk->size = (uint32_t)(serializer_get_pos(s) - chk->offset);
|
|
|
|
/* Fixup sample count for fixed-size codecs */
|
|
if (track->sample_size)
|
|
chk->samples = chk->size / track->sample_size;
|
|
|
|
da_clear(track->fragment_samples);
|
|
}
|
|
|
|
static void mp4_flush_fragment(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
// Write file header if not already done
|
|
if (!mux->fragments_written) {
|
|
mp4_write_ftyp(mux, true);
|
|
/* Placeholder to write mdat header during soft-remux */
|
|
mux->placeholder_offset = serializer_get_pos(s);
|
|
mp4_write_free(mux);
|
|
}
|
|
|
|
// Array output as temporary buffer to avoid sending seeks to disk
|
|
struct serializer as;
|
|
struct array_output_data aod;
|
|
array_output_serializer_init(&as, &aod);
|
|
mux->serializer = &as;
|
|
|
|
// Write initial incomplete moov (because fragmentation)
|
|
if (!mux->fragments_written) {
|
|
mp4_write_moov(mux, true);
|
|
s_write(s, aod.bytes.array, aod.bytes.num);
|
|
array_output_serializer_reset(&aod);
|
|
}
|
|
|
|
mux->fragments_written++;
|
|
|
|
/* --------------------------------------------------------- */
|
|
/* Analyse packets and create fragment moof. */
|
|
|
|
uint64_t mdat_size = 8;
|
|
|
|
for (size_t idx = 0; idx < mux->tracks.num; idx++) {
|
|
struct mp4_track *track = &mux->tracks.array[idx];
|
|
process_packets(mux, track, &mdat_size);
|
|
}
|
|
|
|
if (!mux->next_frag_pts && mux->chapter_track) {
|
|
// Create dummy chapter marker at the end so duration is correct
|
|
uint64_t duration = get_longest_track_duration(mux);
|
|
struct encoder_packet pkt;
|
|
mp4_create_chapter_pkt(&pkt, (int64_t)duration * 1000, "Dummy");
|
|
deque_push_back(&mux->chapter_track->packets, &pkt,
|
|
sizeof(struct encoder_packet));
|
|
|
|
process_packets(mux, mux->chapter_track, &mdat_size);
|
|
}
|
|
|
|
// write moof once to get size
|
|
int64_t moof_start = serializer_get_pos(s);
|
|
size_t moof_size = mp4_write_moof(mux, 0, moof_start);
|
|
array_output_serializer_reset(&aod);
|
|
|
|
// write moof again with known size
|
|
mp4_write_moof(mux, (uint32_t)moof_size, moof_start);
|
|
|
|
// Write to output and restore real serializer
|
|
s_write(s, aod.bytes.array, aod.bytes.num);
|
|
mux->serializer = s;
|
|
array_output_serializer_free(&aod);
|
|
|
|
/* --------------------------------------------------------- */
|
|
/* Write audio and video samples (in chunks). Also update */
|
|
/* global chunk and sample information for final moov. */
|
|
|
|
if (mdat_size > UINT32_MAX) {
|
|
s_wb32(s, 1);
|
|
s_write(s, "mdat", 4);
|
|
s_wb64(s, mdat_size + 8);
|
|
} else {
|
|
s_wb32(s, (uint32_t)mdat_size);
|
|
s_write(s, "mdat", 4);
|
|
}
|
|
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *track = &mux->tracks.array[i];
|
|
write_packets(mux, track);
|
|
}
|
|
|
|
/* Only write chapter packets on final flush. */
|
|
if (!mux->next_frag_pts && mux->chapter_track)
|
|
write_packets(mux, mux->chapter_track);
|
|
|
|
mux->next_frag_pts = 0;
|
|
}
|
|
|
|
/* ========================================================================== */
|
|
/* Track object functions */
|
|
|
|
static inline void track_insert_packet(struct mp4_track *track,
|
|
struct encoder_packet *pkt)
|
|
{
|
|
int64_t pts_usec = packet_pts_usec(pkt);
|
|
if (pts_usec > track->last_pts_usec)
|
|
track->last_pts_usec = pts_usec;
|
|
|
|
deque_push_back(&track->packets, pkt, sizeof(struct encoder_packet));
|
|
}
|
|
|
|
static inline uint32_t get_sample_size(struct mp4_track *track)
|
|
{
|
|
audio_t *audio = obs_encoder_audio(track->encoder);
|
|
if (!audio)
|
|
return 0;
|
|
|
|
const struct audio_output_info *info = audio_output_get_info(audio);
|
|
uint32_t channels = get_audio_channels(info->speakers);
|
|
|
|
switch (track->codec) {
|
|
case CODEC_PCM_F32:
|
|
return channels * 4; // 4 bytes per sample (32-bit)
|
|
case CODEC_PCM_I24:
|
|
return channels * 3; // 3 bytes per sample (24-bit)
|
|
case CODEC_PCM_I16:
|
|
return channels * 2; // 2 bytes per sample (16-bit)
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/*
 * Map the encoder's codec string (as returned by obs_encoder_get_codec())
 * to the muxer's codec enum. Strings not listed below yield CODEC_UNKNOWN.
 */
static inline enum mp4_codec get_codec(obs_encoder_t *enc)
{
	static const struct {
		const char *name;
		enum mp4_codec id;
	} known_codecs[] = {
		{"h264", CODEC_H264},
		{"hevc", CODEC_HEVC},
		{"av1", CODEC_AV1},
		{"aac", CODEC_AAC},
		{"opus", CODEC_OPUS},
		{"flac", CODEC_FLAC},
		{"alac", CODEC_ALAC},
		{"pcm_s16le", CODEC_PCM_I16},
		{"pcm_s24le", CODEC_PCM_I24},
		{"pcm_f32le", CODEC_PCM_F32},
	};
	const size_t num_known = sizeof(known_codecs) / sizeof(known_codecs[0]);

	const char *codec = obs_encoder_get_codec(enc);

	for (size_t i = 0; i < num_known; i++) {
		if (strcmp(codec, known_codecs[i].name) == 0)
			return known_codecs[i].id;
	}

	return CODEC_UNKNOWN;
}
|
|
|
|
/*
 * Append a new track for encoder `enc` to mux->tracks.
 *
 * The track takes its own reference on the encoder, gets the next track id
 * and has its codec, timebase and timescale filled in. Audio tracks also get
 * their fixed sample size (0 if the codec has none).
 */
static inline void add_track(struct mp4_mux *mux, obs_encoder_t *enc)
{
	struct mp4_track *track = da_push_back_new(mux->tracks);
	const bool is_video = obs_encoder_get_type(enc) == OBS_ENCODER_VIDEO;

	track->type = is_video ? TRACK_VIDEO : TRACK_AUDIO;
	track->encoder = obs_encoder_get_ref(enc);
	track->codec = get_codec(enc);
	track->track_id = ++mux->track_ctr;

	if (is_video) {
		/* Timebase is the inverse of the output frame rate */
		video_t *video = obs_encoder_video(enc);
		const struct video_output_info *voi =
			video_output_get_info(video);

		track->timebase_num = voi->fps_den;
		track->timebase_den = voi->fps_num;

		/* FFmpeg does this to compensate for non-monotonic
		 * timestamps; we probably don't need it, but stick to what
		 * they do for maximum compatibility. */
		track->timescale = track->timebase_den;
		while (track->timescale < 10000)
			track->timescale *= 2;

		return;
	}

	/* Audio: timebase and timescale are the sample rate */
	uint32_t sample_rate = obs_encoder_get_sample_rate(enc);
	if (track->codec == CODEC_OPUS)
		sample_rate = 48000; /* Opus is always 48 kHz */

	track->timebase_num = 1;
	track->timebase_den = sample_rate;
	track->timescale = sample_rate;

	/* Sample size (if fixed) */
	track->sample_size = get_sample_size(track);
}
|
|
|
|
static inline void add_chapter_track(struct mp4_mux *mux)
|
|
{
|
|
mux->chapter_track = bzalloc(sizeof(struct mp4_track));
|
|
mux->chapter_track->type = TRACK_CHAPTERS;
|
|
mux->chapter_track->codec = CODEC_TEXT;
|
|
mux->chapter_track->timescale = 1000;
|
|
mux->chapter_track->timebase_num = 1;
|
|
mux->chapter_track->timebase_den = 1000;
|
|
mux->chapter_track->track_id = ++mux->track_ctr;
|
|
}
|
|
|
|
static inline void free_packets(struct deque *dq)
|
|
{
|
|
size_t num = dq->size / sizeof(struct encoder_packet);
|
|
|
|
for (size_t i = 0; i < num; i++) {
|
|
struct encoder_packet pkt;
|
|
deque_pop_front(dq, &pkt, sizeof(struct encoder_packet));
|
|
obs_encoder_packet_release(&pkt);
|
|
}
|
|
}
|
|
|
|
static inline void free_track(struct mp4_track *track)
|
|
{
|
|
if (!track)
|
|
return;
|
|
|
|
obs_encoder_release(track->encoder);
|
|
|
|
free_packets(&track->packets);
|
|
deque_free(&track->packets);
|
|
|
|
da_free(track->sample_sizes);
|
|
da_free(track->chunks);
|
|
da_free(track->deltas);
|
|
da_free(track->offsets);
|
|
da_free(track->sync_samples);
|
|
da_free(track->fragment_samples);
|
|
}
|
|
|
|
/* ===========================================================================*/
|
|
/* API */
|
|
|
|
/*
 * Allocate a muxer that writes to `serializer` on behalf of `output`.
 *
 * One track is added for every video encoder and then every audio encoder
 * currently assigned to the output, in index order. The returned object is
 * released with mp4_mux_destroy().
 */
struct mp4_mux *mp4_mux_create(obs_output_t *output,
			       struct serializer *serializer,
			       enum mp4_mux_flags flags)
{
	struct mp4_mux *mux = bzalloc(sizeof(struct mp4_mux));

	mux->output = output;
	mux->serializer = serializer;
	mux->flags = flags;
	/* MP4 timestamps count from 1904 rather than 1970; the constant is
	 * the number of seconds between the two epochs. */
	mux->creation_time = time(NULL) + 0x7C25B080;

	/* Video tracks first ... */
	for (size_t idx = 0; idx < MAX_OUTPUT_VIDEO_ENCODERS; idx++) {
		obs_encoder_t *venc =
			obs_output_get_video_encoder2(output, idx);

		if (venc)
			add_track(mux, venc);
	}

	/* ... followed by audio tracks */
	for (size_t idx = 0; idx < MAX_OUTPUT_AUDIO_ENCODERS; idx++) {
		obs_encoder_t *aenc = obs_output_get_audio_encoder(output, idx);

		if (aenc)
			add_track(mux, aenc);
	}

	return mux;
}
|
|
|
|
void mp4_mux_destroy(struct mp4_mux *mux)
|
|
{
|
|
for (size_t i = 0; i < mux->tracks.num; i++)
|
|
free_track(&mux->tracks.array[i]);
|
|
|
|
free_track(mux->chapter_track);
|
|
bfree(mux->chapter_track);
|
|
da_free(mux->tracks);
|
|
bfree(mux);
|
|
}
|
|
|
|
bool mp4_mux_submit_packet(struct mp4_mux *mux, struct encoder_packet *pkt)
|
|
{
|
|
struct mp4_track *track = NULL;
|
|
struct encoder_packet parsed_packet;
|
|
enum obs_encoder_type type = pkt->type;
|
|
bool fragment_ready = mux->next_frag_pts > 0;
|
|
|
|
for (size_t i = 0; i < mux->tracks.num; i++) {
|
|
struct mp4_track *tmp = &mux->tracks.array[i];
|
|
|
|
fragment_ready = fragment_ready &&
|
|
tmp->last_pts_usec >= mux->next_frag_pts;
|
|
|
|
if (tmp->encoder == pkt->encoder)
|
|
track = tmp;
|
|
}
|
|
|
|
if (!track) {
|
|
warn("Could not find track for packet of type %s with "
|
|
"track id %zu!",
|
|
type == OBS_ENCODER_VIDEO ? "video" : "audio",
|
|
pkt->track_idx);
|
|
return false;
|
|
}
|
|
|
|
/* If all tracks have caught up to the keyframe we want to fragment on,
|
|
* flush the current fragment to disk. */
|
|
if (fragment_ready)
|
|
mp4_flush_fragment(mux);
|
|
|
|
if (type == OBS_ENCODER_AUDIO) {
|
|
obs_encoder_packet_ref(&parsed_packet, pkt);
|
|
} else {
|
|
if (track->codec == CODEC_H264)
|
|
obs_parse_avc_packet(&parsed_packet, pkt);
|
|
else if (track->codec == CODEC_HEVC)
|
|
obs_parse_hevc_packet(&parsed_packet, pkt);
|
|
else if (track->codec == CODEC_AV1)
|
|
obs_parse_av1_packet(&parsed_packet, pkt);
|
|
|
|
/* Set fragmentation PTS if packet is keyframe and PTS > 0 */
|
|
if (parsed_packet.keyframe && parsed_packet.pts > 0) {
|
|
mux->next_frag_pts = packet_pts_usec(&parsed_packet);
|
|
}
|
|
}
|
|
|
|
track_insert_packet(track, &parsed_packet);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool mp4_mux_add_chapter(struct mp4_mux *mux, int64_t dts_usec,
|
|
const char *name)
|
|
{
|
|
if (dts_usec < 0)
|
|
return false;
|
|
if (!mux->chapter_track)
|
|
add_chapter_track(mux);
|
|
|
|
/* To work correctly there needs to be a chapter at PTS 0,
|
|
* create that here if necessary. */
|
|
if (dts_usec > 0 && mux->chapter_track->packets.size == 0) {
|
|
mp4_mux_add_chapter(mux, 0,
|
|
obs_module_text("MP4Output.StartChapter"));
|
|
}
|
|
|
|
/* Create packets that will be muxed on final flush */
|
|
struct encoder_packet pkt;
|
|
mp4_create_chapter_pkt(&pkt, dts_usec, name);
|
|
track_insert_packet(mux->chapter_track, &pkt);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool mp4_mux_finalise(struct mp4_mux *mux)
|
|
{
|
|
struct serializer *s = mux->serializer;
|
|
|
|
/* Flush remaining audio/video samples as final fragment. */
|
|
info("Flushing final fragment...");
|
|
|
|
/* Set target PTS to zero to indicate that we want to flush all
|
|
* the remaining packets */
|
|
mux->next_frag_pts = 0;
|
|
mp4_flush_fragment(mux);
|
|
|
|
info("Number of fragments: %u", mux->fragments_written);
|
|
|
|
if (mux->flags & MP4_SKIP_FINALISATION) {
|
|
warn("Skipping MP4 finalization!");
|
|
return true;
|
|
}
|
|
|
|
int64_t data_end = serializer_get_pos(s);
|
|
|
|
/* ---------------------------------------- */
|
|
/* Write full moov box */
|
|
|
|
/* Use array serializer for moov data as this will do a lot
|
|
* of seeks to write size values of variable-size boxes. */
|
|
struct serializer fs;
|
|
struct array_output_data ao;
|
|
array_output_serializer_init(&fs, &ao);
|
|
|
|
mux->serializer = &fs;
|
|
|
|
mp4_write_moov(mux, false);
|
|
s_write(s, ao.bytes.array, ao.bytes.num);
|
|
info("Full moov size: %zu KiB", ao.bytes.num / 1024);
|
|
|
|
mux->serializer = s; // restore real serializer
|
|
array_output_serializer_free(&ao);
|
|
|
|
/* ---------------------------------------- */
|
|
/* Overwrite file header (ftyp + free/moov) */
|
|
|
|
serializer_seek(s, 0, SERIALIZE_SEEK_START);
|
|
mp4_write_ftyp(mux, false);
|
|
|
|
size_t data_size = data_end - mux->placeholder_offset;
|
|
serializer_seek(s, (int64_t)mux->placeholder_offset,
|
|
SERIALIZE_SEEK_START);
|
|
|
|
/* If data is more than 4 GiB the mdat header becomes 16 bytes, hence
|
|
* why we create a 16-byte placeholder "free" box at the start. */
|
|
if (data_size > UINT32_MAX) {
|
|
s_wb32(s, 1); // 1 = use "largesize" field instead
|
|
s_write(s, "mdat", 4);
|
|
s_wb64(s, data_size); // largesize (64-bit)
|
|
} else {
|
|
s_wb32(s, (uint32_t)data_size);
|
|
s_write(s, "mdat", 4);
|
|
}
|
|
|
|
info("Final mdat size: %zu KiB", data_size / 1024);
|
|
return true;
|
|
}
|