0
0
mirror of https://github.com/mpv-player/mpv.git synced 2024-09-19 19:42:24 +02:00

Revert "Port several python scripts to Perl"

This reverts commit fae7307931.

Before the waf build system was used, we had a configure script written
in shell. To drop the build dependency on Python, someone rewrote the
Python scripts we had to Perl. Now the shell configure script is gone,
and it makes no sense to have a build dependency on both Perl and
Python.

This isn't just a straight revert. It adds the new Matroska EBML
elements to the old Python scripts, adjusts the waf build system, and of
course doesn't add anything back needed by the old build system.

It would be better if this used matroska.py/file2string.py directly by
importing them as modules, instead of calling them via "python". But for
now this is simpler.
This commit is contained in:
wm4 2016-12-17 13:24:05 +01:00
parent 2b8b17402e
commit ff9f5e06ff
11 changed files with 495 additions and 1404 deletions

View File

@ -1,24 +0,0 @@
#! /usr/bin/env perl
use strict;
use warnings;
# Convert the contents of a file into a C string constant.
# Note that the compiler will implicitly add an extra 0 byte at the end
# of every string, so code using the string may need to remove that to get
# the exact contents of the original file.
# FIXME: why not a char array?
# Treat only alphanumerics and punctuation (excluding " and ?) as safe.
# '"' and '\' would terminate or corrupt the C string literal, and '?' is
# excluded so that the output can never contain a C trigraph sequence.
my $unsafe_chars = qr{[^][A-Za-z0-9!#%&'()*+,./:;<=>^_{|}~ -]};

# Emit one C string literal per input line for every file on the command line.
for my $file (@ARGV) {
    my $fh;
    unless (open $fh, '<:raw', $file) {
        # Keep going with the remaining files, but do not hide the problem:
        # a silently skipped input yields a silently incomplete C source.
        warn "$0: cannot open $file: $!\n";
        next;
    }
    print "/* Generated from $file */\n";
    while (my $line = <$fh>) {
        # Replace every unsafe byte (including the trailing newline) with
        # its three-digit octal escape.
        $line =~ s/($unsafe_chars)/sprintf '\\%03o', ord $1/ge;
        print "\"$line\"\n";
    }
    close $fh;
}

27
TOOLS/file2string.py Executable file
View File

@ -0,0 +1,27 @@
#!/usr/bin/env python
# Convert the contents of a file into a C string constant.
# Note that the compiler will implicitly add an extra 0 byte at the end
# of every string, so code using the string may need to remove that to get
# the exact contents of the original file.
import sys
# Indexing a byte string yields int on Python 3.x, and a str on Python 2.x
def pord(c):
    """Return the integer byte value of one element of a byte string.

    A one-character ``str`` is converted with ``ord``; anything else is
    returned unchanged.
    """
    if type(c) == str:
        return ord(c)
    return c
def main(infile):
    """Write the contents of ``infile`` to stdout as C string literals.

    ``infile`` is iterated line by line (it is opened in binary mode by the
    caller); each line becomes one double-quoted C string literal on its
    own output line.
    """
    # Default: every byte value is written as a three-digit octal escape.
    conv = ['\\' + ("%03o" % c) for c in range(256)]
    # Characters that may appear verbatim inside a C string literal.
    # '?' is deliberately NOT in this list: written verbatim, input such as
    # "??/" would form a C trigraph and be rewritten by the preprocessor
    # when compiling in a strict ISO C mode. It falls back to "\077".
    safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" \
                 "0123456789!#%&'()*+,-./:;<=>[]^_{|}~ "
    for c in safe_chars:
        conv[ord(c)] = c
    # Each two-character string is (raw character, escape letter).
    for c, esc in ("\nn", "\tt", r"\\", '""'):
        conv[ord(c)] = '\\' + esc
    for line in infile:
        sys.stdout.write('"' + ''.join(conv[pord(c)] for c in line) + '"\n')
# Only run the conversion when executed as a script, so that the module can
# also be imported (e.g. by the build system) without side effects.
if __name__ == "__main__":
    # Binary mode: the bytes must be reproduced exactly, whatever the encoding.
    with open(sys.argv[1], 'rb') as infile:
        sys.stdout.write("// Generated from %s\n\n" % sys.argv[1])
        main(infile)

View File

@ -1,30 +0,0 @@
use 5.008;
use strict;
use warnings;
# ABSTRACT: Module collection to parse Matroska files.
package Parse::Matroska;
=head1 DESCRIPTION
C<use>s L<Parse::Matroska::Reader>. See the documentation
of the modules mentioned in L</"SEE ALSO"> for more information
on how to use this module.
It's intended for this module to contain high-level interfaces
to the other modules in the distribution.
=head1 SOURCE CODE
L<https://github.com/Kovensky/Parse-Matroska>
=head1 SEE ALSO
L<Parse::Matroska::Reader>, L<Parse::Matroska::Element>,
L<Parse::Matroska::Definitions>.
=cut
use Parse::Matroska::Reader;
1;

View File

@ -1,384 +0,0 @@
use 5.008;
use strict;
use warnings;
# ABSTRACT: internal EBML grammar definitions
package Parse::Matroska::Definitions;
use Parse::Matroska::Utils qw{uniq uncamelize};
use Exporter;
our @ISA = qw{Exporter};
our @EXPORT_OK = qw{elem_by_hexid %EBML_DEFINITION %MATROSKA_DEFINITION};
=head1 SYNOPSIS
use Parse::Matroska::Definitions qw{elem_by_hexid};
my $ebml_id = elem_by_hexid('1a45dfa3');
print "EBML ID $ebml_id->{elid}'s name: $ebml_id->{name}";
=head1 DESCRIPTION
Contains the definition of the EBML grammar as expected in
Matroska files. This module is meant mostly for internal use.
As this was extended from a script in mpv-player, some data
generated is apparently useless for regular module users
but is still relevant to the mpv-player script. Such data
is annotated as being for mpv compatibility.
=head1 NOTE
The API of this module is not yet considered stable.
=head1 GLOBALS
These global variables are considered B<immutable>.
=head2 @Parse::Matroska::Definitions::global_elem_list
A global list of known matroska elements. Useful for
mpv's matroska script, used for generating C headers
that parse matroska.
=head2 %Parse::Matroska::Definitions::global_elem_dict
A global hash of known matroska elements. Used internally
by L</elem_by_hexid($id)>.
=cut
# Start with empty registries; elem() appends every element it creates to
# the list and stores it in the dict under its element id.
@Parse::Matroska::Definitions::global_elem_list = ();
%Parse::Matroska::Definitions::global_elem_dict = ();
=head2 %EBML_DEFINITION
Optionally-importable hash of known EBML IDs belonging
to the EBML generic grammar.
=head2 %MATROSKA_DEFINITION
Optionally-importable hash of known EBML IDs belonging
to the Matroska-specific grammar.
=cut
# Each define_* call returns a flat (elid => element) list, which becomes a
# hash here. Evaluating the calls also fills the global element list and
# dict as a side effect, because elem() registers every element it builds.
our %EBML_DEFINITION = define_ebml();
our %MATROSKA_DEFINITION = define_matroska();
=method elem_by_hexid($id)
Returns an EBML Element Definition corresponding to the provided
hexadecimal string. Returns C<undef> if the element is unknown.
=cut
sub elem_by_hexid {
    # Look the id up in the package-wide registry; the key is the hex
    # string exactly as given. Unknown ids yield undef.
    my $hexid = shift;
    my $registry = \%Parse::Matroska::Definitions::global_elem_dict;
    return $registry->{$hexid};
}
################################################
### Helper functions for document definition ###
################################################
# used by elem when setting the 'valname' key
# Maps each scalar EBML value type name (the third argument of elem) to the
# C type name stored as the element's 'valname'. elem dies for any scalar
# type that has no entry here.
use constant TYPE_MAP => {
uint => 'uint64_t',
str => 'char *',
binary => 'struct bstr',
ebml_id => 'uint32_t',
float => 'double',
sint => 'int64_t',
};
# this will be localized to "MATROSKA" or "EBML" on the elem declarations
# (elem interpolates it as the prefix of each element's 'definename')
our $ELEM_DEFINE_TYPE = undef;
=method elem($name,$elid,$valtype)
NOTE: never call this function yourself; it changes data structures
that are considered immutable outside of this package.
Internal API function that generates the EBML Element Definitions.
This API function returns a two-element list whose first element is
C<$elid> and whose second is a reference to a generated hash. The
generated hash is stored in the @global_elem_list and %global_elem_dict.
The generated hash contains:
=for :list
= name
The EBML Element's name, given through C<$name>.
= elid
The EBML Element's hex id, given through C<$elid>. Used for lookups by L</elem_by_hexid($id)>.
= valtype
The EBML Element's type, given through C<$valtype>, except when C<$valtype> is an arrayref.
= multiple
If C<$name> ends with a C<*>, this is set as true and strips the C<*> from L</name>. Used to
mark elements that may be repeated.
= subelements
An arrayref of elements that may be children of this element, given through C<$valtype> if it
is an arrayref. Sets L</valtype> to C<sub> if there are subelements.
= subids
An arrayref listing all the L</elid>s of subelements, C<uniq>ified.
The following elements are for mpv compatibility:
=for :list
= definename
Name used for generating C #defines.
= fieldname
Name used for generating C struct fields.
= structname
Name used for generating C struct names.
= ebmltype
A pre-#defined constant to describe the element's type.
= valname
Typename used when declaring a struct field referring to this element.
=cut
# Build one EBML element definition from ($name, $elid, $valtype), register
# it in the global element list and dict, and return (elid, hashref) so that
# a sequence of elem() calls can be used directly as a hash constructor.
# $valtype is either a scalar type name known to TYPE_MAP or a hashref of
# child definitions (as produced by nested elem() calls).
sub elem {
    my %e = (name => shift, elid => shift, valtype => shift);

    # strip * from name, set 'multiple' if there was one
    $e{multiple} = scalar $e{name} =~ s/\*$//;

    # ELEM_DEFINE_TYPE is either MATROSKA or EBML
    $e{definename} = "${ELEM_DEFINE_TYPE}_ID_".uc($e{name});
    $e{fieldname} = uncamelize($e{name});
    $e{structname} = "ebml_$e{fieldname}";

    if (ref $e{valtype} eq 'HASH') {
        $e{subelements} = $e{valtype};
        # Wrap in [] so that uniq runs in list context and 'subids' really
        # is the documented arrayref of ids (a bare scalar assignment would
        # store only the number of ids).
        $e{subids} = [ uniq(map { $_->{elid} } values %{$e{subelements}}) ];
        $e{valtype} = 'sub';
        $e{ebmltype} = 'EBML_TYPE_SUBELEMENTS';
        $e{valname} = "struct $e{structname}";
    } else {
        $e{ebmltype} = "EBML_TYPE_\U$e{valtype}";
        die "Unrecognized value type $e{valtype}" unless
            defined ($e{valname} = TYPE_MAP->{$e{valtype}});
    }

    my $e = \%e;
    push @Parse::Matroska::Definitions::global_elem_list, $e;
    # Register under the id as written and under its lowercase form. Ids
    # decoded from a file with unpack "H*" are always lowercase, so an id
    # declared with uppercase hex digits would otherwise never be found.
    $Parse::Matroska::Definitions::global_elem_dict{$_} = $e
        for $e{elid}, lc $e{elid};
    return ($e{elid}, $e);
}
##############################################
### EBML and Matroska document definitions ###
##############################################
=method define_ebml
Internal function that defines the EBML generic grammar.
Must not be called from outside the package.
=cut
sub define_ebml {
    # Every element created while this sub runs is tagged as generic EBML.
    local $ELEM_DEFINE_TYPE = 'EBML';

    # Children of the EBML header element. Each elem() call contributes an
    # (elid => definition) pair, so the list can initialise a hash directly.
    my %header_children = (
        elem('EBMLVersion', '4286', 'uint'),
        elem('EBMLReadVersion', '42f7', 'uint'),
        elem('EBMLMaxIDLength', '42f2', 'uint'),
        elem('EBMLMaxSizeLength', '42f3', 'uint'),
        elem('DocType', '4282', 'str'),
        elem('DocTypeVersion', '4287', 'uint'),
        elem('DocTypeReadVersion', '4285', 'uint'),
    );

    # Top-level generic elements: the header itself plus CRC32 and Void.
    return (
        elem('EBML', '1a45dfa3', \%header_children),
        elem('CRC32', 'bf', 'binary'),
        elem('Void', 'ec', 'binary'),
    );
}
=method define_matroska
Internal function that defines the Matroska-specific EBML grammar.
Must not be called from outside the package.
=cut
# Declares the Matroska-specific element tree and returns it as a flat
# (elid => definition) list. Each elem() call takes (name, hex id, type);
# a trailing '*' on the name marks an element that may be repeated, and a
# { ... } block as the type lists the element's children.
# NOTE(review): a few ids below use uppercase hex digits (e.g. '55B0',
# '56AA') while most are lowercase; the lookup dict is keyed by string, so
# confirm that lookups handle the case difference.
sub define_matroska {
# tag every element created below as belonging to the Matroska grammar
local $ELEM_DEFINE_TYPE = 'MATROSKA';
return (
elem('Segment', '18538067', {
# --- seek index ---
elem('SeekHead*', '114d9b74', {
elem('Seek*', '4dbb', {
elem('SeekID', '53ab', 'ebml_id'),
elem('SeekPosition', '53ac', 'uint'),
}),
}),
# --- segment information ---
elem('Info*', '1549a966', {
elem('SegmentUID', '73a4', 'binary'),
elem('PrevUID', '3cb923', 'binary'),
elem('NextUID', '3eb923', 'binary'),
elem('TimecodeScale', '2ad7b1', 'uint'),
elem('DateUTC', '4461', 'sint'),
elem('Title', '7ba9', 'str'),
elem('MuxingApp', '4d80', 'str'),
elem('WritingApp', '5741', 'str'),
elem('Duration', '4489', 'float'),
}),
# --- clusters (blocks of media data) ---
elem('Cluster*', '1f43b675', {
elem('Timecode', 'e7', 'uint'),
elem('BlockGroup*', 'a0', {
elem('Block', 'a1', 'binary'),
elem('BlockDuration', '9b', 'uint'),
elem('ReferenceBlock*', 'fb', 'sint'),
elem('DiscardPadding', '75A2', 'sint'),
}),
elem('SimpleBlock*', 'a3', 'binary'),
}),
# --- track descriptions ---
elem('Tracks*', '1654ae6b', {
elem('TrackEntry*', 'ae', {
elem('TrackNumber', 'd7', 'uint'),
elem('TrackUID', '73c5', 'uint'),
elem('TrackType', '83', 'uint'),
elem('FlagEnabled', 'b9', 'uint'),
elem('FlagDefault', '88', 'uint'),
elem('FlagForced', '55aa', 'uint'),
elem('FlagLacing', '9c', 'uint'),
elem('MinCache', '6de7', 'uint'),
elem('MaxCache', '6df8', 'uint'),
elem('DefaultDuration', '23e383', 'uint'),
elem('TrackTimecodeScale', '23314f', 'float'),
elem('MaxBlockAdditionID', '55ee', 'uint'),
elem('Name', '536e', 'str'),
elem('Language', '22b59c', 'str'),
elem('CodecID', '86', 'str'),
elem('CodecPrivate', '63a2', 'binary'),
elem('CodecName', '258688', 'str'),
elem('CodecDecodeAll', 'aa', 'uint'),
elem('CodecDelay', '56AA', 'uint'),
elem('SeekPreRoll', '56BB', 'uint'),
elem('Video', 'e0', {
elem('FlagInterlaced', '9a', 'uint'),
elem('PixelWidth', 'b0', 'uint'),
elem('PixelHeight', 'ba', 'uint'),
elem('DisplayWidth', '54b0', 'uint'),
elem('DisplayHeight', '54ba', 'uint'),
elem('DisplayUnit', '54b2', 'uint'),
elem('FrameRate', '2383e3', 'float'),
elem('ColourSpace', '2eb524', 'binary'),
elem('StereoMode', '53b8', 'uint'),
elem('Colour', '55B0', {
elem('MatrixCoefficients', '55B1', 'uint'),
elem('BitsPerChannel', '55B2', 'uint'),
elem('ChromaSubsamplingHorz', '55B3', 'uint'),
elem('ChromaSubsamplingVert', '55B4', 'uint'),
elem('CbSubsamplingHorz', '55B5', 'uint'),
elem('CbSubsamplingVert', '55B6', 'uint'),
elem('ChromaSitingHorz', '55B7', 'uint'),
elem('ChromaSitingVert', '55B8', 'uint'),
elem('Range', '55B9', 'uint'),
elem('TransferCharacteristics', '55BA', 'uint'),
elem('Primaries', '55BB', 'uint'),
elem('MaxCLL', '55BC', 'uint'),
elem('MaxFALL', '55BD', 'uint'),
elem('MasteringMetadata', '55D0', {
elem('PrimaryRChromaticityX', '55D1', 'float'),
elem('PrimaryRChromaticityY', '55D2', 'float'),
elem('PrimaryGChromaticityX', '55D3', 'float'),
elem('PrimaryGChromaticityY', '55D4', 'float'),
elem('PrimaryBChromaticityX', '55D5', 'float'),
elem('PrimaryBChromaticityY', '55D6', 'float'),
elem('WhitePointChromaticityX', '55D7', 'float'),
elem('WhitePointChromaticityY', '55D8', 'float'),
elem('LuminanceMax', '55D9', 'float'),
elem('LuminanceMin', '55DA', 'float'),
}),
}),
}),
elem('Audio', 'e1', {
elem('SamplingFrequency', 'b5', 'float'),
elem('OutputSamplingFrequency', '78b5', 'float'),
elem('Channels', '9f', 'uint'),
elem('BitDepth', '6264', 'uint'),
}),
elem('ContentEncodings', '6d80', {
elem('ContentEncoding*', '6240', {
elem('ContentEncodingOrder', '5031', 'uint'),
elem('ContentEncodingScope', '5032', 'uint'),
elem('ContentEncodingType', '5033', 'uint'),
elem('ContentCompression', '5034', {
elem('ContentCompAlgo', '4254', 'uint'),
elem('ContentCompSettings', '4255', 'binary'),
}),
}),
}),
}),
}),
# --- cue (seek point) index ---
elem('Cues', '1c53bb6b', {
elem('CuePoint*', 'bb', {
elem('CueTime', 'b3', 'uint'),
elem('CueTrackPositions*', 'b7', {
elem('CueTrack', 'f7', 'uint'),
elem('CueClusterPosition', 'f1', 'uint'),
elem('CueRelativePosition','f0', 'uint'),
elem('CueDuration', 'b2', 'uint'),
}),
}),
}),
# --- attachments ---
elem('Attachments', '1941a469', {
elem('AttachedFile*', '61a7', {
elem('FileDescription', '467e', 'str'),
elem('FileName', '466e', 'str'),
elem('FileMimeType', '4660', 'str'),
elem('FileData', '465c', 'binary'),
elem('FileUID', '46ae', 'uint'),
}),
}),
# --- chapters / editions ---
elem('Chapters', '1043a770', {
elem('EditionEntry*', '45b9', {
elem('EditionUID', '45bc', 'uint'),
elem('EditionFlagHidden', '45bd', 'uint'),
elem('EditionFlagDefault', '45db', 'uint'),
elem('EditionFlagOrdered', '45dd', 'uint'),
elem('ChapterAtom*', 'b6', {
elem('ChapterUID', '73c4', 'uint'),
elem('ChapterTimeStart', '91', 'uint'),
elem('ChapterTimeEnd', '92', 'uint'),
elem('ChapterFlagHidden', '98', 'uint'),
elem('ChapterFlagEnabled', '4598', 'uint'),
elem('ChapterSegmentUID', '6e67', 'binary'),
elem('ChapterSegmentEditionUID', '6ebc', 'uint'),
elem('ChapterDisplay*', '80', {
elem('ChapString', '85', 'str'),
elem('ChapLanguage*', '437c', 'str'),
elem('ChapCountry*', '437e', 'str'),
}),
}),
}),
}),
# --- tags ---
elem('Tags*', '1254c367', {
elem('Tag*', '7373', {
elem('Targets', '63c0', {
elem('TargetTypeValue', '68ca', 'uint'),
elem('TargetTrackUID', '63c5', 'uint'),
elem('TargetEditionUID', '63c9', 'uint'),
elem('TargetChapterUID', '63c4', 'uint'),
elem('TargetAttachmentUID', '63c6', 'uint'),
}),
elem('SimpleTag*', '67c8', {
elem('TagName', '45a3', 'str'),
elem('TagLanguage', '447a', 'str'),
elem('TagString', '4487', 'str'),
}),
}),
}),
}),
);
}
1;

View File

@ -1,331 +0,0 @@
use 5.008;
use strict;
use warnings;
# ABSTRACT: a mid-level representation of an EBML element
package Parse::Matroska::Element;
use Carp;
use List::Util qw{first};
=head1 SYNOPSIS
use Parse::Matroska::Reader;
my $reader = Parse::Matroska::Reader->new($path);
my $elem = $reader->read_element;
print "ID: $elem->{elid}\n";
print "Name: $elem->{name}\n";
print "Length: $elem->{content_len}\n";
print "Type: $elem->{type}\n";
print "Child count: ", scalar(@{$elem->all_children}), "\n";
if ($elem->{type} eq 'sub') {
while (my $chld = $elem->next_child) {
print "Child Name: $chld->{name}\n";
}
} else {
print "Value: ", $elem->get_value, "\n";
}
=head1 DESCRIPTION
Represents a single Matroska element as decoded by
L<Parse::Matroska::Reader>. This is essentially a hash
augmented with functions for delay-loading of binary
values and children elements.
=head1 NOTE
The API of this module is not yet considered stable.
=attr elid
The EBML Element ID, suitable for passing to
L<Parse::Matroska::Definitions/elem_by_hexid>.
=attr name
The EBML Element's name.
=attr type
The EBML Element's type. Can be C<uint>, C<sint>,
C<float>, C<ebml_id>, C<str> or C<binary>. See L</value>
for details.
Equivalent to
C<elem_by_hexid($elem-E<gt>{elid})-E<gt>{valtype}>.
=attr value
The EBML Element's value. Should be obtained through
L</get_value>.
Is an unicode string if the L</type> is C<str>, that is,
the string has already been decoded by L<Encode/decode>.
Is C<undef> if the L</type> is C<binary> and the contents
were delay-loaded and not yet read. L</get_value> will
do the delayed load if needed.
Is an arrayref if the L</type> is C<sub>, containing
the children nodes that were already loaded.
Is a hashref if the L</type> is C<ebml_id>, containing
the referred element's information as defined in
L<Parse::Matroska::Definitions>. Calling
C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will
return the same object as $elem->{value}.
=attr full_len
The entire length of this EBML Element, including
the header's.
=attr size_len
The length of the size marker. Used when calculating
L</full_len> from L</content_len>
=attr content_len
The length of the contents of this EBML Element,
which excludes the header.
=attr reader
A weakened reference to the associated
L<Parse::Matroska::Reader>.
=method new(%hash)
Creates a new Element initialized with the hash
given as argument.
=cut
sub new {
    # Constructor: bless an empty hash into $class, then let initialize()
    # copy the remaining arguments into it.
    my ($class, @init_args) = @_;
    my $self = bless {}, $class;
    $self->initialize(@init_args);
    return $self;
}
=method initialize(%hash)
Called by L</new> on initialization.
=cut
sub initialize {
    my ($self, %args) = @_;
    # Copy every supplied key/value pair into the object in one slice.
    @{$self}{ keys %args } = values %args;
    # A missing (or otherwise false) depth means "top level".
    $self->{depth} ||= 0;
}
=method skip
Called by the user to ignore the contents of this EBML node.
Needed when ignoring the children of a node.
=cut
sub skip {
    my ($self) = @_;

    # we don't have to skip if there's no reader
    my $reader = $self->{reader}
        or return;

    # Skipping is only valid while the reader still sits at the start of
    # this element's data.
    if ($reader->getpos ne $self->{data_pos}) {
        croak "Too late to skip, reads were already done";
    }

    return $reader->skip($self->{content_len});
}
=method get_value($keep_bin)
Returns the value contained by this EBML element.
If the element has children, returns an arrayref to
the children elements that were already encountered.
If the element's type is C<binary> and the value was
delay-loaded, does the reading now.
If $keep_bin is true, the delay-loaded data is kept
as the L</value>, otherwise, further calls to
C<get_value> will reread the data from the L</reader>.
=cut
# Return this element's value, loading delay-loaded binary data on demand.
#   $keep_bin - if true, binary data read here is cached in {value};
#               otherwise each call re-reads it through the reader.
# Returns undef for 'skip' elements and for elements that have no value and
# are not binary. Croaks if data must be read but the reader is gone, or if
# the data position is unknown.
sub get_value {
    my ($self, $keep_bin) = @_;

    return undef if $self->{type} eq 'skip';
    # Test definedness, not truth: a decoded 0 or empty string is a
    # perfectly valid value and must be returned as such.
    return $self->{value} if defined $self->{value};

    my $reader = $self->{reader} or
        croak "The associated Reader has been deleted";

    # delay-loaded 'binary'
    if ($self->{type} eq 'binary') {
        croak "Cannot seek in the current Reader" unless $self->{data_pos};
        # seek to the data position...
        $reader->setpos($self->{data_pos});
        # read the data, keeping it in value if requested
        if ($keep_bin) {
            $self->{value} = $reader->readlen($self->{content_len});
            return $self->{value};
        } else {
            return $reader->readlen($self->{content_len});
        }
    }

    # nothing stored and nothing to load
    return undef;
}
=method next_child($read_bin)
Builtin iterator; reads and returns the next child element.
Always returns undef if the type isn't C<sub>.
Returns undef at the end of the iterator and resets itself to
point to the first element; so calling L</next_child($read_bin)>
after the iterator returned C<undef> will return the first child.
The optional C<$read_bin> parameter has the children elements
not delay-load their value if their type is C<binary>.
If all children elements have already been read, return
each element in-order as would be given by
L</all_children($recurse,$read_bin)>.
=cut
# Iterator over the children of a 'sub' element.
#   $read_bin - forwarded to the reader's read_element.
# Returns the next child, or nothing/undef at the end of the iteration
# (after which the iterator restarts from the first child). Croaks if the
# reader is gone or if a child extends past the end of this element.
sub next_child {
    my ($self, $read_bin) = @_;
    return unless $self->{type} eq 'sub';

    # Replay mode: every child has been read already, walk the stored list.
    if ($self->{_all_children_read}) {
        my $idx = $self->{_last_child} ||= 0;
        if ($idx == @{$self->{value}}) {
            # reset the iterator, returning undef once
            $self->{_last_child} = 0;
            return;
        }
        my $ret = $self->{value}->[$idx];

        ++$idx;
        $self->{_last_child} = $idx;
        return $ret;
    }

    # Bytes of this element's content not yet consumed by children.
    my $len = defined $self->{remaining_len}
        ? $self->{remaining_len}
        : $self->{content_len};

    if ($len == 0) {
        # we've read all children; switch into $self->{value} iteration mode
        $self->{_all_children_read} = 1;
        # return undef since the iterator will reset
        return;
    }

    # Reposition to the first unread child: start of our data plus the
    # total length of the children read so far.
    $self->{pos_offset} ||= 0;
    my $pos = $self->{data_pos};
    my $reader = $self->{reader} or croak "The associated reader has been deleted";
    $reader->setpos($pos);
    $reader->{fh}->seek($self->{pos_offset}, 1) if $pos;

    my $chld = $reader->read_element($read_bin);
    return undef unless defined $chld;
    $self->{pos_offset} += $chld->{full_len};

    $self->{remaining_len} = $len - $chld->{full_len};

    if ($self->{remaining_len} < 0) {
        # remaining_len is negative here; report the size of the overrun
        # as a positive byte count.
        my $excess = -$self->{remaining_len};
        croak "Child elements consumed $excess more bytes than parent $self->{name} contained";
    }

    $chld->{depth} = $self->{depth} + 1;
    $self->{value} ||= [];

    push @{$self->{value}}, $chld;

    return $chld;
}
=method all_children($recurse,$read_bin)
Calls L</populate_children($recurse,$read_bin)> on self
and returns an arrayref with the children nodes.
Both C<$recurse> and C<$read_bin> are optional and default
to false.
=cut
sub all_children {
    my $self = shift;
    my ($recurse, $read_bin) = @_;

    # Make sure the child list is filled in, then hand out the arrayref.
    $self->populate_children($recurse, $read_bin);

    return $self->{value};
}
=method children_by_name($name)
Searches in the already read children elements for all
elements with the EBML name C<$name>. Returns an array
containing all found elements. On scalar context,
returns only the first element found.
Croaks if the element's C<type> isn't C<sub>.
=cut
sub children_by_name {
    my ($self, $name) = @_;

    # don't do work if work isn't wanted (void context)
    my $want_list = wantarray;
    return unless defined $want_list;

    croak "Element can't have children" if $self->{type} ne 'sub';

    # Only children that have already been read are searched.
    my @matches = grep { $_->{name} eq $name } @{$self->{value}};

    # list context: every match; scalar context: the first match (or undef)
    return $want_list ? @matches : shift @matches;
}
=method populate_children($recurse,$read_bin)
Populates the internal array of children elements, that is,
requests that the associated L<Parse::Matroska::Reader> read
all children elements. Returns itself.
Returns false if the element's C<type> isn't C<sub>.
If C<$recurse> is provided and is true, the method will call
itself in the children elements with the same parameters it
received; this will build a full EBML tree.
If C<$read_bin> is provided and is true, disables delay-loading
of the contents of C<binary>-type nodes, reading the contents
to memory.
If both C<$recurse> and C<$read_bin> are true, entire EBML trees
can be loaded without requiring seeks, thus behaving correctly
on unseekable streams. If C<$read_bin> is false, the entire EBML
tree is still loaded, but calling L</get_value> on C<binary>-type
nodes will produce an error on unseekable streams.
=cut
sub populate_children {
    my ($self, $recurse, $read_bin) = @_;
    return if $self->{type} ne 'sub';

    # Children are already present and a recursive load was requested:
    # only recurse into the known children.
    if (@{$self->{value}} && $recurse) {
        $_->populate_children($recurse, $read_bin) for @{$self->{value}};
        return $self;
    }

    # Otherwise pull children from the reader until the iterator is
    # exhausted, descending into each one when recursing.
    while (my $child = $self->next_child($read_bin)) {
        next unless $recurse;
        $child->populate_children($recurse, $read_bin);
    }

    return $self;
}
1;

View File

@ -1,426 +0,0 @@
use 5.008;
use strict;
use warnings;
# ABSTRACT: a low-level reader for EBML files
package Parse::Matroska::Reader;
use Parse::Matroska::Definitions qw{elem_by_hexid};
use Parse::Matroska::Element;
use Carp;
use Scalar::Util qw{openhandle weaken};
use IO::Handle;
use IO::File;
use List::Util qw{first};
use Encode;
use constant BIGINT_TRY => 'Pari,GMP,FastCalc';
use Math::BigInt try => BIGINT_TRY;
use Math::BigRat try => BIGINT_TRY;
=head1 SYNOPSIS
use Parse::Matroska::Reader;
my $reader = Parse::Matroska::Reader->new($path);
$reader->close;
$reader->open(\$string_with_matroska_data);
my $elem = $reader->read_element;
print "Element ID: $elem->{elid}\n";
print "Element name: $elem->{name}\n";
if ($elem->{type} ne 'sub') {
print "Element value: $elem->get_value\n";
} else {
while (my $child = $elem->next_child) {
print "Child element: $child->{name}\n";
}
}
$reader->close;
=head1 DESCRIPTION
Reads EBML data, which is used in Matroska files.
This is a low-level reader which is meant to be used as a backend
for higher level readers. TODO: write the high level readers :)
=head1 NOTE
The API of this module is not yet considered stable.
=method new
Creates a new reader.
Calls L</open($arg)> with its arguments if provided.
=cut
sub new {
    # Constructor: create the reader and, if any argument was given,
    # open it immediately.
    my ($class, @open_args) = @_;
    my $self = bless {}, $class;
    $self->open(@open_args) if @open_args;
    return $self;
}
=method open($arg)
Creates the internal filehandle. The argument can be:
=for :list
* An open filehandle or L<IO::Handle> object.
The filehandle is not C<dup()>ed, so calling L</close> in this
object will close the given filehandle as well.
* A scalar containing a path to a file.
* On perl v5.14 or newer, a scalarref pointing to EBML data.
For similar functionality in older perls, give an L<IO::String> object
or the handle to an already C<open>ed scalarref.
=cut
sub open {
    my ($self, $arg) = @_;

    # Use $arg directly when it already is an open handle; otherwise treat
    # it as something IO::File can open for raw reading.
    my $handle = openhandle($arg);
    $handle = IO::File->new($arg, "<:raw") unless $handle;

    $self->{fh} = $handle;
    croak "Can't open $arg: $!" unless $handle;
    return $handle;
}
=method close
Closes the internal filehandle.
=cut
sub close {
    my $self = shift;

    # Close the underlying handle first, then forget it.
    my $handle = $self->{fh};
    $handle->close;

    return delete $self->{fh};
}
# equivalent to $self->readlen(1), possibly faster
# Returns the next byte, or undef at end of file. Croaks if the read
# itself reported an error.
sub _getc {
    my ($self) = @_;
    # Clear errno first: it is not reset by successful calls, so a stale
    # value left by some earlier failure would otherwise turn a plain end
    # of file into a bogus read error.
    local $! = 0;
    my $c = $self->{fh}->getc;
    croak "Can't do read of length 1: $!" if !defined $c && $!;
    return $c;
}
=method readlen($length)
Reads C<$length> bytes from the internal filehandle.
=cut
sub readlen {
    my ($self, $len) = @_;

    # read() returns undef only on error; a short read is not an error
    # here and simply yields fewer bytes.
    my $buffer;
    defined $self->{fh}->read($buffer, $len)
        or croak "Can't do read of length $len: $!";

    return $buffer;
}
# Converts a big-endian byte string into an integer: the bytes are first
# turned into a hex string, which is then parsed as a number.
# Strings longer than 3 bytes are returned as Math::BigInt objects, since
# the value might be larger than INT_MAX; shorter ones as plain integers.
sub _bin2int($) {
    my ($bytes) = @_;
    my $hex_digits = unpack("H*", $bytes);
    return length($bytes) > 3
        ? Math::BigInt->from_hex($hex_digits)
        : hex($hex_digits);
}
# Creates an exact rational number with the given mantissa and binary
# exponent, i.e. $mantissa * 2**$exponent, as a Math::BigRat.
sub _ldexp {
    my ($mantissa, $exponent) = @_;
    # Direct method call instead of the ambiguous indirect-object form
    # "new Math::BigRat(...)".
    my $r = Math::BigRat->new($mantissa);
    return $r * Math::BigRat->new(2)**$exponent;
}
# NOTE: the read_* functions are hard to read because they're ports
# of even harder to read python functions.
# TODO: make them readable
=method read_id
Reads an EBML ID atom in hexadecimal string format, suitable
for passing to L<Parse::Matroska::Definitions/elem_by_hexid($id)>.
=cut
sub read_id {
    my $self = shift;

    my $lead = $self->_getc;
    return undef unless defined $lead;

    my $lead_bits = ord $lead;
    croak "Matroska Syntax error: first byte of ID was \\0" if $lead_bits == 0;

    # The number of zero bits before the first set bit of the lead byte is
    # the number of bytes that follow it.
    my $extra = 0;
    $extra++ until $lead_bits & (0x80 >> $extra);

    # return hex string of the bytes we just read
    return unpack "H*", $lead . $self->readlen($extra);
}
=method read_size
Reads an EBML Data Size atom, which immediately follows
an EBML ID atom.
This returns an array consisting of:
=for :list
0. The length of the Data Size atom.
1. The value encoded in the Data Size atom, which is the length of all the data following it.
=cut
# Reads an EBML data size field.
# Returns (length of the size field in bytes, decoded size value).
# Croaks on end of file or when the first byte is zero.
sub read_size {
    my ($self) = @_;
    my $t = $self->_getc;
    # Report a truncated stream as such, instead of letting ord(undef)
    # warn and then blame a "\0" byte that was never read.
    croak "Matroska Syntax error: unexpected end of file while reading data size"
        unless defined $t;

    my $first = ord($t);
    if ($first == 0) {
        croak "Matroska Syntax error: first byte of data size was \\0"
    }

    # Count the zero bits before the first set bit: that many more bytes
    # belong to the size field.
    my $i = 0;
    my $mask = 1<<7;
    until ($first & $mask) {
        ++$i;
        $mask >>= 1;
    }

    # strip length bits (keep only significant bits)
    $t = chr($first & ($mask - 1));
    return ($i+1, _bin2int($t . $self->readlen($i)));
}
=method read_str($length)
Reads a string of length C<$length> bytes from the internal filehandle.
The string is already L<Encode/decode>d from C<UTF-8>, which is the
standard Matroska string encoding.
=cut
{
    # Look the UTF-8 decoder up once and share it between all calls.
    my $decoder = find_encoding("UTF-8");

    sub read_str {
        my $self = shift;
        my ($length) = @_;
        my $octets = $self->readlen($length);
        return $decoder->decode($octets);
    }
}
=method read_uint($length)
Reads an unsigned integer of length C<$length> bytes
from the internal filehandle.
Returns a L<Math::BigInt> object if C<$length> is greater
than 3.
=cut
sub read_uint {
    my ($self, $length) = @_;
    # Fetch the raw big-endian bytes, then convert them to an integer.
    my $raw = $self->readlen($length);
    return _bin2int($raw);
}
=method read_sint($length)
Reads a signed integer of length C<$length> bytes
from the internal filehandle.
Returns a L<Math::BigInt> object if C<$length> is greater
than 3.
=cut
sub read_sint {
    my ($self, $length) = @_;
    my $value = $self->read_uint($length);

    # Apply 2's complement to the unsigned int: the sign bit is the most
    # significant bit of the $length-byte field.
    my $sign_bit = int(2 ** ($length * 8 - 1));

    # Sign bit clear: the unsigned reading already is the value.
    return $value unless $value & $sign_bit;

    # Sign bit set: subtract the MSB twice.
    return $value - 2 * $sign_bit;
}
=method read_float($length)
Reads an IEEE floating point number of length C<$length>
bytes from the internal filehandle.
Only lengths C<4> and C<8> are supported (C C<float> and C<double>).
=cut
{
    # Field widths and exponent bias of the two supported IEEE 754 binary
    # formats, keyed by their size in bytes.
    my %layout_for = (
        4 => { mant_bits => 23, exp_bits => 8,  bias => 127  },
        8 => { mant_bits => 52, exp_bits => 11, bias => 1023 },
    );

    # Reads a $length-byte big-endian IEEE 754 number and returns its exact
    # value as produced by _ldexp. Croaks for any length other than 4 or 8.
    # Infinities and NaNs (all-ones exponent) get no special treatment and
    # are decoded as if they were ordinary finite numbers.
    sub read_float {
        my ($self, $length) = @_;

        # Validate before touching the data: for short lengths read_uint
        # returns a plain integer, on which ->bstr cannot be called.
        my $fmt = $layout_for{$length}
            or croak "Matroska Syntax error: unsupported IEEE float byte size $length";

        my $one  = Math::BigInt->new(1);
        my $bits = Math::BigInt->new($self->read_uint($length)->bstr);

        my $mant_bits = $fmt->{mant_bits};
        my $mantissa  = $bits & (($one << $mant_bits) - 1);
        # Mask exactly exp_bits bits so that the sign bit, which sits right
        # above the exponent, can never leak into it.
        my $exponent  = ($bits >> $mant_bits) & ((1 << $fmt->{exp_bits}) - 1);
        my $negative  = $bits & ($one << ($mant_bits + $fmt->{exp_bits}));

        my $f;
        if ($exponent == 0) {
            # Zero and subnormals: no implicit leading 1, and the exponent
            # is fixed at the smallest normal exponent.
            $f = _ldexp($mantissa, 1 - $fmt->{bias} - $mant_bits);
        } else {
            # Normal numbers: restore the implicit leading 1 bit.
            $f = _ldexp($mantissa + ($one << $mant_bits),
                        $exponent - $fmt->{bias} - $mant_bits);
        }
        $f = -$f if $negative;
        return $f;
    }
}
=method read_ebml_id($length)
Reads an EBML ID when it's encoded as the data inside another
EBML element, that is, when the enclosing element's C<type> is
C<ebml_id>.
This returns a hashref with the EBML element description as
defined in L<Parse::Matroska::Definitions>.
=cut
sub read_ebml_id {
    my ($self, $length) = @_;
    # The payload is itself an element id: render it as a hex string and
    # resolve it to the matching element definition.
    my $hexid = unpack("H*", $self->readlen($length));
    return elem_by_hexid($hexid);
}
=method skip($length)
Skips C<$length> bytes in the internal filehandle.
=cut
sub skip {
    my ($self, $len) = @_;
    my $handle = $self->{fh};

    # Prefer a relative seek; if the handle cannot seek, or the seek fails,
    # consume the bytes by reading and discarding them.
    unless ($handle->can('seek') && $handle->seek($len, 1)) {
        $self->readlen($len);
    }

    return;
}
=method getpos
Wrapper for L<IO::Seekable/$io-E<gt>getpos> in the internal filehandle.
Returns undef if the internal filehandle can't C<getpos>.
=cut
sub getpos {
    my $self = shift;
    my $handle = $self->{fh};
    # Handles without a getpos method have no position to report.
    return $handle->can('getpos') ? $handle->getpos : undef;
}
=method setpos($pos)
Wrapper for L<IO::Seekable/$io-E<gt>setpos> in the internal filehandle.
Returns C<undef> if the internal filehandle can't C<setpos>.
Croaks if C<setpos> does not seek to the requested position,
that is, if calling C<getpos> does not yield the same object
as the C<$pos> argument.
=cut
sub setpos {
    my ($self, $pos) = @_;
    my $handle = $self->{fh};

    # Nothing to do without a position or on a handle that cannot setpos.
    return undef unless $pos && $handle->can('setpos');

    my $result = $handle->setpos($pos);

    # Verify that the handle really ended up where it was told to go.
    if ($self->getpos ne $pos) {
        croak "Cannot seek to correct position";
    }

    return $result;
}
=method read_element($read_bin)
Reads a full EBML element from the internal filehandle.
Returns a L<Parse::Matroska::Element> object initialized with
the read data. If C<read_bin> is not present or is false, will
delay-load the contents of C<binary> type elements, that is,
they will only be loaded when calling C<get_value> on the
returned L<Parse::Matroska::Element> object.
Does not read the children of the element if its type is
C<sub>. Look into the L<Parse::Matroska::Element> interface
for details in how to read children elements.
Pass a true C<$read_bin> if the stream being read is not
seekable (C<getpos> is undef) and the contents of C<binary>
elements is desired, otherwise seeking errors or internal
filehandle corruption might occur.
=cut
# Reads one element header (id and size) and, depending on the element's
# declared type, its value.
#   $read_bin - if true, the content of 'binary' elements is read into
#               {value}; otherwise it is skipped.
# Returns a Parse::Matroska::Element, or undef at end of file. Children of
# 'sub' elements are not read here. Content of elements with no known
# definition is skipped.
sub read_element {
    my ($self, $read_bin) = @_;
    return undef if $self->{fh}->eof;

    # Header: remember where the element starts, then read id and size.
    my $elem_pos = $self->getpos;

    my $elid = $self->read_id;
    my $elem_def = elem_by_hexid($elid);
    my ($size_len, $content_len) = $self->read_size;
    # $elid is a hex string, so the id occupies half as many bytes.
    my $full_len = length($elid)/2 + $size_len + $content_len;

    my $elem = Parse::Matroska::Element->new(
        elid => $elid,
        name => $elem_def && $elem_def->{name},
        type => $elem_def && $elem_def->{valtype},
        size_len => $size_len,
        content_len => $content_len,
        full_len => $full_len,
        reader => $self,
        elem_pos => $elem_pos,
        data_pos => $self->getpos,
    );
    # The element must not keep its reader alive.
    weaken($elem->{reader});

    if (defined $elem_def) {
        if ($elem->{type} eq 'sub') {
            $elem->{value} = [];
        } elsif ($elem->{type} eq 'str') {
            $elem->{value} = $self->read_str($content_len);
        } elsif ($elem->{type} eq 'ebml_id') {
            $elem->{value} = $self->read_ebml_id($content_len);
        } elsif ($elem->{type} eq 'uint') {
            $elem->{value} = $self->read_uint($content_len);
        } elsif ($elem->{type} eq 'sint') {
            $elem->{value} = $self->read_sint($content_len);
        } elsif ($elem->{type} eq 'float') {
            $elem->{value} = $self->read_float($content_len);
        } elsif ($elem->{type} eq 'skip') {
            $self->skip($content_len);
        } elsif ($elem->{type} eq 'binary') {
            if ($read_bin) {
                $elem->{value} = $self->readlen($content_len);
            } else {
                $self->skip($content_len);
            }
        } else {
            # The element stores its type under 'type' (there is no
            # 'valtype' key on the element), so report that.
            die "Matroska Definition error: type $elem->{type} unknown"
        }
    } else {
        $self->skip($content_len);
    }
    return $elem;
}
1;
=head1 CAVEATS
Children elements have to be processed as soon as an element
with children is found, or their children ignored with
L<Parse::Matroska::Element/skip>. Not doing so doesn't cause
errors but results in an invalid structure, with constant C<0>
depth.
To work correctly in unseekable streams, either the contents
of C<binary>-type elements has to be ignored or the C<read_bin>
flag to C<read_element> has to be true.

View File

@ -1,37 +0,0 @@
use strict;
use warnings;
# ABSTRACT: internally-used helper functions
package Parse::Matroska::Utils;
use Exporter;
our @ISA = qw{Exporter};
our @EXPORT_OK = qw{uniq uncamelize};
=method uniq(@array)

Returns the elements of C<@array> with repeated values dropped; the
first occurrence of each value is kept, in its original position.

Behaves like L<List::MoreUtils/"uniq LIST">, which is not a core
module and is therefore not used here.

=cut

sub uniq(@) {
    my %already_seen;
    my @distinct;
    for my $item (@_) {
        # The post-increment is false only the first time a value shows up.
        next if $already_seen{$item}++;
        push @distinct, $item;
    }
    return @distinct;
}
=method uncamelize($string)

Turns a camel-case name such as "StringLikeTHIS" into its
lower-case, underscore-separated form, "string_like_this".

=cut

sub uncamelize($) {
    my ($text) = @_;

    # An upper-case letter right after a lower-case one starts a new word.
    $text =~ s/(?<=[a-z])([A-Z])/_\L$1/g;
    # So does an upper-case letter that is followed by two lower-case ones.
    $text =~ s/([A-Z])(?=[a-z]{2})/_\L$1/g;
    # The second substitution can leave an underscore at the very front.
    $text =~ s/^_//;

    return lc $text;
}

View File

@ -1,169 +0,0 @@
#! /usr/bin/env perl
# Generate C definitions for parsing Matroska files.
use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/lib";
use Parse::Matroska::Definitions;
use Parse::Matroska::Reader;
use Getopt::Long;
use List::Util qw{max};
# Flat list of every known element definition, copied from the Definitions module.
my @global_elem_list = @Parse::Matroska::Definitions::global_elem_list;

Getopt::Long::Configure(qw{auto_version auto_help});
my %opt;
# GetOptions returns false when it meets an option it cannot parse; stop
# instead of silently falling through to file-dumping mode.
GetOptions(\%opt,
    "generate-header",
    "generate-definitions",
    "full",
) or die "Invalid command-line options\n";

if ($opt{"generate-header"}) {
    generate_c_header();
} elsif ($opt{"generate-definitions"}) {
    generate_c_definitions();
} else {
    # Dump mode: every remaining argument is a file to parse; '-' means STDIN.
    # A lexical loop variable is used so that code called inside the loop
    # cannot disturb it through the global $_.
    for my $path (@ARGV) {
        my $from_stdin = $path eq '-';
        my $reader = Parse::Matroska::Reader->new($from_stdin ? \*STDIN : $path) or die $!;
        # Binary payloads are read eagerly for STDIN, which is the case the
        # reader's read_bin flag is passed for.
        while (my $elem = $reader->read_element($from_stdin)) {
            process_elem($elem, $from_stdin);
        }
    }
}
# Generate declarations for libmpdemux/ebml_types.h
#
# Prints to STDOUT, in this order: one #define per known element ID, one
# struct per element that has sub-elements, extern declarations for the
# matching ebml_elem_desc objects, and MAX_EBML_SUBELEMENTS.
# Takes no arguments and reads the file-level @global_elem_list.
sub generate_c_header {
    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";

    # Write a #define for the ElementID of each known element
    for my $el (@global_elem_list) {
        printf "#define %-40s 0x%s\n", $el->{definename}, $el->{elid};
    }
    print "\n";

    # Only elements that have child elements get a struct and a descriptor.
    my @parents = grep { $_->{subelements} } @global_elem_list;

    # Define a struct for each ElementID that has child elements
    for my $el (@parents) {
        # Sort once by define name and reuse the result for both the fields
        # and their counters. Sorting the hash values directly would order
        # them by stringified reference, which changes from run to run.
        my @subels = sort { $a->{definename} cmp $b->{definename} }
                     values %{ $el->{subelements} };

        print "\nstruct $el->{structname} {\n";

        # Length of the longest type name, used to align the field column
        my $width = max(map { length $_->{valname} } @subels);

        # Output each variable, with pointers for array (multiple) elements
        for my $subel (@subels) {
            printf " %-${width}s %s%s;\n",
                $subel->{valname}, $subel->{multiple}?'*':' ', $subel->{fieldname};
        }
        print "\n";

        # Output a counter variable for each element
        # (presence/absence for scalars, item count for arrays)
        for my $subel (@subels) {
            print " int n_$subel->{fieldname};\n";
        }
        print "};\n";
    }
    print "\n";

    # Output extern references for ebml_elem_desc structs for each of the elements
    # These are defined by generate_c_definitions
    for my $el (@parents) {
        print "extern const struct ebml_elem_desc $el->{structname}_desc;\n";
    }
    print "\n";

    # Output the max number of sub-elements a known element might have
    printf "#define MAX_EBML_SUBELEMENTS %d\n",
        max(map { scalar keys %{ $_->{subelements} } } @parents);
}
# Generate definitions for libmpdemux/ebml_defs.c
#
# Prints one macro invocation group per known element to STDOUT. The macros
# themselves (E_S, F, E) are declared in ebml.c, not here.
sub generate_c_definitions {
    print "/* Generated by TOOLS/matroska.pl, do not edit manually */\n\n";

    foreach my $elem (@global_elem_list) {
        print "\n";

        my $children = $elem->{subelements};
        if (!$children) {
            # Leaf element: a single E() line is all it needs.
            print "E(\"$elem->{name}\", $elem->{fieldname}, $elem->{ebmltype})\n";
            next;
        }

        my $child_count = scalar(keys %{$children});
        my @ordered = sort { $a->{definename} cmp $b->{definename} } values %{$children};

        # N is consumed by the macros that follow.
        print "#define N $elem->{fieldname}\n";
        # E_S starts the ebml_$N_desc struct; it opens two scopes, which is
        # why the group is closed with "}};" below.
        print "E_S(\"$elem->{name}\", " . $child_count . ")\n";
        # One F() line per sub-element; these do not open a scope.
        foreach my $child (@ordered) {
            my $is_multiple = $child->{multiple} ? '1' : '0';
            print "F($child->{definename}, $child->{fieldname}, " . $is_multiple . ")\n";
        }
        print "}};\n";
        # N has served its purpose for this element.
        print "#undef N\n";
    }
}
# Quotes each argument for display.
#
# A string containing a single quote is wrapped in double quotes, with any
# embedded double quotes backslash-escaped; every other string is wrapped in
# single quotes.
#
# Returns the list of quoted strings in list context, the last quoted string
# in scalar context, and nothing in void context.
sub repr {
    my @ret;
    foreach my $arg (@_) {
        # Work on a copy: the elements of @_ alias the caller's variables, so
        # substituting in place would rewrite the caller's data (and croak on
        # a read-only argument).
        my $text = $arg;
        if ($text =~ /'/) {
            $text =~ s/"/\\"/g;
            push @ret, "\"$text\"";
        } else {
            push @ret, "'$text'";
        }
    }
    return @ret if wantarray;
    return pop @ret if defined wantarray;
    return;
}
# Prints one parsed element, and recursively its children, to STDOUT.
#
# Parameters:
#   $elem     - the element to print; must be a true value.
#   $read_bin - passed on to next_child() for this element and for every
#               nested level below it.
#
# Unless --full was given, 'Cluster' and 'Cues' elements are skipped whole.
sub process_elem {
    my ($elem, $read_bin) = @_;
    # Validate before the first dereference, not after it.
    die "process_elem: no element given" unless $elem;

    unless ($opt{full}) {
        if ($elem->{name} eq 'Cluster' || $elem->{name} eq 'Cues') {
            $elem->skip;
            return;
        }
    }

    if ($elem->{type} ne 'skip') {
        print "$elem->{depth} $elem->{elid} $elem->{name} size: $elem->{content_len} value: ";
    }

    if ($elem->{type} eq 'sub') {
        print "subelements:\n";
        while (my $chld = $elem->next_child($read_bin)) {
            # Hand $read_bin down; without it grandchildren would be fetched
            # with the flag unset regardless of what the caller asked for.
            process_elem($chld, $read_bin);
        }
    } elsif ($elem->{type} eq 'binary') {
        my $t = "<skipped $elem->{content_len} bytes>";
        # Only short payloads are shown, as hex.
        if ($elem->{content_len} < 20) {
            $t = unpack "H*", $elem->get_value;
        }
        print "binary $t\n";
        delete $elem->{value};
    } elsif ($elem->{type} eq 'ebml_id') {
        print "binary $elem->{value}->{elid} (".($elem->{value}->{name}||"UNKNOWN").")\n";
    } elsif ($elem->{type} eq 'skip') {
        # skip
    } elsif ($elem->{type} eq 'str') {
        print "string ". repr($elem->get_value) . "\n";
    } else {
        print "$elem->{type} ". $elem->get_value ."\n";
    }
}

463
TOOLS/matroska.py Executable file
View File

@ -0,0 +1,463 @@
#!/usr/bin/env python
"""
Generate C definitions for parsing Matroska files.
Can also be used to directly parse Matroska files and display their contents.
"""
#
# This file is part of MPlayer.
#
# MPlayer is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# MPlayer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with MPlayer; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# for compatibility with Python 2.x
from __future__ import print_function
# Element table for the EBML header plus the CRC32 and Void elements.
# Each string is 'Name, hex element ID, value type'. A tuple that directly
# follows a string lists the sub-elements of that element. A '*' suffix on
# the name (not used in this table) marks an element that may occur more
# than once. The table is consumed by parse_elems() below.
elements_ebml = (
    'EBML, 1a45dfa3, sub', (
        'EBMLVersion, 4286, uint',
        'EBMLReadVersion, 42f7, uint',
        'EBMLMaxIDLength, 42f2, uint',
        'EBMLMaxSizeLength, 42f3, uint',
        'DocType, 4282, str',
        'DocTypeVersion, 4287, uint',
        'DocTypeReadVersion, 4285, uint',
    ),
    # parse_one() accepts these two IDs ('bf', 'ec') inside any parent.
    'CRC32, bf, binary',
    'Void, ec, binary',
)
# Element table for the Matroska Segment tree.
# Each string is 'Name, hex element ID, value type'. A tuple that directly
# follows a string lists the sub-elements of that element. A '*' suffix on
# the name marks an element that may occur more than once in its parent.
#
# Hex IDs are written in lower case throughout: parse_one() looks elements up
# by the output of hexlify(), which is lower case, so an upper-case ID here
# would never be matched when parsing a file.
elements_matroska = (
    'Segment, 18538067, sub', (

        'SeekHead*, 114d9b74, sub', (
            'Seek*, 4dbb, sub', (
                'SeekID, 53ab, ebml_id',
                'SeekPosition, 53ac, uint',
            ),
        ),

        'Info*, 1549a966, sub', (
            'SegmentUID, 73a4, binary',
            'PrevUID, 3cb923, binary',
            'NextUID, 3eb923, binary',
            'TimecodeScale, 2ad7b1, uint',
            'DateUTC, 4461, sint',
            'Title, 7ba9, str',
            'MuxingApp, 4d80, str',
            'WritingApp, 5741, str',
            'Duration, 4489, float',
        ),

        'Cluster*, 1f43b675, sub', (
            'Timecode, e7, uint',
            'BlockGroup*, a0, sub', (
                'Block, a1, binary',
                'BlockDuration, 9b, uint',
                'ReferenceBlock*, fb, sint',
                'DiscardPadding, 75a2, sint',
            ),
            'SimpleBlock*, a3, binary',
        ),

        'Tracks*, 1654ae6b, sub', (
            'TrackEntry*, ae, sub', (
                'TrackNumber, d7, uint',
                'TrackUID, 73c5, uint',
                'TrackType, 83, uint',
                'FlagEnabled, b9, uint',
                'FlagDefault, 88, uint',
                'FlagForced, 55aa, uint',
                'FlagLacing, 9c, uint',
                'MinCache, 6de7, uint',
                'MaxCache, 6df8, uint',
                'DefaultDuration, 23e383, uint',
                'TrackTimecodeScale, 23314f, float',
                'MaxBlockAdditionID, 55ee, uint',
                'Name, 536e, str',
                'Language, 22b59c, str',
                'CodecID, 86, str',
                'CodecPrivate, 63a2, binary',
                'CodecName, 258688, str',
                'CodecDecodeAll, aa, uint',
                'CodecDelay, 56aa, uint',
                'SeekPreRoll, 56bb, uint',
                'Video, e0, sub', (
                    'FlagInterlaced, 9a, uint',
                    'PixelWidth, b0, uint',
                    'PixelHeight, ba, uint',
                    'DisplayWidth, 54b0, uint',
                    'DisplayHeight, 54ba, uint',
                    'DisplayUnit, 54b2, uint',
                    'FrameRate, 2383e3, float',
                    'ColourSpace, 2eb524, binary',
                    'StereoMode, 53b8, uint',
                    'Colour, 55b0, sub', (
                        'MatrixCoefficients, 55b1, uint',
                        'BitsPerChannel, 55b2, uint',
                        'ChromaSubsamplingHorz, 55b3, uint',
                        'ChromaSubsamplingVert, 55b4, uint',
                        'CbSubsamplingHorz, 55b5, uint',
                        'CbSubsamplingVert, 55b6, uint',
                        'ChromaSitingHorz, 55b7, uint',
                        'ChromaSitingVert, 55b8, uint',
                        'Range, 55b9, uint',
                        'TransferCharacteristics, 55ba, uint',
                        'Primaries, 55bb, uint',
                        'MaxCLL, 55bc, uint',
                        'MaxFALL, 55bd, uint',
                        'MasteringMetadata, 55d0, sub', (
                            'PrimaryRChromaticityX, 55d1, float',
                            'PrimaryRChromaticityY, 55d2, float',
                            'PrimaryGChromaticityX, 55d3, float',
                            'PrimaryGChromaticityY, 55d4, float',
                            'PrimaryBChromaticityX, 55d5, float',
                            'PrimaryBChromaticityY, 55d6, float',
                            'WhitePointChromaticityX, 55d7, float',
                            'WhitePointChromaticityY, 55d8, float',
                            'LuminanceMax, 55d9, float',
                            'LuminanceMin, 55da, float',
                        ),
                    ),
                ),
                'Audio, e1, sub', (
                    'SamplingFrequency, b5, float',
                    'OutputSamplingFrequency, 78b5, float',
                    'Channels, 9f, uint',
                    'BitDepth, 6264, uint',
                ),
                'ContentEncodings, 6d80, sub', (
                    'ContentEncoding*, 6240, sub', (
                        'ContentEncodingOrder, 5031, uint',
                        'ContentEncodingScope, 5032, uint',
                        'ContentEncodingType, 5033, uint',
                        'ContentCompression, 5034, sub', (
                            'ContentCompAlgo, 4254, uint',
                            'ContentCompSettings, 4255, binary',
                        ),
                    ),
                ),
            ),
        ),

        'Cues, 1c53bb6b, sub', (
            'CuePoint*, bb, sub', (
                'CueTime, b3, uint',
                'CueTrackPositions*, b7, sub', (
                    'CueTrack, f7, uint',
                    'CueClusterPosition, f1, uint',
                    'CueRelativePosition, f0, uint',
                    'CueDuration, b2, uint',
                ),
            ),
        ),

        'Attachments, 1941a469, sub', (
            'AttachedFile*, 61a7, sub', (
                'FileDescription, 467e, str',
                'FileName, 466e, str',
                'FileMimeType, 4660, str',
                'FileData, 465c, binary',
                'FileUID, 46ae, uint',
            ),
        ),

        'Chapters, 1043a770, sub', (
            'EditionEntry*, 45b9, sub', (
                'EditionUID, 45bc, uint',
                'EditionFlagHidden, 45bd, uint',
                'EditionFlagDefault, 45db, uint',
                'EditionFlagOrdered, 45dd, uint',
                'ChapterAtom*, b6, sub', (
                    'ChapterUID, 73c4, uint',
                    'ChapterTimeStart, 91, uint',
                    'ChapterTimeEnd, 92, uint',
                    'ChapterFlagHidden, 98, uint',
                    'ChapterFlagEnabled, 4598, uint',
                    'ChapterSegmentUID, 6e67, binary',
                    'ChapterSegmentEditionUID, 6ebc, uint',
                    'ChapterDisplay*, 80, sub', (
                        'ChapString, 85, str',
                        'ChapLanguage*, 437c, str',
                        'ChapCountry*, 437e, str',
                    ),
                ),
            ),
        ),

        'Tags*, 1254c367, sub', (
            'Tag*, 7373, sub', (
                'Targets, 63c0, sub', (
                    'TargetTypeValue, 68ca, uint',
                    'TargetTrackUID, 63c5, uint',
                    'TargetEditionUID, 63c9, uint',
                    'TargetChapterUID, 63c4, uint',
                    'TargetAttachmentUID, 63c6, uint',
                ),
                'SimpleTag*, 67c8, sub', (
                    'TagName, 45a3, str',
                    'TagLanguage, 447a, str',
                    'TagString, 4487, str'
                ),
            ),
        ),
    ),
)
import sys
from math import ldexp
from binascii import hexlify
def byte2num(s):
    """Interpret the byte string `s` as a big-endian unsigned integer.

    An empty byte string yields 0. hexlify() of an empty string is empty,
    which int() rejects, so that case is handled explicitly.
    """
    if not s:
        return 0
    return int(hexlify(s), 16)
class EOF(Exception):
    """Raised by read() when the stream ends before the requested bytes arrive."""
def camelcase_to_words(name):
    """Convert a CamelCase name to lower-case words joined by underscores.

    A new word starts at every upper-case letter (other than the first
    character) that either follows a lower-case letter or is followed by one.
    """
    boundaries = [0]
    for pos in range(1, len(name)):
        if not name[pos].isupper():
            continue
        after_lower = name[pos - 1].islower()
        # Slicing instead of indexing keeps this safe at the end of the name.
        before_lower = name[pos + 1:pos + 2].islower()
        if after_lower or before_lower:
            boundaries.append(pos)
    boundaries.append(len(name))

    words = [name[begin:end] for begin, end in zip(boundaries, boundaries[1:])]
    return '_'.join(words).lower()
class MatroskaElement(object):
    """One element definition, with the C identifiers derived from its name.

    Attributes set by the constructor:
      name        -- element name as given
      definename  -- '<namespace>_ID_<NAME>', used for the ID #define
      fieldname   -- name converted by camelcase_to_words()
      structname  -- 'ebml_' + fieldname
      elid        -- element ID string as given
      valtype     -- value type string as given
      ebmltype    -- 'EBML_TYPE_SUBELEMENTS' or 'EBML_TYPE_<VALTYPE>'
      valname     -- C type used for the element's value
      subelements -- sequence of (MatroskaElement, multiple) pairs; empty
                     until add_subelements() is called
      subids      -- set of the sub-elements' elid values; empty until
                     add_subelements() is called
    """

    def __init__(self, name, elid, valtype, namespace):
        """Raise SyntaxError if `valtype` is not a recognized value type."""
        self.name = name
        self.definename = '{0}_ID_{1}'.format(namespace, name.upper())
        self.fieldname = camelcase_to_words(name)
        self.structname = 'ebml_' + self.fieldname
        self.elid = elid
        self.valtype = valtype
        if valtype == 'sub':
            self.ebmltype = 'EBML_TYPE_SUBELEMENTS'
            self.valname = 'struct ' + self.structname
        else:
            self.ebmltype = 'EBML_TYPE_' + valtype.upper()
            try:
                self.valname = {'uint': 'uint64_t', 'str': 'char *',
                                'binary': 'bstr', 'ebml_id': 'uint32_t',
                                'float': 'double', 'sint': 'int64_t',
                                }[valtype]
            except KeyError:
                raise SyntaxError('Unrecognized value type ' + valtype)
        self.subelements = ()
        # Defined up front so code that reads .subids (parse_one does) also
        # works for an element that never had sub-elements attached.
        self.subids = set()

    def add_subelements(self, subelements):
        """Attach `subelements`, a sequence of (MatroskaElement, multiple) pairs."""
        self.subelements = subelements
        self.subids = set(x[0].elid for x in subelements)
# Every known element, keyed by its hex ID string; filled in by parse_elems().
elementd = {}
# The same elements in the order they are defined; filled in by parse_elems().
elementlist = []
def parse_elems(l, namespace):
    """Register the elements described by the nested table `l`.

    `l` is a sequence in which each string is 'Name, hex ID, value type' and
    a non-string item directly after a string holds that element's
    sub-elements in the same format. A trailing '*' on the name marks an
    element that may occur multiple times.

    Every element is added to the module-level `elementd` (keyed by hex ID)
    and `elementlist`. Returns the list of (MatroskaElement, multiple) pairs
    for the top level of `l`.

    Raises SyntaxError if a sub-element tuple has no preceding element.
    """
    subelements = []
    new = None
    for el in l:
        if isinstance(el, str):
            name, hexid, eltype = [x.strip() for x in el.split(',')]
            # parse_one() looks IDs up using hexlify() output, which is lower
            # case, so store the ID in lower case no matter how the table
            # spells it. The generated C is unaffected apart from the letter
            # case of the hex digits.
            hexid = hexid.lower()
            multiple = name.endswith('*')
            name = name.strip('*')
            new = MatroskaElement(name, hexid, eltype, namespace)
            elementd[hexid] = new
            elementlist.append(new)
            subelements.append((new, multiple))
        else:
            if new is None:
                # Without this, the line below would fail on an unbound name.
                raise SyntaxError('Sub-element list without a parent element')
            new.add_subelements(parse_elems(el, namespace))
    return subelements
# Load both element tables when the module is executed; the second argument
# becomes the prefix of each element's definename.
parse_elems(elements_ebml, 'EBML')
parse_elems(elements_matroska, 'MATROSKA')
def generate_C_header():
    """Print the C header text for all registered elements to stdout.

    Output order: one #define per element ID, one struct per element that
    has sub-elements (walking the element list backwards), extern
    declarations for the matching descriptors, and MAX_EBML_SUBELEMENTS.
    """
    print('// Generated by TOOLS/matroska.py, do not edit manually')
    print()

    for elem in elementlist:
        print('#define {0.definename:40} 0x{0.elid}'.format(elem))
    print()

    # Only elements with sub-elements get a struct and a descriptor.
    containers = [elem for elem in elementlist if elem.subelements]

    for elem in reversed(containers):
        children = elem.subelements
        print()
        print('struct {0.structname} {{'.format(elem))
        # Pad the type column to one more than the longest type name.
        width = 1 + max(len(child.valname) for child, _ in children)
        for child, is_multiple in children:
            marker = '*' if is_multiple else ' '
            print(' {e.valname:{l}} {star}{e.fieldname};'.format(
                e=child, l=width, star=marker))
        print()
        for child, _ in children:
            print(' int n_{0.fieldname};'.format(child))
        print('};')

    for elem in containers:
        print('extern const struct ebml_elem_desc {0.structname}_desc;'.format(
            elem))

    print()
    biggest = max(len(elem.subelements) for elem in elementlist)
    print('#define MAX_EBML_SUBELEMENTS', biggest)
def generate_C_definitions():
    """Print the C macro invocations describing all registered elements.

    The element list is walked backwards. An element with sub-elements
    produces a '#define N' / E_S / F... / '}};' / '#undef N' group; any other
    element produces a single E(...) line.
    """
    print('// Generated by TOOLS/matroska.py, do not edit manually')
    print()

    for elem in reversed(elementlist):
        print()
        children = elem.subelements
        if not children:
            print('E("{0.name}", {0.fieldname}, {0.ebmltype})'.format(elem))
            continue

        print('#define N', elem.fieldname)
        print('E_S("{0}", {1})'.format(elem.name, len(children)))
        for child, is_multiple in children:
            # The flag is emitted as 0 or 1.
            print('F({0.definename}, {0.fieldname}, {1})'.format(
                child, int(is_multiple)))
        print('}};')
        print('#undef N')
def read(s, length):
    """Read exactly `length` bytes from stream `s`.

    Raises EOF if the stream returns a different number of bytes.
    """
    chunk = s.read(length)
    if len(chunk) == length:
        return chunk
    raise EOF
def read_id(s):
    """Read one element ID from stream `s` and return its raw bytes.

    The number of leading zero bits in the first byte gives the number of
    bytes that follow it. Raises SyntaxError if the first byte is zero.
    """
    first = read(s, 1)
    lead = ord(first)
    if lead == 0:
        raise SyntaxError

    extra = 0
    marker = 0x80
    # Walk down from the top bit until the first set bit is found.
    while lead & marker == 0:
        extra += 1
        marker >>= 1
    return first + read(s, extra)
def read_vint(s):
    """Read one variable-length integer from stream `s`.

    The number of leading zero bits in the first byte gives the number of
    bytes that follow; the marker bit and everything above it are masked off
    the first byte before the value is assembled.

    Returns (encoded length in bytes, value).
    Raises SyntaxError if the first byte is zero.
    """
    t = read(s, 1)
    i = 0
    mask = 128
    if ord(t) == 0:
        raise SyntaxError
    while not ord(t) & mask:
        i += 1
        mask >>= 1
    # Go through bytearray so a single byte is produced on Python 2 as well:
    # there bytes is str, and bytes((n,)) would give the text "(n,)".
    t = bytes(bytearray((ord(t) & (mask - 1),)))
    t += read(s, i)
    return i+1, byte2num(t)
def read_str(s, length):
    """Read a string payload of `length` bytes from `s`; returned undecoded."""
    raw = read(s, length)
    return raw
def read_uint(s, length):
    """Read `length` bytes from `s` and return them as an unsigned integer."""
    return byte2num(read(s, length))
def read_sint(s, length):
    """Read `length` bytes from `s` as a two's-complement signed integer."""
    value = read_uint(s, length)
    sign_bit = 1 << (8 * length - 1)
    if not value & sign_bit:
        return value
    # Sign bit set: shift the value down by the full range of the field.
    return value - (sign_bit << 1)
def read_float(s, length):
    """Read a big-endian IEEE 754 float of `length` bytes (4 or 8) from `s`.

    The bytes are consumed before the length is validated.
    Raises SyntaxError for any other length.

    Decoding is left to struct so that zero, denormals, infinities and NaN
    are handled; rebuilding the value from mantissa and exponent with an
    implicit leading 1 is only right for normal numbers.
    """
    import struct
    t = read(s, length)
    if length == 4:
        return struct.unpack('>f', t)[0]
    if length == 8:
        return struct.unpack('>d', t)[0]
    raise SyntaxError
def parse_one(s, depth, parent, maxlen):
    """Parse one element from stream `s`, print it, and recurse into children.

    Parameters:
      s      -- binary stream positioned at the start of an element
      depth  -- nesting depth, printed in front of each element
      parent -- the enclosing MatroskaElement, or None at top level
      maxlen -- accepted for the call signature; not used in this function

    Returns the total number of bytes the element occupies (ID + size field
    + payload) as an integer.

    Raises NotImplementedError for an element that is not among its parent's
    sub-elements (IDs 'ec' and 'bf' are allowed anywhere) and for an
    unhandled value type, and SyntaxError if the children of an element
    overrun its declared length.
    """
    elid = hexlify(read_id(s)).decode('ascii')
    elem = elementd.get(elid)
    if parent is not None and elid not in parent.subids and elid not in ('ec', 'bf'):
        print('Unexpected:', elid)
        raise NotImplementedError
    size, length = read_vint(s)
    # elid is a hex string, i.e. two characters per byte. Floor division
    # keeps the byte count an int on Python 3 as it already is on Python 2.
    this_length = len(elid) // 2 + size + length
    if elem is not None:
        if elem.valtype != 'skip':
            print(depth, elid, elem.name, 'size:', length, 'value:', end=' ')
        if elem.valtype == 'sub':
            print('subelements:')
            # Each child reports how many bytes it used up.
            while length > 0:
                length -= parse_one(s, depth + 1, elem, length)
            if length < 0:
                raise SyntaxError
        elif elem.valtype == 'str':
            print('string', repr(read_str(s, length).decode('utf8', 'replace')))
        elif elem.valtype in ('binary', 'ebml_id'):
            t = read_str(s, length)
            dec = ''
            if elem.valtype == 'ebml_id':
                idelem = elementd.get(hexlify(t).decode('ascii'))
                if idelem is None:
                    dec = '(UNKNOWN)'
                else:
                    dec = '({0.name})'.format(idelem)
            # Only short payloads are shown, as hex.
            if len(t) < 20:
                t = hexlify(t).decode('ascii')
            else:
                t = '<skipped {0} bytes>'.format(len(t))
            print('binary', t, dec)
        elif elem.valtype == 'uint':
            print('uint', read_uint(s, length))
        elif elem.valtype == 'sint':
            print('sint', read_sint(s, length))
        elif elem.valtype == 'float':
            print('float', read_float(s, length))
        elif elem.valtype == 'skip':
            read(s, length)
        else:
            raise NotImplementedError
    else:
        print(depth, 'Unknown element:', elid, 'size:', length)
        read(s, length)
    return this_length
def parse_toplevel(s):
    """Parse a single top-level element from stream `s`."""
    top_depth = 0
    no_parent = None
    length_limit = 1 << 63
    parse_one(s, top_depth, no_parent, length_limit)
# Command-line entry point.
#   --generate-header       print the C header
#   --generate-definitions  print the C definitions
#   anything else           treated as a path to a Matroska file to dump
if len(sys.argv) < 2:
    # Without this, the checks below fail with a bare IndexError.
    sys.exit('Usage: matroska.py '
             '(--generate-header | --generate-definitions | FILE)')

if sys.argv[1] == '--generate-header':
    generate_C_header()
elif sys.argv[1] == '--generate-definitions':
    generate_C_definitions()
else:
    # The with-block closes the file even when parsing raises.
    with open(sys.argv[1], "rb") as s:
        while 1:
            start = s.tell()
            try:
                parse_toplevel(s)
            except EOF:
                # EOF exactly on an element boundary is the normal way to
                # finish; anywhere else the file is truncated.
                if s.tell() != start:
                    raise Exception("Unexpected end of file")
                break

View File

@ -2,11 +2,11 @@ from waflib.Build import BuildContext
import os
def __file2string_cmd__(ctx):
return '"${{BIN_PERL}}" "{0}/TOOLS/file2string.pl" "${{SRC}}" > "${{TGT}}"' \
return '"${{BIN_PYTHON}}" "{0}/TOOLS/file2string.py" "${{SRC}}" > "${{TGT}}"' \
.format(ctx.srcnode.abspath())
def __matroska_cmd__(ctx, argument):
return '"${{BIN_PERL}}" "{0}/TOOLS/matroska.pl" "{1}" "${{SRC}}" > "${{TGT}}"' \
return '"${{BIN_PYTHON}}" "{0}/TOOLS/matroska.py" "{1}" "${{SRC}}" > "${{TGT}}"' \
.format(ctx.srcnode.abspath(), argument)
def __zshcomp_cmd__(ctx, argument):

View File

@ -81,6 +81,7 @@ build_options = [
}, {
'name': '--zsh-comp',
'desc': 'zsh completion',
'func': check_ctx_vars('BIN_PERL'),
'func': check_true,
'default': 'disable',
}, {
@ -995,11 +996,12 @@ def configure(ctx):
ctx.find_program(cc, var='CC')
ctx.find_program(pkg_config, var='PKG_CONFIG')
ctx.find_program(ar, var='AR')
ctx.find_program('perl', var='BIN_PERL')
ctx.find_program('python', var='BIN_PYTHON')
ctx.find_program('rst2html', var='RST2HTML', mandatory=False)
ctx.find_program('rst2man', var='RST2MAN', mandatory=False)
ctx.find_program('rst2pdf', var='RST2PDF', mandatory=False)
ctx.find_program(windres, var='WINDRES', mandatory=False)
ctx.find_program('perl', var='BIN_PERL', mandatory=False)
ctx.load('compiler_c')
ctx.load('waf_customizations')