datatype = $fields[1]; $t->name = $fields[2]; $t->validParents = array(); for ($i = 0; $i + 3 < count($fields); $i++) { if ($fields[$i+3] == '*' || $fields[$i+3] == 'root') { $t->validParents[$i] = $fields[$i+3]; } else { $t->validParents[$i] = hexdec($fields[$i+3]); } } $this->_els[$id] = $t; $this->_ids[strtoupper($t->name)] = $id; } } public function exists($id) { return isset($this->_els[$id]); } public function name($id) { if (!isset($this->_els[$id])) return NULL; return $this->_els[$id]->name; } public function id($name) { $name = strtoupper($name); if (!isset($this->_ids[$name])) return NULL; return $this->_ids[$name]; } public function datatype($id) { if ($id == 'root') return 'container'; if (!isset($this->_els[$id])) return 'binary'; return $this->_els[$id]->datatype; } public function validChild($id1, $id2) { if (!isset($this->_els[$id2])) return TRUE; $parents = $this->_els[$id2]->validParents; return in_array('*', $parents) || in_array($id1, $parents); } } // Matroska element types global $EBML_ELEMENTS; $EBML_ELEMENTS = new EBMLElementTypeList(dirname(__FILE__) . 
'/matroska-elements.txt');

// Decode big-endian integer.
//
// $data    - raw bytes, most significant byte first (at most 8 bytes)
// $signed  - when TRUE, the top bit of the first byte is a two's-complement sign bit
// $carryIn - value of already-decoded higher-order bits that $data is appended to
//
// Returns an int, or a float once the value no longer fits a native integer.
// Throws Exception when $data is longer than 8 bytes.
function ebmlDecodeInt($data, $signed=FALSE, $carryIn=0) {
    $length = strlen($data);
    if ($length > 8) {
        throw new Exception('not supported: integer too long');
    }
    $n = $carryIn;
    for ($i = 0; $i < $length; $i++) {
        // Fall back to floating point right before a shift that would overflow
        if ($n > (PHP_INT_MAX >> 8) || $n < ((-PHP_INT_MAX-1) >> 8)) {
            $n = floatval($n);
        }
        $n = $n * 0x100 + ord($data[$i]);
        // Sign-extend based on the very first byte only
        if ($i == 0 && $signed && ($n & 0x80) != 0) {
            $n -= 0x100;
        }
    }
    return $n;
}

// Decode big-endian IEEE float.
//
// $data - 0, 4 or 8 raw bytes.
//
// Returns 0 for empty data, the decoded float for 4/8 bytes, or NULL (after
// logging) when the length is unsupported or the host float layout is not
// recognised. Host byte order is detected by packing 1e9 and comparing the
// result with its known little-endian and big-endian byte patterns.
function ebmlDecodeFloat($data) {
    switch (strlen($data)) {
        case 0:
            return 0;
        case 4:
            switch (pack('f', 1e9)) {
                case '(knN':
                    // little-endian host: reverse the big-endian input
                    $arr = unpack('f', strrev($data));
                    return $arr[1];
                case 'Nnk(':
                    // big-endian host: bytes are already in native order
                    $arr = unpack('f', $data);
                    return $arr[1];
                default:
                    error_log('cannot decode floats');
                    return NULL;
            }
        case 8:
            switch (pack('d', 1e9)) {
                case "\x00\x00\x00\x00\x65\xcd\xcd\x41":
                    // little-endian host: reverse the big-endian input
                    $arr = unpack('d', strrev($data));
                    return $arr[1];
                case "\x41\xcd\xcd\x65\x00\x00\x00\x00":
                    // big-endian host: bytes are already in native order
                    $arr = unpack('d', $data);
                    return $arr[1];
                default:
                    error_log('cannot decode floats');
                    return NULL;
            }
        default:
            error_log('unsupported float length');
            return NULL;
    }
}

// Decode big-endian signed offset from Jan 01, 2001 (UTC) in nanoseconds.
// Convert to offset from Jan 01, 1970 in seconds.
//
// EBML dates are counted from 2001-01-01T00:00:00 UTC, which is 978307200
// seconds after the Unix epoch.
function ebmlDecodeDate($data) {
    return ebmlDecodeInt($data, TRUE) * 1e-9 + 978307200;
}

// Decode data of specified datatype.
//
// $datatype is one of 'int', 'uint', 'float', 'string', 'date', 'binary'.
// Strings have trailing NUL padding removed; binary data is returned as is.
// Throws Exception for any other datatype.
function ebmlDecode($data, $datatype) {
    switch ($datatype) {
        case 'int':
            return ebmlDecodeInt($data, TRUE);
        case 'uint':
            return ebmlDecodeInt($data, FALSE);
        case 'float':
            return ebmlDecodeFloat($data);
        case 'string':
            return chop($data, "\0");
        case 'date':
            return ebmlDecodeDate($data);
        case 'binary':
            return $data;
        default:
            throw new Exception('unknown datatype');
    }
}

// Methods for reading data from section of EBML file.
//
// A reader is a window onto $fileHandle that starts at absolute byte $offset
// and spans $size bytes (NULL while the length is not yet known). The current
// position is kept relative to the start of the window.
class EBMLReader {
    private $_fileHandle;
    private $_offset;
    private $_size;
    private $_position;

    public function __construct($fileHandle, $offset=0, $size=NULL) {
        $this->_fileHandle = $fileHandle;
        $this->_offset = $offset;
        $this->_size = $size;
        $this->_position = 0;
    }

    // Tell position within data section
    public function position() {
        return $this->_position;
    }

    // Set position within data section
    public function setPosition($position) {
        $this->_position = $position;
    }

    // Total size of data section (NULL if unknown)
    public function size() {
        return $this->_size;
    }

    // Set end of data section; throws if the size is already known
    public function setSize($size) {
        if ($this->_size !== NULL) {
            throw new Exception('size already set');
        }
        $this->_size = $size;
    }

    // Determine whether we are at end of data.
    // With an unknown size the file is probed for EOF at the current position;
    // reaching EOF fixes the size at the current position.
    public function endOfData() {
        if ($this->_size !== NULL) {
            return $this->_position >= $this->_size;
        }
        fseek($this->_fileHandle, $this->_offset + $this->_position);
        fread($this->_fileHandle, 1);
        if (feof($this->_fileHandle)) {
            $this->_size = $this->_position;
            return TRUE;
        }
        return FALSE;
    }

    // Create EBMLReader containing $size bytes and advance.
    // A NULL $size yields an open-ended slice and leaves the position alone.
    // Throws when the slice would run past a known end of data; the position
    // is only advanced once that check has passed.
    public function nextSlice($size) {
        $slice = new EBMLReader($this->_fileHandle, $this->_offset + $this->_position, $size);
        if ($size !== NULL) {
            $end = $this->_position + $size;
            if ($this->_size !== NULL && $end > $this->_size) {
                throw new Exception('unexpected end of data');
            }
            $this->_position = $end;
        }
        return $slice;
    }

    // Read entire region.
    // Throws when the region length is unknown or the file yields fewer bytes.
    public function readAll() {
        // Must be tested before the zero check: NULL == 0 is TRUE in PHP
        if ($this->_size === NULL) {
            throw new Exception('unknown length');
        }
        if ($this->_size == 0) {
            return '';
        }
        fseek($this->_fileHandle, $this->_offset);
        $data = fread($this->_fileHandle, $this->_size);
        if ($data === FALSE || strlen($data) != $this->_size) {
            throw new Exception('error reading from file');
        }
        return $data;
    }

    // Read $size bytes
    public function read($size) {
        return $this->nextSlice($size)->readAll();
    }

    // Read variable-length integer.
    //
    // The number of leading zero bits in the first byte gives the number of
    // bytes that follow. Returns NULL when every value bit is set, the
    // decoded number otherwise (a float when it exceeds the native int range).
    // With $signed the value is shifted so that the range is centred on zero.
    public function readVarInt($signed=FALSE) {
        // Read size and remove flag
        $n = ord($this->read(1));
        if ($n == 0) {
            throw new Exception('not supported: variable-length integer too long');
        }
        $size = 0;
        $flag = 0x80;
        while (($n & $flag) == 0) {
            $flag = $flag >> 1;
            $size++;
        }
        $n -= $flag;

        // Read remaining data
        $rawInt = $this->read($size);

        // Check for all ones
        if ($n == $flag - 1 && $rawInt === str_repeat("\xFF", $size)) {
            return NULL;
        }

        // Range shift for signed integers
        if ($signed) {
            if ($flag == 0x01) {
                // First byte carries no value bits: take the offset out of
                // the next byte instead and drop that byte from the raw data
                $n = ord($rawInt[0]) - 0x80;
                $rawInt = substr($rawInt, 1);
            } else {
                $n -= ($flag >> 1);
            }
        }

        // Convert to integer
        $n = ebmlDecodeInt($rawInt, FALSE, $n);

        // Range shift for signed integers
        if ($signed) {
            if ($n == PHP_INT_MAX) {
                $n = floatval($n);  // avoid integer overflow on increment
            }
            $n++;
        }

        return $n;
    }
}

// EBML element: an ID, its looked-up name and datatype, and a reader over its
// content. $headSize is the number of bytes taken by the ID and size fields.
class EBMLElement {
    private $_id;
    private $_name;
    private $_datatype;
    private $_content;
    private $_headSize;

    public function __construct($id, $content, $headSize) {
        global $EBML_ELEMENTS;
        $this->_id = $id;
        $this->_name = $EBML_ELEMENTS->name($this->_id);
        $this->_datatype = $EBML_ELEMENTS->datatype($this->_id);
        $this->_content = $content;
        $this->_headSize = $headSize;
    }

    public function id() {return $this->_id;}
    public function name() {return $this->_name;}
    public function datatype() {return $this->_datatype;}
    public function content() {return $this->_content;}
    public function headSize() {return $this->_headSize;}

    // Total size of element (including ID and datasize)
    public function size() {
        return $this->_headSize + $this->_content->size();
    }

    // Read and interpret content.
    // Binary elements return their content reader; all others are decoded.
    public function value() {
        if ($this->_datatype == 'binary') {
            return $this->_content;
        }
        return ebmlDecode($this->_content->readAll(), $this->_datatype);
    }
}

// Iterate over EBML elements in data.
//
// Child elements are parsed on demand and cached by their byte position
// within the content; the iterator key is that byte position.
// The #[\ReturnTypeWillChange] lines are attributes on PHP 8 and plain
// comments on older versions.
class EBMLElementList extends EBMLElement implements Iterator {
    private $_cache;
    private $_position;
    private static $MAX_ELEMENTS = 10000;

    public function __construct($id, $content, $headSize) {
        parent::__construct($id, $content, $headSize);
        $this->_cache = array();
        $this->_position = 0;
    }

    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->_position = 0;
    }

    #[\ReturnTypeWillChange]
    public function current() {
        if ($this->valid()) {
            return $this->_cache[$this->_position];
        }
        return NULL;
    }

    #[\ReturnTypeWillChange]
    public function key() {
        return $this->_position;
    }

    #[\ReturnTypeWillChange]
    public function next() {
        $element = $this->current();
        if ($element === NULL) {
            return;  // nothing at this position, so nothing to step over
        }
        $this->_position += $element->size();
        if ($this->content()->size() !== NULL && $this->_position > $this->content()->size()) {
            throw new Exception('unexpected end of data');
        }
    }

    // Parse (or fetch from cache) the element at the current position.
    // In a list of unknown size, meeting an ID that is not a valid child
    // marks the end of the list and fixes its size.
    #[\ReturnTypeWillChange]
    public function valid() {
        global $EBML_ELEMENTS;
        if (isset($this->_cache[$this->_position])) {
            return TRUE;
        }
        $content = $this->content();
        $content->setPosition($this->_position);
        if ($content->endOfData()) {
            return FALSE;
        }
        $id = $content->readVarInt();
        if ($id === NULL) {
            throw new Exception('invalid ID');
        }
        if ($content->size() === NULL && !$EBML_ELEMENTS->validChild($this->id(), $id)) {
            $content->setSize($this->_position);
            return FALSE;
        }
        $size = $content->readVarInt();
        $headSize = $content->position() - $this->_position;
        $body = $content->nextSlice($size);
        if ($EBML_ELEMENTS->datatype($id) == 'container') {
            $element = new EBMLElementList($id, $body, $headSize);
        } else {
            if ($size === NULL) {
                throw new Exception('non-container element of unknown size');
            }
            $element = new EBMLElement($id, $body, $headSize);
        }
        $this->_cache[$this->_position] = $element;
        return TRUE;
    }

    // Total size of element (including ID and size).
    // When the content size is unknown, the children are walked so that the
    // end of the list gets discovered.
    public function size() {
        if ($this->content()->size() === NULL) {
            $iElement = 0;
            foreach ($this as $element) { // iterate over elements to find end
                $iElement++;
                if ($iElement > self::$MAX_ELEMENTS) {
                    throw new Exception('not supported: too many elements');
                }
            }
        }
        return $this->headSize() + $this->content()->size();
    }

    // Read and interpret content
    public function value() {
        return $this;
    }

    // Get element value by name (case-insensitive).
    // Returns the value of the first matching child, or $defaultValue.
    public function get($name, $defaultValue=NULL) {
        $wanted = strtoupper($name);
        $iElement = 0;
        foreach ($this as $element) {
            $iElement++;
            if ($iElement > self::$MAX_ELEMENTS) {
                throw new Exception('not supported: too many elements');
            }
            // name() is NULL for unknown IDs; cast so strtoupper gets a string
            if (strtoupper((string) $element->name()) === $wanted) {
                return $element->value();
            }
        }
        return $defaultValue;
    }
}

// Parse block.
//
// Reads the block header (track number, signed 16-bit timecode, flags) from
// $reader, then the lacing size table, and exposes each frame as a reader
// slice in $frames.
class MatroskaBlock {
    const LACING_NONE = 0;
    const LACING_XIPH = 1;
    const LACING_EBML = 3;
    const LACING_FIXED = 2;

    public $trackNumber;
    public $timecode;
    public $keyframe;
    public $invisible;
    public $lacing;
    public $discardable;
    public $frames;

    public function __construct($reader) {
        // Header
        $this->trackNumber = $reader->readVarInt();
        $this->timecode = ebmlDecodeInt($reader->read(2), TRUE);
        $flags = ord($reader->read(1));
        if (($flags & 0x70) != 0) {
            throw new Exception('reserved flags set');
        }
        $this->keyframe = (($flags & 0x80) != 0);
        $this->invisible = (($flags & 0x08) != 0);
        $this->lacing = ($flags >> 1) & 0x03;
        $this->discardable = (($flags & 0x01) != 0);

        // Lacing sizes: $nsizes is the number of frames before the last one
        if ($this->lacing == self::LACING_NONE) {
            $nsizes = 0;
        } else {
            $nsizes = ord($reader->read(1));
        }
        $sizes = array();
        switch ($this->lacing) {
            case self::LACING_XIPH:
                // Each size is a run of bytes summed up to and including
                // the first byte that is not 255
                for ($i = 0; $i < $nsizes; $i++) {
                    $size = 0;
                    $x = 255;
                    while ($x == 255) {
                        $x = ord($reader->read(1));
                        $size += $x;
                        if ($size > 65536) {
                            throw new Exception('not supported: laced frame too long');
                        }
                    }
                    $sizes[$i] = $size;
                }
                break;
            case self::LACING_EBML:
                // First size is absolute, the rest are signed differences
                $size = 0;
                for ($i = 0; $i < $nsizes; $i++) {
                    $dsize = $reader->readVarInt($i != 0);
                    if ($dsize === NULL || $size + $dsize < 0) {
                        throw new Exception('invalid frame size');
                    }
                    $size += $dsize;
                    $sizes[$i] = $size;
                }
                break;
            case self::LACING_FIXED:
                // Remaining data is split evenly between all frames
                $lenRemaining = $reader->size() - $reader->position();
                if ($lenRemaining % ($nsizes + 1) != 0) {
                    throw new Exception('data size not divisible by frame count');
                }
                $size = (int) ($lenRemaining / ($nsizes + 1));
                for ($i = 0; $i < $nsizes; $i++) {
                    $sizes[$i] = $size;
                }
                break;
        }

        // Frames: the last frame takes whatever data is left
        $this->frames = array();
        for ($i = 0; $i < $nsizes; $i++) {
            $this->frames[$i] = $reader->nextSlice($sizes[$i]);
        }
        $this->frames[$nsizes] = $reader->nextSlice($reader->size() - $reader->position());
    }
}

// Create element list from $fileHandle.
//
// Checks the 4-byte EBML signature and the EBML header (read version,
// document type and document type read version) and returns the root
// element list. Throws Exception when any of these checks fails.
function readMatroska($fileHandle) {
    $reader = new EBMLReader($fileHandle);
    if ($reader->read(4) !== "\x1a\x45\xdf\xa3") {
        throw new Exception('not an EBML file');
    }
    $root = new EBMLElementList('root', $reader, 0);
    $header = $root->get('EBML');
    if (!($header instanceof EBMLElementList)) {
        // Without this guard a missing header is a fatal method call on NULL
        throw new Exception('not an EBML file');
    }
    $ebmlVersion = $header->get('EBMLReadVersion', 1);
    $docType = $header->get('DocType');
    $docTypeVersion = $header->get('DocTypeReadVersion', 1);
    if ($ebmlVersion != 1) {
        throw new Exception('unsupported EBML version');
    }
    if ($docType != 'matroska' && $docType != 'webm') {
        throw new Exception ('unsupported document type');
    }
    if ($docTypeVersion < 1 || $docTypeVersion > 4) {
        throw new Exception ('unsupported document type version');
    }
    return $root;
}

// Encode non-negative integer $n as a variable-length integer.
//
// Uses the shortest encoding whose value bits are not all ones, because
// EBMLReader::readVarInt() returns NULL for an all-ones pattern (so 127
// becomes "\x40\x7F", not "\xFF").
// Throws Exception when $n is negative or does not fit in 8 bytes.
function ebmlEncodeVarInt($n) {
    if ($n < 0) {
        throw new Exception('not supported: negative number');
    }
    $data = '';
    $flag = 0x80;
    while (TRUE) {
        if ($flag == 0) {
            throw new Exception('not supported: number too large');
        }
        $fits = ($n < $flag);
        $allOnes = ($n == $flag - 1) && ($data === str_repeat("\xFF", strlen($data)));
        if ($fits && !$allOnes) {
            break;
        }
        // Move the low byte into the tail and retry with one more byte
        $data = chr($n & 0xFF) . $data;
        $n = $n >> 8;
        $flag = $flag >> 1;
    }
    return chr($n | $flag) . $data;
}

// Encode the ID of the element called $name.
// Throws Exception when the name is not in the element type list.
function ebmlEncodeElementName($name) {
    global $EBML_ELEMENTS;
    $id = $EBML_ELEMENTS->id($name);
    if ($id === NULL) {
        throw new Exception('unknown element name');
    }
    return ebmlEncodeVarInt($id);
}

// Encode a complete element: ID, content length, then the content bytes.
function ebmlEncodeElement($name, $content) {
    return ebmlEncodeElementName($name) . ebmlEncodeVarInt(strlen($content)) . $content;
}