This commit is contained in:
widevinedump
2021-12-23 15:51:08 +05:30
parent aba5a2d349
commit f910bf33ca
83 changed files with 8442 additions and 0 deletions

0
pyhls/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

549
pyhls/m3u8_parser.py Normal file
View File

@@ -0,0 +1,549 @@
import datetime
import iso8601
import itertools
import re
from pyhls import protocol
ATTRIBUTELISTPATTERN = re.compile(r'''((?:[^,"']|"[^"]*"|'[^']*')+)''')
def cast_date_time(value):
return iso8601.parse_date(value)
def format_date_time(value):
return value.isoformat()
class ParseError(Exception):
def __init__(self, lineno, line):
self.lineno = lineno
self.line = line
def __str__(self):
return 'Syntax error in manifest on line %d: %s' % (self.lineno, self.line)
def parse_m3u8(content, strict=False, custom_tags_parser=None):
'''
Given a M3U8 playlist content returns a dictionary with all data found
'''
data = {
'media_sequence': 0,
'is_variant': False,
'is_endlist': False,
'is_i_frames_only': False,
'is_independent_segments': False,
'playlist_type': None,
'playlists': [],
'segments': [],
'iframe_playlists': [],
'media': [],
'keys': [],
'rendition_reports': [],
'skip': {},
'part_inf': {},
'session_data': [],
'session_keys': [],
}
state = {
'expect_segment': False,
'expect_playlist': False,
'current_key': None,
'current_segment_map': None,
}
lineno = 0
for line in string_to_lines(content):
lineno += 1
line = line.strip()
if line.startswith(protocol.ext_x_byterange):
_parse_byterange(line, state)
state['expect_segment'] = True
elif line.startswith(protocol.ext_x_targetduration):
_parse_simple_parameter(line, data, float)
elif line.startswith(protocol.ext_x_media_sequence):
_parse_simple_parameter(line, data, int)
elif line.startswith(protocol.ext_x_discontinuity_sequence):
_parse_simple_parameter(line, data, int)
elif line.startswith(protocol.ext_x_program_date_time):
_, program_date_time = _parse_simple_parameter_raw_value(line, cast_date_time)
if not data.get('program_date_time'):
data['program_date_time'] = program_date_time
state['current_program_date_time'] = program_date_time
state['program_date_time'] = program_date_time
elif line.startswith(protocol.ext_x_discontinuity):
state['discontinuity'] = True
elif line.startswith(protocol.ext_x_cue_out_cont):
_parse_cueout_cont(line, state)
state['cue_out'] = True
elif line.startswith(protocol.ext_x_cue_out):
_parse_cueout(line, state, string_to_lines(content)[lineno - 2])
state['cue_out_start'] = True
state['cue_out'] = True
elif line.startswith(protocol.ext_x_cue_in):
state['cue_in'] = True
elif line.startswith(protocol.ext_x_cue_span):
state['cue_out'] = True
elif line.startswith(protocol.ext_x_version):
_parse_simple_parameter(line, data, int)
elif line.startswith(protocol.ext_x_allow_cache):
_parse_simple_parameter(line, data)
elif line.startswith(protocol.ext_x_key):
key = _parse_key(line)
state['current_key'] = key
if key not in data['keys']:
data['keys'].append(key)
elif line.startswith(protocol.extinf):
_parse_extinf(line, data, state, lineno, strict)
state['expect_segment'] = True
elif line.startswith(protocol.ext_x_stream_inf):
state['expect_playlist'] = True
_parse_stream_inf(line, data, state)
elif line.startswith(protocol.ext_x_i_frame_stream_inf):
_parse_i_frame_stream_inf(line, data)
elif line.startswith(protocol.ext_x_media):
_parse_media(line, data, state)
elif line.startswith(protocol.ext_x_playlist_type):
_parse_simple_parameter(line, data)
elif line.startswith(protocol.ext_i_frames_only):
data['is_i_frames_only'] = True
elif line.startswith(protocol.ext_is_independent_segments):
data['is_independent_segments'] = True
elif line.startswith(protocol.ext_x_endlist):
data['is_endlist'] = True
elif line.startswith(protocol.ext_x_map):
quoted_parser = remove_quotes_parser('uri')
segment_map_info = _parse_attribute_list(protocol.ext_x_map, line, quoted_parser)
state['current_segment_map'] = segment_map_info
# left for backward compatibility
data['segment_map'] = segment_map_info
elif line.startswith(protocol.ext_x_start):
attribute_parser = {
"time_offset": lambda x: float(x)
}
start_info = _parse_attribute_list(protocol.ext_x_start, line, attribute_parser)
data['start'] = start_info
elif line.startswith(protocol.ext_x_server_control):
_parse_server_control(line, data, state)
elif line.startswith(protocol.ext_x_part_inf):
_parse_part_inf(line, data, state)
elif line.startswith(protocol.ext_x_rendition_report):
_parse_rendition_report(line, data, state)
elif line.startswith(protocol.ext_x_part):
_parse_part(line, data, state)
elif line.startswith(protocol.ext_x_skip):
_parse_skip(line, data, state)
elif line.startswith(protocol.ext_x_session_data):
_parse_session_data(line, data, state)
elif line.startswith(protocol.ext_x_session_key):
_parse_session_key(line, data, state)
elif line.startswith(protocol.ext_x_preload_hint):
_parse_preload_hint(line, data, state)
elif line.startswith(protocol.ext_x_daterange):
_parse_daterange(line, data, state)
elif line.startswith(protocol.ext_x_gap):
state['gap'] = True
# Comments and whitespace
elif line.startswith('#'):
if callable(custom_tags_parser):
custom_tags_parser(line, data, lineno)
elif line.strip() == '':
# blank lines are legal
pass
elif state['expect_segment']:
_parse_ts_chunk(line, data, state)
state['expect_segment'] = False
elif state['expect_playlist']:
_parse_variant_playlist(line, data, state)
state['expect_playlist'] = False
elif strict:
raise ParseError(lineno, line)
# there could be remaining partial segments
if 'segment' in state:
data['segments'].append(state.pop('segment'))
return data
def _parse_key(line):
params = ATTRIBUTELISTPATTERN.split(line.replace(protocol.ext_x_key + ':', ''))[1::2]
key = {}
for param in params:
name, value = param.split('=', 1)
key[normalize_attribute(name)] = remove_quotes(value)
return key
def _parse_extinf(line, data, state, lineno, strict):
chunks = line.replace(protocol.extinf + ':', '').split(',', 1)
if len(chunks) == 2:
duration, title = chunks
elif len(chunks) == 1:
if strict:
raise ParseError(lineno, line)
else:
duration = chunks[0]
title = ''
if 'segment' not in state:
state['segment'] = {}
state['segment']['duration'] = float(duration)
state['segment']['title'] = title
def _parse_ts_chunk(line, data, state):
segment = state.pop('segment')
if state.get('program_date_time'):
segment['program_date_time'] = state.pop('program_date_time')
if state.get('current_program_date_time'):
segment['current_program_date_time'] = state['current_program_date_time']
state['current_program_date_time'] += datetime.timedelta(seconds=segment['duration'])
segment['uri'] = line
segment['cue_in'] = state.pop('cue_in', False)
segment['cue_out'] = state.pop('cue_out', False)
segment['cue_out_start'] = state.pop('cue_out_start', False)
if state.get('current_cue_out_scte35'):
segment['scte35'] = state['current_cue_out_scte35']
if state.get('current_cue_out_duration'):
segment['scte35_duration'] = state['current_cue_out_duration']
segment['discontinuity'] = state.pop('discontinuity', False)
if state.get('current_key'):
segment['key'] = state['current_key']
else:
# For unencrypted segments, the initial key would be None
if None not in data['keys']:
data['keys'].append(None)
if state.get('current_segment_map'):
segment['init_section'] = state['current_segment_map']
segment['dateranges'] = state.pop('dateranges', None)
segment['gap_tag'] = state.pop('gap', None)
data['segments'].append(segment)
def _parse_attribute_list(prefix, line, atribute_parser):
params = ATTRIBUTELISTPATTERN.split(line.replace(prefix + ':', ''))[1::2]
attributes = {}
for param in params:
name, value = param.split('=', 1)
name = normalize_attribute(name)
if name in atribute_parser:
value = atribute_parser[name](value)
attributes[name] = value
return attributes
def _parse_stream_inf(line, data, state):
data['is_variant'] = True
data['media_sequence'] = None
atribute_parser = remove_quotes_parser('codecs', 'audio', 'video', 'subtitles', 'closed_captions')
atribute_parser["program_id"] = int
atribute_parser["bandwidth"] = lambda x: int(float(x))
atribute_parser["average_bandwidth"] = int
atribute_parser["frame_rate"] = float
atribute_parser["video_range"] = str
state['stream_info'] = _parse_attribute_list(protocol.ext_x_stream_inf, line, atribute_parser)
def _parse_i_frame_stream_inf(line, data):
atribute_parser = remove_quotes_parser('codecs', 'uri')
atribute_parser["program_id"] = int
atribute_parser["bandwidth"] = int
atribute_parser["average_bandwidth"] = int
atribute_parser["video_range"] = str
iframe_stream_info = _parse_attribute_list(protocol.ext_x_i_frame_stream_inf, line, atribute_parser)
iframe_playlist = {'uri': iframe_stream_info.pop('uri'),
'iframe_stream_info': iframe_stream_info}
data['iframe_playlists'].append(iframe_playlist)
def _parse_media(line, data, state):
quoted = remove_quotes_parser('uri', 'group_id', 'language', 'assoc_language', 'name', 'instream_id', 'characteristics', 'channels')
media = _parse_attribute_list(protocol.ext_x_media, line, quoted)
data['media'].append(media)
def _parse_variant_playlist(line, data, state):
playlist = {'uri': line,
'stream_info': state.pop('stream_info')}
data['playlists'].append(playlist)
def _parse_byterange(line, state):
if 'segment' not in state:
state['segment'] = {}
state['segment']['byterange'] = line.replace(protocol.ext_x_byterange + ':', '')
def _parse_simple_parameter_raw_value(line, cast_to=str, normalize=False):
param, value = line.split(':', 1)
param = normalize_attribute(param.replace('#EXT-X-', ''))
if normalize:
value = value.strip().lower()
return param, cast_to(value)
def _parse_and_set_simple_parameter_raw_value(line, data, cast_to=str, normalize=False):
param, value = _parse_simple_parameter_raw_value(line, cast_to, normalize)
data[param] = value
return data[param]
def _parse_simple_parameter(line, data, cast_to=str):
return _parse_and_set_simple_parameter_raw_value(line, data, cast_to, True)
def _parse_cueout_cont(line, state):
param, value = line.split(':', 1)
res = re.match('.*Duration=(.*),SCTE35=(.*)$', value)
if res:
state['current_cue_out_duration'] = res.group(1)
state['current_cue_out_scte35'] = res.group(2)
def _cueout_no_duration(line):
# this needs to be called first since line.split in all other
# parsers will throw a ValueError if passed just this tag
if line == protocol.ext_x_cue_out:
return (None, None)
def _cueout_elemental(line, state, prevline):
param, value = line.split(':', 1)
res = re.match('.*EXT-OATCLS-SCTE35:(.*)$', prevline)
if res:
return (res.group(1), value)
else:
return None
def _cueout_envivio(line, state, prevline):
param, value = line.split(':', 1)
res = re.match('.*DURATION=(.*),.*,CUE="(.*)"', value)
if res:
return (res.group(2), res.group(1))
else:
return None
def _cueout_duration(line):
# this needs to be called after _cueout_elemental
# as it would capture those cues incompletely
# This was added seperately rather than modifying "simple"
param, value = line.split(':', 1)
res = re.match(r'DURATION=(.*)', value)
if res:
return (None, res.group(1))
def _cueout_simple(line):
# this needs to be called after _cueout_elemental
# as it would capture those cues incompletely
param, value = line.split(':', 1)
res = re.match(r'^(\d+(?:\.\d)?\d*)$', value)
if res:
return (None, res.group(1))
def _parse_cueout(line, state, prevline):
_cueout_state = (_cueout_no_duration(line)
or _cueout_elemental(line, state, prevline)
or _cueout_envivio(line, state, prevline)
or _cueout_duration(line)
or _cueout_simple(line))
if _cueout_state:
state['current_cue_out_scte35'] = _cueout_state[0]
state['current_cue_out_duration'] = _cueout_state[1]
def _parse_server_control(line, data, state):
attribute_parser = {
"can_block_reload": str,
"hold_back": lambda x: float(x),
"part_hold_back": lambda x: float(x),
"can_skip_until": lambda x: float(x)
}
data['server_control'] = _parse_attribute_list(
protocol.ext_x_server_control, line, attribute_parser
)
def _parse_part_inf(line, data, state):
attribute_parser = {
"part_target": lambda x: float(x)
}
data['part_inf'] = _parse_attribute_list(
protocol.ext_x_part_inf, line, attribute_parser
)
def _parse_rendition_report(line, data, state):
attribute_parser = remove_quotes_parser('uri')
attribute_parser['last_msn'] = int
attribute_parser['last_part'] = int
rendition_report = _parse_attribute_list(
protocol.ext_x_rendition_report, line, attribute_parser
)
data['rendition_reports'].append(rendition_report)
def _parse_part(line, data, state):
attribute_parser = remove_quotes_parser('uri')
attribute_parser['duration'] = lambda x: float(x)
attribute_parser['independent'] = str
attribute_parser['gap'] = str
attribute_parser['byterange'] = str
part = _parse_attribute_list(protocol.ext_x_part, line, attribute_parser)
# this should always be true according to spec
if state.get('current_program_date_time'):
part['program_date_time'] = state['current_program_date_time']
state['current_program_date_time'] += datetime.timedelta(seconds=part['duration'])
part['dateranges'] = state.pop('dateranges', None)
part['gap_tag'] = state.pop('gap', None)
if 'segment' not in state:
state['segment'] = {}
segment = state['segment']
if 'parts' not in segment:
segment['parts'] = []
segment['parts'].append(part)
def _parse_skip(line, data, state):
attribute_parser = {
"skipped_segments": int
}
data['skip'] = _parse_attribute_list(protocol.ext_x_skip, line, attribute_parser)
def _parse_session_data(line, data, state):
quoted = remove_quotes_parser('data_id', 'value', 'uri', 'language')
session_data = _parse_attribute_list(protocol.ext_x_session_data, line, quoted)
data['session_data'].append(session_data)
def _parse_session_key(line, data, state):
params = ATTRIBUTELISTPATTERN.split(line.replace(protocol.ext_x_session_key + ':', ''))[1::2]
key = {}
for param in params:
name, value = param.split('=', 1)
key[normalize_attribute(name)] = remove_quotes(value)
data['session_keys'].append(key)
def _parse_preload_hint(line, data, state):
attribute_parser = remove_quotes_parser('uri')
attribute_parser['type'] = str
attribute_parser['byterange_start'] = int
attribute_parser['byterange_length'] = int
data['preload_hint'] = _parse_attribute_list(
protocol.ext_x_preload_hint, line, attribute_parser
)
def _parse_daterange(line, date, state):
attribute_parser = remove_quotes_parser('id', 'class', 'start_date', 'end_date')
attribute_parser['duration'] = float
attribute_parser['planned_duration'] = float
attribute_parser['end_on_next'] = str
attribute_parser['scte35_cmd'] = str
attribute_parser['scte35_out'] = str
attribute_parser['scte35_in'] = str
parsed = _parse_attribute_list(
protocol.ext_x_daterange, line, attribute_parser
)
if 'dateranges' not in state:
state['dateranges'] = []
state['dateranges'].append(parsed)
def string_to_lines(string):
return string.strip().splitlines()
def remove_quotes_parser(*attrs):
return dict(zip(attrs, itertools.repeat(remove_quotes)))
def remove_quotes(string):
'''
Remove quotes from string.
Ex.:
"foo" -> foo
'foo' -> foo
'foo -> 'foo
'''
quotes = ('"', "'")
if string.startswith(quotes) and string.endswith(quotes):
return string[1:-1]
return string
def normalize_attribute(attribute):
return attribute.replace('-', '_').lower().strip()
def is_url(uri):
return uri.startswith(('https://', 'http://'))

34
pyhls/protocol.py Normal file
View File

@@ -0,0 +1,34 @@
ext_x_targetduration = '#EXT-X-TARGETDURATION'
ext_x_media_sequence = '#EXT-X-MEDIA-SEQUENCE'
ext_x_discontinuity_sequence = '#EXT-X-DISCONTINUITY-SEQUENCE'
ext_x_program_date_time = '#EXT-X-PROGRAM-DATE-TIME'
ext_x_media = '#EXT-X-MEDIA'
ext_x_playlist_type = '#EXT-X-PLAYLIST-TYPE'
ext_x_key = '#EXT-X-KEY'
ext_x_stream_inf = '#EXT-X-STREAM-INF'
ext_x_version = '#EXT-X-VERSION'
ext_x_allow_cache = '#EXT-X-ALLOW-CACHE'
ext_x_endlist = '#EXT-X-ENDLIST'
extinf = '#EXTINF'
ext_i_frames_only = '#EXT-X-I-FRAMES-ONLY'
ext_x_byterange = '#EXT-X-BYTERANGE'
ext_x_i_frame_stream_inf = '#EXT-X-I-FRAME-STREAM-INF'
ext_x_discontinuity = '#EXT-X-DISCONTINUITY'
ext_x_cue_out = '#EXT-X-CUE-OUT'
ext_x_cue_out_cont = '#EXT-X-CUE-OUT-CONT'
ext_x_cue_in = '#EXT-X-CUE-IN'
ext_x_cue_span = '#EXT-X-CUE-SPAN'
ext_x_scte35 = '#EXT-OATCLS-SCTE35'
ext_is_independent_segments = '#EXT-X-INDEPENDENT-SEGMENTS'
ext_x_map = '#EXT-X-MAP'
ext_x_start = '#EXT-X-START'
ext_x_server_control = '#EXT-X-SERVER-CONTROL'
ext_x_part_inf = '#EXT-X-PART-INF'
ext_x_part = '#EXT-X-PART'
ext_x_rendition_report = '#EXT-X-RENDITION-REPORT'
ext_x_skip = '#EXT-X-SKIP'
ext_x_session_data = '#EXT-X-SESSION-DATA'
ext_x_session_key = '#EXT-X-SESSION-KEY'
ext_x_preload_hint = '#EXT-X-PRELOAD-HINT'
ext_x_daterange = "#EXT-X-DATERANGE"
ext_x_gap = "#EXT-X-GAP"