This repository has been archived on 2024-07-02. You can view files and clone it, but cannot push or open issues or pull requests.
NETFLIX-DL-6.1.0/utils/modules/pycaption/srt.py

156 lines
5.1 KiB
Python
Raw Normal View History

2021-12-27 21:54:34 +00:00
from copy import deepcopy
import six
from .base import (
BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode)
from .exceptions import CaptionReadNoCaptions, InvalidInputError
class SRTReader(BaseReader):
    """Reader that parses SubRip (SRT) text into a ``CaptionSet``."""

    def detect(self, content):
        """Return True if ``content`` looks like SRT.

        The check is purely structural: the first line must consist only of
        digits (the cue number) and the second line must contain ``-->``
        (the timing line). Content with fewer than two lines is never SRT.
        """
        lines = content.splitlines()
        # Guard the length first so empty / one-line content yields False
        # instead of raising IndexError.
        return (
            len(lines) >= 2
            and lines[0].isdigit()
            and '-->' in lines[1]
        )

    def read(self, content, lang='en-US'):
        """Parse SRT text and return a ``CaptionSet`` keyed by ``lang``.

        :param content: the SRT document as a text (unicode) string
        :param lang: language key under which the captions are stored
        :raises InvalidInputError: if ``content`` is not a text string
        :raises CaptionReadNoCaptions: if no caption could be read
        """
        if not isinstance(content, six.text_type):
            raise InvalidInputError('The content is not a unicode string.')

        lines = content.splitlines()
        start_line = 0
        captions = CaptionList()

        while start_line < len(lines):
            # Every cue must start with its numeric index; anything else
            # ends parsing.
            if not lines[start_line].isdigit():
                break

            # A cue number on the very last line has no timing line after
            # it (truncated input); stop instead of raising IndexError.
            if start_line + 1 >= len(lines):
                break

            end_line = self._find_text_line(start_line, lines)

            timing = lines[start_line + 1].split('-->')
            start = self._srttomicro(timing[0].strip(' \r\n'))
            end = self._srttomicro(timing[1].strip(' \r\n'))

            nodes = []

            for line in lines[start_line + 2:end_line - 1]:
                # skip extra blank lines (a leading blank line is kept)
                if not nodes or line != '':
                    nodes.append(CaptionNode.create_text(line))
                    nodes.append(CaptionNode.create_break())

            if len(nodes):
                # remove last line break from end of caption list
                nodes.pop()
                caption = Caption(start, end, nodes)
                captions.append(caption)

            start_line = end_line

        caption_set = CaptionSet({lang: captions})

        if caption_set.is_empty():
            raise CaptionReadNoCaptions("empty caption file")

        return caption_set

    def _srttomicro(self, stamp):
        """Convert an ``HH:MM:SS,mmm`` timestamp into microseconds.

        A timestamp without the ``,mmm`` part is treated as ``,000``.
        """
        timesplit = stamp.split(':')
        if ',' not in timesplit[2]:
            timesplit[2] += ',000'
        secsplit = timesplit[2].split(',')
        microseconds = (int(timesplit[0]) * 3600000000 +
                        int(timesplit[1]) * 60000000 +
                        int(secsplit[0]) * 1000000 +
                        int(secsplit[1]) * 1000)

        return microseconds

    def _find_text_line(self, start_line, lines):
        """Locate where the cue that begins at ``start_line`` ends.

        Scans forward from ``start_line``. Once at least one blank line has
        been seen, the first following non-blank line stops the scan and its
        index is returned. If the input ends first, ``len(lines) + 1`` is
        returned, so that the caller's ``end_line - 1`` slice bound still
        covers the remaining lines.
        """
        end_line = start_line

        found = False
        while end_line < len(lines):
            if lines[end_line].strip() == "":
                found = True
            elif found is True:
                # Step back so the ``+ 1`` below lands on this non-blank line.
                end_line -= 1
                break
            end_line += 1

        return end_line + 1
class SRTWriter(BaseWriter):
    """Writer that serialises a ``CaptionSet`` as SubRip (SRT) text."""

    def write(self, caption_set):
        """Return the SRT text for every language in ``caption_set``.

        The set is deep-copied first so the caller's object is not touched.
        When there are several languages, their SRT documents are joined
        with the literal separator ``'MULTI-LANGUAGE SRT\\n'``.
        """
        caption_set = deepcopy(caption_set)

        srt_captions = [
            self._recreate_lang(caption_set.get_captions(lang))
            for lang in caption_set.get_languages()
        ]

        return 'MULTI-LANGUAGE SRT\n'.join(srt_captions)

    def _merge_same_timing(self, captions):
        """Return a new list with same-timestamp neighbours merged.

        Any run of consecutive captions sharing both ``start`` and ``end``
        (two or more) becomes a single ``Caption``. A line break is inserted
        between the merged captions' nodes so their text does not run
        together on one line. The input list is left unmodified.
        """
        merged = []
        for caption in captions:
            previous = merged[-1] if merged else None
            if (previous is not None
                    and previous.start == caption.start
                    and previous.end == caption.end):
                merged[-1] = Caption(
                    start=previous.start,
                    end=previous.end,
                    nodes=(list(previous.nodes)
                           + [CaptionNode.create_break()]
                           + list(caption.nodes)),
                )
            else:
                merged.append(caption)
        return merged

    def _recreate_lang(self, captions):
        """Render one language's captions as numbered SRT cues."""
        # Merge captions that are on the exact same timestamp, otherwise some
        # players will play them in reversed order, libass specifically which
        # is used quite a lot, including VLC and MPV.
        # Fixes #189 - https://github.com/pbs/pycaption/issues/189
        captions = self._merge_same_timing(captions)

        entries = []
        for count, caption in enumerate(captions, start=1):
            start = caption.format_start(msec_separator=',')
            end = caption.format_end(msec_separator=',')
            # Keep only the first 12 characters (the length of HH:MM:SS,mmm)
            # and force a comma as the millisecond separator.
            timestamp = '%s --> %s\n' % (start[:12], end[:12])
            timestamp = timestamp.replace('.', ',')

            new_content = ''
            for node in caption.nodes:
                new_content = self._recreate_line(new_content, node)

            # Eliminate excessive line breaks
            new_content = new_content.strip()
            while '\n\n' in new_content:
                new_content = new_content.replace('\n\n', '\n')

            entries.append(
                '%s\n%s%s\n\n' % (count, timestamp, new_content)
            )

        # remove unwanted newline at end of file
        return ''.join(entries)[:-1]

    def _recreate_line(self, srt, line):
        """Append the rendering of one caption node to ``srt``.

        Text nodes add their content followed by a space, break nodes add a
        newline, and any other node type leaves ``srt`` unchanged.
        """
        if line.type_ == CaptionNode.TEXT:
            return srt + '%s ' % line.content
        elif line.type_ == CaptionNode.BREAK:
            return srt + '\n'
        else:
            return srt