upload
This commit is contained in:
155
utils/modules/pycaption/srt.py
Normal file
155
utils/modules/pycaption/srt.py
Normal file
@@ -0,0 +1,155 @@
|
||||
from copy import deepcopy
|
||||
import six
|
||||
|
||||
from .base import (
|
||||
BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode)
|
||||
from .exceptions import CaptionReadNoCaptions, InvalidInputError
|
||||
|
||||
|
||||
class SRTReader(BaseReader):
    """Reader that parses SubRip (SRT) text into a ``CaptionSet``."""

    def detect(self, content):
        """Return True when ``content`` structurally looks like SRT.

        The first line must consist only of digits (the cue index) and the
        second line must contain the ``-->`` timing separator.

        :param content: text to inspect
        :returns: ``True`` or ``False``
        """
        lines = content.splitlines()
        # Empty or single-line input cannot be SRT; without this guard the
        # indexing below raised IndexError instead of answering False.
        if len(lines) < 2:
            return False
        return lines[0].isdigit() and '-->' in lines[1]

    def read(self, content, lang='en-US'):
        """Parse SRT text and return a ``CaptionSet`` keyed by ``lang``.

        Parsing stops at the first line that should be a cue index but is
        not made only of digits, and also when a cue index is the very last
        line of the input (no timing line follows it).

        :param content: the SRT document as a text (unicode) string
        :param lang: language key under which the captions are stored
        :returns: ``CaptionSet({lang: captions})``
        :raises InvalidInputError: if ``content`` is not a text string
        :raises CaptionReadNoCaptions: if the resulting caption set is empty
        """
        if not isinstance(content, six.text_type):
            raise InvalidInputError('The content is not a unicode string.')

        lines = content.splitlines()
        start_line = 0
        captions = CaptionList()

        while start_line < len(lines):
            if not lines[start_line].isdigit():
                break

            # A dangling cue index at end of input has no timing line;
            # treat it as the end of the captions rather than crashing.
            if start_line + 1 >= len(lines):
                break

            end_line = self._find_text_line(start_line, lines)

            timing = lines[start_line + 1].split('-->')
            start = self._srttomicro(timing[0].strip(' \r\n'))
            end = self._srttomicro(timing[1].strip(' \r\n'))

            nodes = []

            # Text occupies everything between the timing line and the
            # blank separator that precedes the next cue.
            for line in lines[start_line + 2:end_line - 1]:
                # skip extra blank lines (a blank first line is still kept)
                if not nodes or line != '':
                    nodes.append(CaptionNode.create_text(line))
                    nodes.append(CaptionNode.create_break())

            if nodes:
                # remove last line break from end of caption list
                nodes.pop()
                captions.append(Caption(start, end, nodes))

            start_line = end_line

        caption_set = CaptionSet({lang: captions})

        if caption_set.is_empty():
            raise CaptionReadNoCaptions("empty caption file")

        return caption_set

    def _srttomicro(self, stamp):
        """Convert an ``HH:MM:SS,mmm`` timestamp to integer microseconds.

        The fractional part is optional, and ``.`` is accepted as the
        separator in addition to the standard ``,``. The digits after the
        separator are interpreted as a whole number of milliseconds.

        :param stamp: timestamp string, e.g. ``'00:01:02,500'``
        :returns: the timestamp in microseconds
        """
        timesplit = stamp.split(':')
        # Normalise the common "HH:MM:SS.mmm" variant so that it parses
        # instead of failing in int().
        seconds_field = timesplit[2].replace('.', ',')
        if ',' not in seconds_field:
            seconds_field += ',000'
        secsplit = seconds_field.split(',')

        return (int(timesplit[0]) * 3600000000 +
                int(timesplit[1]) * 60000000 +
                int(secsplit[0]) * 1000000 +
                int(secsplit[1]) * 1000)

    def _find_text_line(self, start_line, lines):
        """Locate the line on which the cue after ``start_line`` begins.

        Scans forward from ``start_line`` and returns the index of the first
        non-blank line that follows at least one blank line. When no such
        line exists, the result is one past the larger of ``start_line`` and
        ``len(lines)``.

        :param start_line: index at which scanning starts
        :param lines: all lines of the document
        :returns: index used by ``read`` as the start of the next cue
        """
        seen_blank = False
        for index in range(start_line, len(lines)):
            if lines[index].strip() == "":
                seen_blank = True
            elif seen_blank:
                return index

        return max(start_line, len(lines)) + 1
|
||||
|
||||
|
||||
class SRTWriter(BaseWriter):
    """Writer that serialises a caption set as SubRip (SRT) text."""

    def write(self, caption_set):
        """Return the SRT text for every language in ``caption_set``.

        The set is deep-copied first so the caller's object is never
        modified. The per-language documents are joined with the literal
        ``'MULTI-LANGUAGE SRT\\n'`` separator.

        :param caption_set: object providing ``get_languages()`` and
            ``get_captions(lang)``
        :returns: the SRT content as a single string
        """
        caption_set = deepcopy(caption_set)

        srt_captions = [
            self._recreate_lang(caption_set.get_captions(lang))
            for lang in caption_set.get_languages()
        ]

        return 'MULTI-LANGUAGE SRT\n'.join(srt_captions)

    def _recreate_lang(self, captions):
        """Render one language's captions as numbered SRT cues.

        :param captions: sequence of captions for a single language; it is
            not modified
        :returns: SRT text for these captions with the final newline removed
        """
        # Merge captions that are on the exact same timestamp otherwise some
        # players will play them in reversed order, libass specifically which
        # is used quite a lot, including VLC and MPV.
        # Fixes #189 - https://github.com/pbs/pycaption/issues/189
        #
        # Every caption is compared with the last merged one, so a run of
        # three or more identical timestamps collapses into a single cue.
        # NOTE(review): only start, end and nodes are carried over to the
        # merged Caption, and no line break is inserted between the two
        # node lists - confirm that this is the intended rendering.
        merged = []
        for caption in captions:
            previous = merged[-1] if merged else None
            if (previous is not None
                    and previous.start == caption.start
                    and previous.end == caption.end):
                merged[-1] = Caption(start=previous.start,
                                     end=previous.end,
                                     nodes=previous.nodes + caption.nodes)
            else:
                merged.append(caption)

        cues = []
        for count, caption in enumerate(merged, 1):
            start = caption.format_start(msec_separator=',')
            end = caption.format_end(msec_separator=',')
            timestamp = '%s --> %s\n' % (start[:12], end[:12])

            new_content = ''
            for node in caption.nodes:
                new_content = self._recreate_line(new_content, node)

            # Eliminate excessive line breaks
            new_content = new_content.strip()
            while '\n\n' in new_content:
                new_content = new_content.replace('\n\n', '\n')

            cues.append('%s\n%s%s\n\n' % (
                count, timestamp.replace('.', ','), new_content))

        # remove unwanted newline at end of file
        return ''.join(cues)[:-1]

    def _recreate_line(self, srt, line):
        """Append the rendering of one caption node to ``srt``.

        Text nodes add their content followed by a space, break nodes add a
        newline, and any other node type leaves ``srt`` unchanged.

        :param srt: text accumulated so far for the current caption
        :param line: the caption node to render
        :returns: the extended text
        """
        if line.type_ == CaptionNode.TEXT:
            return srt + '%s ' % line.content
        elif line.type_ == CaptionNode.BREAK:
            return srt + '\n'
        else:
            return srt
|
||||
Reference in New Issue
Block a user