471 lines
16 KiB
Python
471 lines
16 KiB
Python
import re
|
|
import six
|
|
import sys
|
|
import datetime
|
|
from copy import deepcopy
|
|
|
|
|
|
from .base import (
|
|
BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode
|
|
)
|
|
|
|
from .geometry import Layout
|
|
|
|
from .exceptions import (
|
|
CaptionReadError, CaptionReadSyntaxError, CaptionReadNoCaptions,
|
|
InvalidInputError
|
|
)
|
|
|
|
# A WebVTT timing line has both start/end times and layout related settings
|
|
# (referred to as 'cue settings' in the documentation)
|
|
# The following pattern captures [start], [end] and [cue settings] if existent
|
|
from pycaption.geometry import HorizontalAlignmentEnum
|
|
|
|
# NOTE: every pattern below is a raw string so that regex escapes such as
# ``\S`` and ``\d`` are not treated as (invalid) Python string escapes.

# Timing line: "<start> --> <end>[ <cue settings>]".
# Groups: 1 = start timestamp, 2 = end timestamp, 3 = cue settings text
# (None when the line carries no settings).
TIMING_LINE_PATTERN = re.compile(r'^(\S+)\s+-->\s+(\S+)(?:\s+(.*?))?\s*$')

# Timestamp: "[hours:]minutes:seconds.milliseconds".
# Group 3 (":dd") is only present in the three-field form; when it is, group 1
# holds the hours and group 2 the minutes, otherwise group 1 holds the minutes
# and group 2 the seconds. Group 4 is always the milliseconds.
TIMESTAMP_PATTERN = re.compile(r'^(\d+):(\d{2})(:\d{2})?\.(\d{3})')

# Voice span start tag, e.g. "<v.loud Bob>". Group 2 is the annotation text
# (the speaker name).
VOICE_SPAN_PATTERN = re.compile(r'<v(\.\w+)* ([^>]*)>')

# These WebVTT tags are stripped off the cues on conversion
OTHER_SPAN_PATTERN = re.compile(
    r'</?([cibuv]|ruby|rt|lang|(\d+):(\d{2})(:\d{2})?\.(\d{3})).*?>'
)
|
|
|
|
# Lookup table from pycaption's horizontal alignment values to the keyword
# the writer puts after "align:" in a cue settings list.
WEBVTT_VERSION_OF = {
    HorizontalAlignmentEnum.LEFT: 'left',
    HorizontalAlignmentEnum.CENTER: 'middle',
    HorizontalAlignmentEnum.RIGHT: 'right',
    HorizontalAlignmentEnum.START: 'start',
    HorizontalAlignmentEnum.END: 'end',
}

# Alignment keyword regarded as the default one.
DEFAULT_ALIGNMENT = 'middle'
|
|
|
|
|
|
def microseconds(h, m, s, f):
    """
    Convert a clock reading to a whole number of microseconds.

    Every argument is passed through ``int()``, so ints and numeric strings
    are both accepted.

    :param h: hours
    :param m: minutes
    :param s: seconds
    :param f: milliseconds
    :rtype: int
    """
    whole_seconds = 3600 * int(h) + 60 * int(m) + int(s)
    return 1000000 * whole_seconds + 1000 * int(f)
|
|
|
|
|
|
class WebVTTReader(BaseReader):
    """
    Reader that turns WebVTT text into a CaptionSet.
    """

    def __init__(self, ignore_timing_errors=True, *args, **kwargs):
        """
        :param ignore_timing_errors: Whether to ignore timing checks
        """
        self.ignore_timing_errors = ignore_timing_errors

    def detect(self, content):
        """
        Tell whether the 'WEBVTT' marker occurs anywhere in the content.

        :rtype: bool
        """
        return 'WEBVTT' in content

    def read(self, content, lang='en-US'):
        """
        Parse WebVTT text into a CaptionSet holding a single language.

        :param content: the WebVTT document, as a unicode string
        :param lang: language key under which the captions are stored
        :rtype: CaptionSet
        :raises InvalidInputError: if content is not a unicode string
        :raises CaptionReadNoCaptions: if the resulting caption set is empty
        """
        if not isinstance(content, six.text_type):
            raise InvalidInputError('The content is not a unicode string.')

        caption_set = CaptionSet({lang: self._parse(content.splitlines())})

        if caption_set.is_empty():
            raise CaptionReadNoCaptions("empty caption file")

        return caption_set

    def _parse(self, lines):
        """
        Build the list of captions from the lines of a WebVTT document.

        A line containing '-->' opens a cue, the following non-empty lines
        are its text (joined by break nodes) and an empty line closes it.
        Lines found outside a cue (comments, metadata) are ignored.

        The line numbers appended to error messages are zero-based indexes
        into ``lines``.

        :param lines: the document, already split into lines
        :rtype: CaptionList
        :raises CaptionReadError: if a timing line is rejected
        :raises CaptionReadSyntaxError: if a cue has no content
        """
        captions = CaptionList()
        start = None
        end = None
        nodes = []
        layout_info = None
        found_timing = False

        for i, line in enumerate(lines):

            if '-->' in line:
                found_timing = True
                timing_line = i
                last_start_time = captions[-1].start if captions else 0
                try:
                    start, end, layout_info = self._parse_timing_line(
                        line, last_start_time)
                except CaptionReadError as e:
                    # Re-raise the same exception type with the line number
                    # added, keeping the original traceback.
                    new_message = '%s (line %d)' % (e.args[0], timing_line)
                    six.reraise(
                        type(e), type(e)(new_message), sys.exc_info()[2])

            elif line == '':
                if found_timing:
                    if not nodes:
                        raise CaptionReadSyntaxError(
                            'Cue without content. (line %d)' % timing_line)
                    found_timing = False
                    caption = Caption(
                        start, end, nodes, layout_info=layout_info)
                    captions.append(caption)
                    nodes = []

            elif found_timing:
                if nodes:
                    nodes.append(CaptionNode.create_break())
                nodes.append(CaptionNode.create_text(self._decode(line)))

            # Anything else is a comment or some metadata; ignore it

        # Add a last caption if there are remaining nodes
        if nodes:
            caption = Caption(start, end, nodes, layout_info=layout_info)
            captions.append(caption)

        return captions

    def _remove_styles(self, line):
        """
        Rewrite voice spans as "<name>: " and strip every other WebVTT tag.
        """
        partial_result = VOICE_SPAN_PATTERN.sub('\\2: ', line)
        return OTHER_SPAN_PATTERN.sub('', partial_result)

    def _validate_timings(self, start, end, last_start_time):
        """
        Check the timestamps of a cue against each other and against the
        start of the previous cue.

        :raises CaptionReadSyntaxError: if start or end is None
        :raises CaptionReadError: if start is after end, or before the start
            of the previous cue
        """
        if start is None:
            raise CaptionReadSyntaxError(
                'Invalid cue start timestamp.')
        if end is None:
            raise CaptionReadSyntaxError('Invalid cue end timestamp.')
        if start > end:
            raise CaptionReadError(
                'End timestamp is not greater than start timestamp.')
        if start < last_start_time:
            raise CaptionReadError(
                'Start timestamp is not greater than or equal '
                'to start timestamp of previous cue.')

    def _parse_timing_line(self, line, last_start_time):
        """
        Extract start, end and positioning from a cue timing line.

        The timings are only validated when ``ignore_timing_errors`` is off.

        :param line: the line containing '-->'
        :param last_start_time: start of the previous cue, in microseconds
        :returns: Tuple (int, int, Layout); the Layout is None when the line
            has no cue settings
        :raises CaptionReadSyntaxError: if the line or one of its timestamps
            is malformed
        """
        m = TIMING_LINE_PATTERN.search(line)
        if not m:
            raise CaptionReadSyntaxError(
                'Invalid timing format.')

        start = self._parse_timestamp(m.group(1))
        end = self._parse_timestamp(m.group(2))

        cue_settings = m.group(3)

        if not self.ignore_timing_errors:
            self._validate_timings(start, end, last_start_time)

        layout_info = None
        if cue_settings:
            layout_info = Layout(webvtt_positioning=cue_settings)

        return start, end, layout_info

    def _parse_timestamp(self, timestamp):
        """Returns an integer representing a number of microseconds

        :rtype: int
        :raises CaptionReadSyntaxError: if the timestamp is malformed
        """
        m = TIMESTAMP_PATTERN.search(timestamp)
        if not m:
            raise CaptionReadSyntaxError(
                'Invalid timing format.')

        m = m.groups()

        if m[2]:
            # Timestamp takes the form of
            # [hours]:[minutes]:[seconds].[milliseconds]
            return microseconds(m[0], m[1], m[2].replace(":", ""), m[3])
        else:
            # Timestamp takes the form of [minutes]:[seconds].[milliseconds]
            return microseconds(0, m[0], m[1], m[3])

    def _decode(self, s):
        """
        Convert cue text from WebVTT XML-like format to plain unicode.

        :type s: unicode
        :rtype: unicode
        """
        s = s.strip()
        # Convert voice span
        s = VOICE_SPAN_PATTERN.sub('\\2: ', s)
        # TODO: Add support for other WebVTT tags. For now just strip them
        # off the text.
        s = OTHER_SPAN_PATTERN.sub('', s)
        # Replace WebVTT special XML codes with plain unicode values
        s = s.replace('&lt;', '<')
        s = s.replace('&gt;', '>')
        s = s.replace('&lrm;', '\u200e')
        s = s.replace('&rlm;', '\u200f')
        s = s.replace('&nbsp;', '\u00a0')
        # Must do ampersand last, otherwise an escaped ampersand followed by
        # e.g. "lt;" would be decoded twice
        s = s.replace('&amp;', '&')
        return s
|
|
|
|
|
|
class WebVTTWriter(BaseWriter):
    """
    Writer that serializes a CaptionSet as WebVTT text.
    """
    HEADER = 'WEBVTT\n\n'
    # Layout of the language being written; set by write()
    global_layout = None
    # Passed to Layout.as_percentage_of() when positioning is relativized
    video_width = None
    video_height = None

    def write(self, caption_set):
        """
        Serialize the first language of the caption set as WebVTT.

        The caption set is deep-copied before it is read, and the layout of
        the chosen language is stored on ``self.global_layout``.

        :type caption_set: CaptionSet
        :rtype: unicode
        """
        output = self.HEADER

        if caption_set.is_empty():
            return output

        caption_set = deepcopy(caption_set)

        # TODO: styles. These go into a separate CSS file, which doesn't really
        # fit the API here. Figure that out. Though some style stuff can be
        # done in-line. This format is a little bit crazy.

        # WebVTT's language support seems to be a bit crazy, so let's just
        # support a single one for now.
        lang = list(caption_set.get_languages())[0]

        self.global_layout = caption_set.get_layout_info(lang)

        captions = caption_set.get_captions(lang)

        return output + '\n'.join(
            [self._write_caption(caption_set, caption)
             for caption in captions])

    def _timestamp(self, ts):
        """
        Format a number of microseconds as a WebVTT timestamp.

        The result is "mm:ss.ttt", prefixed with "hh:" only when the hours
        are non-zero. Hours are written with at least two digits, as WebVTT
        requires.

        :param ts: time in microseconds
        :rtype: unicode
        """
        td = datetime.timedelta(microseconds=ts)
        # td.seconds only covers the part below one day; fold the days back
        # in so that timestamps of 24 hours or more do not wrap around.
        total_seconds = td.days * 86400 + td.seconds
        mm, ss = divmod(total_seconds, 60)
        hh, mm = divmod(mm, 60)
        s = "%02d:%02d.%03d" % (mm, ss, td.microseconds // 1000)
        if hh:
            s = "%02d:%s" % (hh, s)
        return s

    def _tags_for_style(self, style):
        """
        Return the [opening, closing] WebVTT tags for a style name, or two
        empty strings for a style that has no inline tag.
        """
        if style == 'italics':
            return ['<i>', '</i>']
        elif style == 'underline':
            return ['<u>', '</u>']
        elif style == 'bold':
            return ['<b>', '</b>']
        else:
            return ['', '']

    def _calculate_resulting_style(self, style, caption_set):
        """
        Flatten a style dict by resolving the style classes it refers to.

        Classes named under 'classes' (or the single one under 'class') are
        looked up in the caption set and resolved recursively, later classes
        overriding earlier ones. The keys of ``style`` itself are applied
        last and therefore win.

        :type style: dict
        :type caption_set: CaptionSet
        :rtype: dict
        """
        resulting_style = {}

        style_classes = []
        if 'classes' in style:
            style_classes = style['classes']
        elif 'class' in style:
            style_classes = [style['class']]

        for style_class in style_classes:
            sub_style = caption_set.get_style(style_class).copy()
            # Recursively resolve class attributes and calculate style
            resulting_style.update(
                self._calculate_resulting_style(sub_style, caption_set))

        resulting_style.update(style)

        return resulting_style

    def _write_caption(self, caption_set, caption):
        """
        Serialize one caption as one WebVTT cue per layout group.

        Every cue consists of the timing line (with its cue settings) and
        the cue text wrapped in the tags of the caption-level style.

        :type caption_set: CaptionSet
        :type caption: Caption
        :rtype: unicode
        """
        layout_groups = self._layout_groups(caption.nodes, caption_set)

        start = self._timestamp(caption.start)
        end = self._timestamp(caption.end)
        timespan = "{} --> {}".format(start, end)

        output = ''

        cue_style_tags = ['', '']

        style = self._calculate_resulting_style(caption.style, caption_set)
        for key, value in sorted(style.items()):
            if value:
                tags = self._tags_for_style(key)
                # Opening tags are appended and closing tags prepended so
                # that the tags end up properly nested.
                cue_style_tags[0] += tags[0]
                cue_style_tags[1] = tags[1] + cue_style_tags[1]

        for cue_text, layout in layout_groups:
            if not layout:
                layout = caption.layout_info or self.global_layout
            cue_settings = self._cue_settings_from(layout)
            output += timespan + cue_settings + '\n'
            output += cue_style_tags[0] + cue_text + cue_style_tags[1] + '\n'

        return output

    def _cue_settings_from(self, layout):
        """
        Return WebVTT cue settings string based on layout info

        The string is either empty or starts with a space, so it can be
        appended directly to the timing line.

        :type layout: Layout
        :rtype: unicode
        """
        if not layout:
            return ''

        # If it's converting from WebVTT to WebVTT, keep positioning info
        # unchanged
        if layout.webvtt_positioning:
            return ' {}'.format(layout.webvtt_positioning)

        left_offset = None
        top_offset = None
        cue_width = None
        alignment = None

        already_relative = False
        if not self.relativize:
            if layout.is_relative():
                already_relative = True
            else:
                # There are absolute positioning values for this cue but the
                # Writer is explicitly configured not to do any relativization.
                # Ignore all positioning for this cue.
                return ''

        # Ensure that all positioning values are measured using percentage.
        # This may raise an exception if layout.is_relative() == False
        # If you want to avoid it, you have to turn off relativization by
        # initializing this Writer with relativize=False.
        if not already_relative:
            layout = layout.as_percentage_of(
                self.video_width, self.video_height)

        # Ensure that when there's a left offset the caption is not pushed out
        # of the screen. If the execution got this far it means origin and
        # extent are already relative by now.
        if self.fit_to_screen:
            layout = layout.fit_to_screen()

        if layout.origin:
            left_offset = layout.origin.x
            top_offset = layout.origin.y

        if layout.extent:
            cue_width = layout.extent.horizontal

        if layout.padding:
            if layout.padding.start and left_offset:
                # Since there is no padding in WebVTT, the left padding is
                # added to the total left offset (if it is defined and not
                # relative),
                left_offset += layout.padding.start
                # and removed from the total cue width
                if cue_width:
                    cue_width -= layout.padding.start
            # the right padding is cut out of the total cue width,
            if layout.padding.end and cue_width:
                cue_width -= layout.padding.end
            # the top padding is added to the top offset
            # (if it is defined and not relative)
            if layout.padding.before and top_offset:
                top_offset += layout.padding.before
            # and the bottom padding is ignored because the cue box is only as
            # long vertically as the text it contains and nothing can be cut
            # out

        try:
            alignment = WEBVTT_VERSION_OF[layout.alignment.horizontal]
        except (AttributeError, KeyError):
            # No alignment on this layout, or one without a WebVTT
            # counterpart: emit no "align" setting.
            pass

        cue_settings = ''

        if alignment and alignment != 'middle':
            cue_settings += " align:" + alignment
        if left_offset:
            cue_settings += " position:{},start".format(
                six.text_type(left_offset))
        if top_offset:
            cue_settings += " line:" + six.text_type(top_offset)
        if cue_width:
            cue_settings += " size:" + six.text_type(cue_width)

        return cue_settings

    def _layout_groups(self, nodes, caption_set):
        """
        Convert a Caption's nodes to WebVTT cue or cues (depending on
        whether they have the same positioning or not).

        :returns: list of (encoded cue text, layout) tuples
        """
        if not nodes:
            return []

        current_layout = None

        # A list with layout groups. Since WebVTT only support positioning
        # for different cues, each layout group has to be represented in a
        # new cue with the same timing but different positioning settings.
        layout_groups = []
        # A properly encoded WebVTT string (plain unicode must be properly
        # escaped before being appended to this string)
        s = ''
        for i, node in enumerate(nodes):
            if node.type_ == CaptionNode.TEXT:
                if s and current_layout and node.layout_info != current_layout:
                    # If the positioning changes from one text node to
                    # another, a new WebVTT cue has to be created.
                    layout_groups.append((s, current_layout))
                    s = ''
                # ATTENTION: This is where the plain unicode node content is
                # finally encoded as WebVTT. Empty text is written as a
                # non-breaking space entity so that the cue never contains
                # an empty line.
                s += self._encode(node.content) or '&nbsp;'
                current_layout = node.layout_info
            elif node.type_ == CaptionNode.STYLE:
                resulting_style = self._calculate_resulting_style(
                    node.content, caption_set)

                # Closing tags are emitted in the reverse order of the
                # opening ones.
                styles = ['italics', 'underline', 'bold']
                if not node.start:
                    styles.reverse()

                for style in styles:
                    if style in resulting_style and resulting_style[style]:
                        tags = self._tags_for_style(style)
                        if node.start:
                            s += tags[0]
                        else:
                            s += tags[1]

                # TODO: Refactor pycaption and eliminate the concept of a
                # "Style node"
            elif node.type_ == CaptionNode.BREAK:
                if i > 0 and nodes[i - 1].type_ != CaptionNode.TEXT:
                    s += '&nbsp;'
                if i == 0:  # cue text starts with a break
                    s += '&nbsp;'
                s += '\n'

        if s:
            layout_groups.append((s, current_layout))
        return layout_groups

    def _encode(self, s):
        """
        Convert cue text from plain unicode to WebVTT XML-like format
        escaping illegal characters. For a list of illegal characters see:
            - http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-text-span
        :type s: unicode
        :rtype: unicode
        """
        # The ampersand goes first so that the entities introduced below are
        # not escaped a second time.
        s = s.replace('&', '&amp;')
        s = s.replace('<', '&lt;')

        # The substring "-->" is also not allowed according to this:
        #   - http://dev.w3.org/html5/webvtt/#dfn-webvtt-cue-block
        s = s.replace('-->', '--&gt;')

        # '>', U+200E, U+200F and U+00A0 have escaping codes too, but they're
        # not illegal, so they are deliberately left as they are to stay as
        # close as possible to the specification.
        return s
|