upload

2021-09-01 02:57:54 +05:00
parent 9df940f1fd
commit bf3c3712dd
222 changed files with 1007430 additions and 0 deletions
--- a/utils/modules/pycaption/dfxp/extras.py
+++ b/utils/modules/pycaption/dfxp/extras.py
@@ -0,0 +1,248 @@
+# We thought about making pycaption.base objects immutable. This would be nice
+# in a lot of cases, but since the transformations on them could be quite
+# complex, the deepcopy method is good enough sometimes.
+from copy import deepcopy
+
+from .base import DFXPWriter, DFXP_DEFAULT_REGION
+from ..base import BaseWriter, CaptionNode, merge_concurrent_captions
+
+from xml.sax.saxutils import escape
+from bs4 import BeautifulSoup
+
+LEGACY_DFXP_BASE_MARKUP = '''
+<tt xmlns="http://www.w3.org/ns/ttml"
+    xmlns:tts="http://www.w3.org/ns/ttml#styling">
+    <head>
+        <styling/>
+        <layout/>
+    </head>
+    <body/>
+</tt>
+'''
+
+LEGACY_DFXP_DEFAULT_STYLE = {
+    'color': 'white',
+    'font-family': 'monospace',
+    'font-size': '1c',
+}
+
+LEGACY_DFXP_DEFAULT_STYLE_ID = 'default'
+LEGACY_DFXP_DEFAULT_REGION_ID = 'bottom'
+
+LEGACY_DFXP_DEFAULT_REGION = {
+    'text-align': 'center',
+    'display-align': 'after'
+}
+
+
+class SinglePositioningDFXPWriter(DFXPWriter):
+    """A dfxp writer, that ignores all positioning, using a single provided value
+    """
+    def __init__(self, default_positioning=DFXP_DEFAULT_REGION,
+                 *args, **kwargs):
+        super(SinglePositioningDFXPWriter, self).__init__(*args, **kwargs)
+        self.default_positioning = default_positioning
+
+    def write(self, captions_set, force=''):
+        """Writes a DFXP file using the positioning provided in the initializer
+
+        :type captions_set: pycaption.base.CaptionSet
+        :param force: only write this language, if available in the CaptionSet
+        :rtype: unicode
+        """
+        captions_set = self._create_single_positioning_caption_set(
+            captions_set, self.default_positioning)
+
+        return super(SinglePositioningDFXPWriter, self).write(captions_set, force)  # noqa
+
+    @staticmethod
+    def _create_single_positioning_caption_set(caption_set, positioning):
+        """Return a caption where all the positioning information was
+        replaced from positioning
+
+        :type caption_set: pycaption.base.CaptionSet
+        :rtype: pycaption.base.CaptionSet
+        """
+        # If SinglePositioningDFXPWriter would modify the state of the caption
+        # set, any writer using the same caption_set thereafter would be
+        # affected. At the moment we know we don't use any other writers, but
+        # this is important and mustn't be neglected
+        caption_set = deepcopy(caption_set)
+        caption_set = merge_concurrent_captions(caption_set)
+        caption_set.layout_info = positioning
+
+        for lang in caption_set.get_languages():
+            caption_set.set_layout_info(lang, positioning)
+
+            caption_list = caption_set.get_captions(lang)
+            for caption in caption_list:
+                caption.layout_info = positioning
+
+                for node in caption.nodes:
+                    if hasattr(node, 'layout_info'):
+                        node.layout_info = positioning
+
+        for _, style in caption_set.get_styles():
+            if 'text-align' in style:
+                style.pop('text-align')
+
+        return caption_set
+
+
+class LegacyDFXPWriter(BaseWriter):
+    """Ported the legacy DFXPWriter from 0.4.5"""
+    def __init__(self, *args, **kw):
+        self.p_style = False
+        self.open_span = False
+
+    def write(self, caption_set, force=''):
+        caption_set = deepcopy(caption_set)
+        caption_set = merge_concurrent_captions(caption_set)
+
+        dfxp = BeautifulSoup(LEGACY_DFXP_BASE_MARKUP, 'lxml-xml')
+        dfxp.find('tt')['xml:lang'] = "en"
+
+        for style_id, style in caption_set.get_styles():
+            if style != {}:
+                dfxp = self._recreate_styling_tag(style_id, style, dfxp)
+        if not caption_set.get_styles():
+            dfxp = self._recreate_styling_tag(
+                LEGACY_DFXP_DEFAULT_STYLE_ID, LEGACY_DFXP_DEFAULT_STYLE, dfxp)
+
+        # XXX For now we will always use this default region. In the future if
+        # regions are provided, they will be kept
+        dfxp = self._recreate_region_tag(
+            LEGACY_DFXP_DEFAULT_REGION_ID, LEGACY_DFXP_DEFAULT_REGION, dfxp)
+
+        body = dfxp.find('body')
+
+        if force:
+            langs = [self._force_language(force, caption_set.get_languages())]
+        else:
+            langs = caption_set.get_languages()
+
+        for lang in langs:
+            div = dfxp.new_tag('div')
+            div['xml:lang'] = '%s' % lang
+
+            for caption in caption_set.get_captions(lang):
+                if caption.style:
+                    caption_style = caption.style
+                    caption_style.update({'region': LEGACY_DFXP_DEFAULT_REGION_ID})
+                else:
+                    caption_style = {'class': LEGACY_DFXP_DEFAULT_STYLE_ID,
+                                     'region': LEGACY_DFXP_DEFAULT_REGION_ID}
+                p = self._recreate_p_tag(caption, caption_style, dfxp)
+                div.append(p)
+
+            body.append(div)
+
+        caption_content = dfxp.prettify(formatter=None)
+        return caption_content
+
+    # force the DFXP to only have one language, trying to match on "force"
+    def _force_language(self, force, langs):
+        for lang in langs:
+            if force == lang:
+                return lang
+
+        return langs[-1]
+
+    def _recreate_region_tag(self, region_id, styling, dfxp):
+        dfxp_region = dfxp.new_tag('region')
+        dfxp_region.attrs.update({'xml:id': region_id})
+
+        attributes = self._recreate_style(styling, dfxp)
+        dfxp_region.attrs.update(attributes)
+
+        new_tag = dfxp.new_tag('region')
+        new_tag.attrs.update({'xml:id': region_id})
+        if dfxp_region != new_tag:
+            dfxp.find('layout').append(dfxp_region)
+        return dfxp
+
+    def _recreate_styling_tag(self, style, content, dfxp):
+        dfxp_style = dfxp.new_tag('style')
+        dfxp_style.attrs.update({'xml:id': style})
+
+        attributes = self._recreate_style(content, dfxp)
+        dfxp_style.attrs.update(attributes)
+
+        new_tag = dfxp.new_tag('style')
+        new_tag.attrs.update({'xml:id': style})
+        if dfxp_style != new_tag:
+            dfxp.find('styling').append(dfxp_style)
+
+        return dfxp
+
+    def _recreate_p_tag(self, caption, caption_style, dfxp):
+        start = caption.format_start()
+        end = caption.format_end()
+        p = dfxp.new_tag("p", begin=start, end=end)
+        p.string = self._recreate_text(caption, dfxp)
+
+        if dfxp.find("style", {"xml:id": "p"}):
+            p['style'] = 'p'
+
+        p.attrs.update(self._recreate_style(caption_style, dfxp))
+
+        return p
+
+    def _recreate_text(self, caption, dfxp):
+        line = ''
+
+        for node in caption.nodes:
+            if node.type_ == CaptionNode.TEXT:
+                line += escape(node.content) + ' '
+
+            elif node.type_ == CaptionNode.BREAK:
+                line = line.rstrip() + '<br/>\n    '
+
+            elif node.type_ == CaptionNode.STYLE:
+                line = self._recreate_span(line, node, dfxp)
+
+        return line.rstrip()
+
+    def _recreate_span(self, line, node, dfxp):
+        if node.start:
+            styles = ''
+
+            content_with_style = self._recreate_style(node.content, dfxp)
+            for style, value in list(content_with_style.items()):
+                styles += ' %s="%s"' % (style, value)
+
+            if styles:
+                if self.open_span:
+                    line = line.rstrip() + '</span> '
+                line += '<span%s>' % styles
+                self.open_span = True
+
+        elif self.open_span:
+            line = line.rstrip() + '</span> '
+            self.open_span = False
+
+        return line
+
+    def _recreate_style(self, content, dfxp):
+        dfxp_style = {}
+
+        if 'region' in content:
+            if dfxp.find('region', {'xml:id': content['region']}):
+                dfxp_style['region'] = content['region']
+        if 'class' in content:
+            if dfxp.find("style", {"xml:id": content['class']}):
+                dfxp_style['style'] = content['class']
+        if 'text-align' in content:
+            dfxp_style['tts:textAlign'] = content['text-align']
+        if 'italics' in content:
+            dfxp_style['tts:fontStyle'] = 'italic'
+        if 'font-family' in content:
+            dfxp_style['tts:fontFamily'] = content['font-family']
+        if 'font-size' in content:
+            dfxp_style['tts:fontSize'] = content['font-size']
+        if 'color' in content:
+            dfxp_style['tts:color'] = content['color']
+        if 'display-align' in content:
+            dfxp_style['tts:displayAlign'] = content['display-align']
+
+        return dfxp_style