upload

2021-09-01 02:57:54 +05:00
parent 9df940f1fd
commit bf3c3712dd
222 changed files with 1007430 additions and 0 deletions
--- a/utils/modules/pycaption/scc/init.py
+++ b/utils/modules/pycaption/scc/init.py
@@ -0,0 +1,696 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+3 types of SCC captions:
+    Roll-Up
+    Paint-On
+    Pop-On
+
+Commands:
+    94ae - [ENM] - Erase Non-displayed(buffer) Memory
+    942c - [EDM] - Erase Displayed Memory
+    9420 - [RCL] - Resume Caption Loading
+    9429 - [RDC] - Resume Direct Captioning
+
+    9425, 9426, 94a7 - [RU2], [RU3], [RU4] (roll up captions 2,3 or 4 rows)
+        - these commands set the number of expected lines
+
+    94ad - (in CEA-608-E: 142d) - [CR] carriage return.
+        - This actually rolls the captions up as many rows as specified by
+        [RU2], [RU3], or [RU4]
+
+    80 - no-op char. Doesn't do anything, but must be used with other
+        characters, to make a 2 byte word
+
+    97a1, 97a2, 9723 - [TO] move 1, 2 or 3 columns - Tab Over command
+        - this moves the positioning 1, 2, or 3 columns to the right
+        - Nothing regarding this is implemented.
+
+    942f - [EOC] - display the buffer on the screen - End Of Caption
+    ... - [PAC] - Preamble address code (can set positioning and style)
+        - All the PACs are specified by the first and second byte combined
+        from pycaption.scc.constants.PAC_BYTES_TO_POSITIONING_MAP
+
+    9429 - [RDC] - Resume Direct Captioning
+    94a4 - (in CEA-608-E: 1424) - [DER] Delete to End of Row
+
+
+Pop-On:
+    The commands should usually appear in this order. This is not strict
+    though, and the commands don't necessarily have to be on the same row.
+
+    1. 94ae [ENM] (erase non displayed memory)
+    2. 9420 [RCL] (resume caption loading => this command here means we're using Pop-On captions)
+    2.1? [ENM] - if step 1 was skipped?
+    3. [PAC] Positioning/ styling command (can position on columns divisible by 4)
+        The control chars is called Preamble Address Code [PAC].
+    4. If positioning needs to be on columns not divisible by 4, use a [TO] command
+    5. text
+    6. 942c [EDM] - optionally, erase the currently displayed caption
+    7. 942f [EOC] display the caption
+
+
+Roll-Up:
+    1. [RU2], [RU3] or [RU4]    - sets Roll-Up style and depth
+        - these set the Roll-Up style: (characteristic command)
+    2. [CR] to roll the display up 1 row...lol?
+    3. [PAC] - sets the indent of the base row
+
+
+Paint-On:
+    1. [RDC] - sets the Paint-On style (characteristic command)
+    2. [PAC]
+    3. text
+    4. [PAC]
+    5. text or [DER]
+
+There are some rules regarding the parity of the commands.
+
+This resource:
+http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_FORMAT.HTML
+ specifies that there are interpreters which only work if the commands have an
+ odd parity. This however is not consistent, and we might not handle well
+ these cases. Odd parity of a command means that converting the word into
+ binary should result in an odd number of '1's. The PAC commands obey this
+ rule, but some other commands do not. Some of the commands that do not are
+ found in the COMMANDS dictionary. This is legacy logic, that I didn't know
+ how to handle, and just carried over when implementing positioning.
+"""
+
+import re
+import math
+import textwrap
+from copy import deepcopy
+
+import six
+
+from pycaption.base import (
+    BaseReader, BaseWriter, CaptionSet, CaptionNode,
+)
+from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError
+from .constants import (
+    HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS,
+    MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
+    SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP,
+    PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED,
+)
+from .specialized_collections import (
+    TimingCorrectingCaptionList, NotifyingDict, CaptionCreator,
+    InstructionNodeCreator)
+from .state_machines import DefaultProvidingPositionTracker
+
+
+class NodeCreatorFactory(object):
+    """Builds node_creator instances that all share one position tracker.
+
+    The reader needs several node creators during a single .read() call and
+    they must all see the same positioning state. Holding that state on this
+    factory (rather than on the node_creator class itself) ties its lifetime
+    to the factory instance instead of to the class.
+    """
+    def __init__(self, position_tracker,
+                 node_creator=InstructionNodeCreator):
+        self.node_creator = node_creator
+        self.position_tracker = position_tracker
+
+    def new_creator(self):
+        """Returns a fresh self.node_creator instance bound to the shared
+        position_tracker
+        """
+        creator_type = self.node_creator
+        return creator_type(position_tracker=self.position_tracker)
+
+    def from_list(self, roll_rows):
+        """Delegates to the node_creator's method with the same name,
+        passing along the shared position tracker
+
+        :param roll_rows: list of node_creator instances
+
+        :return: a node_creator instance
+        """
+        shared_tracker = self.position_tracker
+        return self.node_creator.from_list(
+            roll_rows, position_tracker=shared_tracker)
+
+
+def get_corrected_end_time(caption):
+    """Returns the caption's end time, falling back to 4 seconds after its
+    start when no (truthy) end time was ever set
+
+    :param Caption caption: the last caption
+    :rtype: int
+    """
+    explicit_end = caption.end
+    if not explicit_end:
+        # 4 seconds, expressed as 4 * 1000 * 1000
+        return caption.start + 4 * 1000 * 1000
+
+    return explicit_end
+
+
+class SCCReader(BaseReader):
+    """Converts a given unicode string to a CaptionSet.
+
+    This can be then later used for converting into any other supported formats
+
+    One node-creator buffer is kept per caption style ('pop', 'paint' and
+    'roll') inside a NotifyingDict; the ``buffer`` property always refers to
+    the buffer stored under the currently active key.
+
+    NOTE(review): the state created in __init__ is not reset by read(), so an
+    instance looks intended for a single read() call - confirm.
+    """
+    # Splits a line into its timestamp, the separating whitespace and the
+    # caption words. Compiled once here instead of once per translated line.
+    _LINE_PATTERN = re.compile(r"([0-9:;]*)([\s\t]*)((.)*)")
+
+    # Roll-Up commands [RU2], [RU3], [RU4] => number of rows they announce
+    _ROLL_UP_ROWS = {'9425': 2, '9426': 3, '94a7': 4}
+
+    def __init__(self, *args, **kw):
+        self.caption_stash = CaptionCreator()
+        self.time_translator = _SccTimeTranslator()
+
+        self.node_creator_factory = NodeCreatorFactory(
+            DefaultProvidingPositionTracker()
+        )
+
+        # Last command/special char seen; used to drop doubled words
+        self.last_command = ''
+
+        self.buffer_dict = NotifyingDict()
+
+        self.buffer_dict['pop'] = self.node_creator_factory.new_creator()
+        self.buffer_dict['paint'] = self.node_creator_factory.new_creator()
+        self.buffer_dict['roll'] = self.node_creator_factory.new_creator()
+
+        # Call this method when the active key changes
+        self.buffer_dict.add_change_observer(self._flush_implicit_buffers)
+        self.buffer_dict.set_active('pop')
+
+        self.roll_rows = []
+        self.roll_rows_expected = 0
+        self.simulate_roll_up = False
+
+        # Start time used for the next caption created from a buffer
+        self.time = 0
+
+    def detect(self, content):
+        """Checks whether the given content is a proper SCC file
+
+        Content without any line (e.g. an empty string) is reported as not
+        being SCC instead of raising an IndexError.
+
+        :type content: unicode
+
+        :rtype: bool
+        """
+        lines = content.splitlines()
+        return bool(lines) and lines[0] == HEADER
+
+    def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
+        """Converts the unicode string into a CaptionSet
+
+        :type content: six.text_type
+        :param content: The SCC content to be converted to a CaptionSet
+
+        :type lang: six.text_type
+        :param lang: The language of the caption
+
+        :type simulate_roll_up: bool
+        :param simulate_roll_up: If True, when converting to other formats,
+            the resulting captions will contain all the rows that were visible
+            on the screen when the captions were rolling up.
+
+        :type offset: int
+        :param offset: Number of seconds; it is multiplied by 1000000 and
+            handed to the time translator, which subtracts it from every
+            computed time.
+
+        :rtype: CaptionSet
+
+        :raises InvalidInputError: if content is not a unicode string
+        :raises ValueError: if a caption is shorter than 0.05 seconds
+        :raises CaptionReadNoCaptions: if no caption was found
+        """
+        if not isinstance(content, six.text_type):
+            raise InvalidInputError('The content is not a unicode string.')
+
+        self.simulate_roll_up = simulate_roll_up
+        self.time_translator.offset = offset * 1000000
+        # split lines
+        lines = content.splitlines()
+
+        # loop through each line except the first (the header)
+        for line in lines[1:]:
+            self._translate_line(line)
+
+        self._flush_implicit_buffers()
+
+        captions = CaptionSet({lang: self.caption_stash.get_all()})
+
+        # check captions for incorrect lengths
+        for cap in captions.get_captions(lang):
+            # if there's an end time on a caption and the difference is
+            # less than .05s, reject the input (this is likely caused by a
+            # standalone EOC marker in the SCC file)
+            if 0 < cap.end - cap.start < 50000:
+                raise ValueError('unsupported length found in SCC input file: ' + str(cap))
+
+        if captions.is_empty():
+            raise CaptionReadNoCaptions("empty caption file")
+        else:
+            last_caption = captions.get_captions(lang)[-1]
+            last_caption.end = get_corrected_end_time(last_caption)
+
+        return captions
+
+    def _fix_last_timing(self, timing):
+        """HACK HACK: Certain Paint-On captions don't specify the 942f [EOC]
+        (End Of Caption) command on the same line.
+        If this is a 942f line, also simulate a 942c (Erase Displayed Memory)
+        to properly set the timing on the last caption.
+
+        This method needs some serious attention, because it proves the timing
+        calculation is not done well for Pop-On captions
+
+        :param timing: the timestamp found on the current line
+        """
+        # Calculate the end time from the current line
+        time_translator = _SccTimeTranslator()
+        time_translator.start_at(timing)
+        time_translator.offset = self.time_translator.offset
+
+        # But use the current time translator for the start time
+        self.caption_stash.create_and_store(
+            self.buffer, self.time_translator.get_time())
+
+        self.caption_stash.correct_last_timing(time_translator.get_time())
+        # Go through the factory, like every other buffer reset in this
+        # class, so the new buffer shares the reader's position tracker.
+        self.buffer = self.node_creator_factory.new_creator()
+
+    def _flush_implicit_buffers(self, old_key=None, *args):
+        """Convert to Captions those buffers whose behavior is implicit.
+
+        The Pop-On buffer is explicit. New captions are created from it
+        with the command 'End Of Caption' [EOC], '942f'
+
+        The other 2 buffers, Roll-Up and Paint-On we treat as "more" implicit,
+        meaning that they can be displayed by a command on the next row.
+        If they're on the last row however, or if the caption type is changing,
+        we make sure to convert the buffers to text, so we don't lose any info.
+
+        :param old_key: the key that was active before the change, or None
+            when called directly (as read() does after the last line)
+        """
+        if old_key == 'pop':
+            return
+
+        if old_key is None or old_key == 'roll':
+            # Without an old key the currently active buffer, whatever its
+            # style, is flushed through the roll-up path.
+            if not self.buffer.is_empty():
+                self._roll_up()
+
+        elif old_key == 'paint':
+            # xxx - perhaps the self.buffer property is sufficient
+            if not self.buffer_dict['paint'].is_empty():
+                self.caption_stash.create_and_store(
+                    self.buffer_dict['paint'], self.time)
+
+    def _translate_line(self, line):
+        """Translates one SCC line: a timestamp followed by 4 character words
+
+        :type line: unicode
+        """
+        # ignore blank lines
+        if line.strip() == '':
+            return
+
+        # split line in timestamp and words
+        parts = self._LINE_PATTERN.findall(line.lower())
+        timestamp = parts[0][0]
+        words = parts[0][2]
+
+        # XXX!!!!!! THESE 2 LINES ARE A HACK
+        if words.strip() == '942f':
+            self._fix_last_timing(timing=timestamp)
+
+        self.time_translator.start_at(timestamp)
+
+        # loop through each word
+        for word in words.split(' '):
+            # ignore empty results
+            if word.strip() != '':
+                self._translate_word(word)
+
+    def _translate_word(self, word):
+        """Dispatches a single word to the command, special character,
+        extended character or plain character handler
+        """
+        # count frames for timing
+        self.time_translator.increment_frames()
+
+        # first check if word is a command
+        # TODO - check that all the positioning commands are here, or use
+        # some other strategy to determine if the word is a command.
+        if word in COMMANDS or _is_pac_command(word):
+            self._translate_command(word)
+
+        # second, check if word is a special character
+        elif word in SPECIAL_CHARS:
+            self._translate_special_char(word)
+
+        elif word in EXTENDED_CHARS:
+            self._translate_extended_char(word)
+
+        # third, try to convert word into 2 characters
+        else:
+            self._translate_characters(word)
+
+    def _handle_double_command(self, word):
+        """Tells whether the word repeats the previously remembered one
+
+        A repetition clears the remembered word, so a third occurrence in a
+        row counts as new again.
+
+        :rtype: bool
+        """
+        # ensure we don't accidentally use the same command twice
+        if word == self.last_command:
+            self.last_command = ''
+            return True
+        else:
+            self.last_command = word
+            return False
+
+    def _translate_special_char(self, word):
+        # XXX - this looks highly buggy. Why should special chars be ignored
+        # when printed 2 times one after another?
+        if self._handle_double_command(word):
+            return
+
+        self.buffer.add_chars(SPECIAL_CHARS[word])
+
+    def _translate_extended_char(self, word):
+        # XXX - this looks highly buggy. Why would a special char be ignored
+        # if it's printed 2 times one after another?
+        if self._handle_double_command(word):
+            return
+
+        # add to buffer
+        self.buffer.add_chars(EXTENDED_CHARS[word])
+
+    def _translate_command(self, word):
+        """Executes a command word, unless it doubles the previous one"""
+        if self._handle_double_command(word):
+            return
+
+        # if command is pop_up
+        if word == '9420':
+            self.buffer_dict.set_active('pop')
+
+        # command is paint_on [Resume Direct Captioning]
+        elif word == '9429':
+            self.buffer_dict.set_active('paint')
+
+            self.roll_rows_expected = 1
+            if not self.buffer.is_empty():
+                self.caption_stash.create_and_store(
+                    self.buffer, self.time
+                )
+                self.buffer = self.node_creator_factory.new_creator()
+
+            self.time = self.time_translator.get_time()
+
+        # if command is roll_up 2, 3 or 4 rows
+        elif word in self._ROLL_UP_ROWS:
+            # Switch buffers first: the change observer may still flush using
+            # the previous roll_rows_expected value.
+            self.buffer_dict.set_active('roll')
+
+            # count how many lines are expected
+            self.roll_rows_expected = self._ROLL_UP_ROWS[word]
+
+            # if content is in the queue, turn it into a caption
+            if not self.buffer.is_empty():
+                self.caption_stash.create_and_store(
+                    self.buffer, self.time)
+                self.buffer = self.node_creator_factory.new_creator()
+
+            # set rows to empty, configure start time for caption
+            self.roll_rows = []
+            self.time = self.time_translator.get_time()
+
+        # clear pop_on buffer
+        elif word == '94ae':
+            self.buffer = self.node_creator_factory.new_creator()
+
+        # display pop_on buffer [End Of Caption]
+        elif word == '942f':
+            self.time = self.time_translator.get_time()
+            self.caption_stash.create_and_store(self.buffer, self.time)
+            self.buffer = self.node_creator_factory.new_creator()
+
+        # roll up captions [Carriage Return]
+        elif word == '94ad':
+            # display roll-up buffer
+            if not self.buffer.is_empty():
+                self._roll_up()
+
+        # clear screen
+        elif word == '942c':
+            self.roll_rows = []
+
+            # XXX - The 942c command has nothing to do with paint-ons
+            # This however is legacy code, and will break lots of tests if
+            # the proper buffer (self.buffer) is used.
+            # Most likely using `self.buffer` instead of the paint buffer
+            # is the right thing to do, but this needs some further attention.
+            if not self.buffer_dict['paint'].is_empty():
+                self.caption_stash.create_and_store(
+                    self.buffer_dict['paint'], self.time)
+                self.buffer = self.node_creator_factory.new_creator()
+
+            # attempt to add proper end time to last caption(s)
+            self.caption_stash.correct_last_timing(
+                self.time_translator.get_time())
+
+        # if command not one of the aforementioned, add to buffer
+        else:
+            self.buffer.interpret_command(word)
+
+    def _translate_characters(self, word):
+        """Adds the 2 characters encoded by the word to the active buffer;
+        the word is dropped if either byte is not a known character
+        """
+        # split word into the 2 bytes
+        byte1 = word[:2]
+        byte2 = word[2:]
+
+        # check to see if the bytes are recognized characters
+        if byte1 not in CHARACTERS or byte2 not in CHARACTERS:
+            return
+
+        self.buffer.add_chars(CHARACTERS[byte1], CHARACTERS[byte2])
+
+    @property
+    def buffer(self):
+        """Returns the currently active buffer
+        """
+        return self.buffer_dict.get_active()
+
+    @buffer.setter
+    def buffer(self, value):
+        """Sets a new value to the active key
+
+        :param value: any object
+        """
+        try:
+            key = self.buffer_dict.active_key
+            self.buffer_dict[key] = value
+        except TypeError:
+            # Deliberate best effort carried over from the original code.
+            # NOTE(review): presumably raised when no key is active - confirm
+            pass
+
+    def _roll_up(self):
+        """Stores the active buffer as a caption and starts a new one,
+        closing the previous caption at the new start time
+        """
+        # We expect the active buffer to be the roll buffer
+        if self.simulate_roll_up:
+            if self.roll_rows_expected > 1:
+                # keep at most roll_rows_expected rows on the simulated screen
+                if len(self.roll_rows) >= self.roll_rows_expected:
+                    self.roll_rows.pop(0)
+
+                self.roll_rows.append(self.buffer)
+                self.buffer = self.node_creator_factory.from_list(
+                    self.roll_rows)
+
+        # convert buffer and empty
+        self.caption_stash.create_and_store(self.buffer, self.time)
+        self.buffer = self.node_creator_factory.new_creator()
+
+        # configure time
+        self.time = self.time_translator.get_time()
+
+        # try to insert the proper ending time for the previous caption
+        self.caption_stash.correct_last_timing(self.time, force=True)
+
+
+class SCCWriter(BaseWriter):
+    """Serializes a CaptionSet into SCC text.
+
+    Every caption is written with the Pop-On command sequence: the buffer is
+    erased and loaded ('94ae', '9420'), then displayed with '942f'.
+    """
+
+    def __init__(self, *args, **kw):
+        super(SCCWriter, self).__init__(*args, **kw)
+
+    def write(self, caption_set):
+        """Returns the SCC representation of the given caption set
+
+        Only the first language reported by the caption set is written.
+
+        :type caption_set: CaptionSet
+        :rtype: unicode
+        """
+        header = HEADER + '\n\n'
+
+        if caption_set.is_empty():
+            return header
+
+        # Work on a copy. NOTE(review): presumably so the caller's caption
+        # set is never modified - confirm
+        caption_set = deepcopy(caption_set)
+
+        # Only support one language.
+        lang = list(caption_set.get_languages())[0]
+        captions = caption_set.get_captions(lang)
+
+        # PASS 1: compute codes for each caption
+        codes = [(self._text_to_code(caption), caption.start, caption.end)
+                 for caption in captions]
+
+        # PASS 2:
+        # Advance start times so as to have time to write to the pop-on
+        # buffer; possibly remove the previous clear-screen command
+        for index, (code, start, end) in enumerate(codes):
+            # every word takes 5 characters ("xxxx "); 8 more words are
+            # accounted for on top of the caption's own words
+            code_words = len(code) / 5 + 8
+            code_time_microseconds = code_words * MICROSECONDS_PER_CODEWORD
+            code_start = start - code_time_microseconds
+            if index == 0:
+                # the first caption keeps its original start time
+                continue
+            previous_code, previous_start, previous_end = codes[index-1]
+            if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start:
+                # no room for a separate clear-screen line: drop the
+                # previous caption's end time
+                codes[index-1] = (previous_code, previous_start, None)
+            codes[index] = (code, code_start, end)
+
+        # PASS 3:
+        # Write captions. Pieces are collected and joined once at the end.
+        pieces = [header]
+        for (code, start, end) in codes:
+            pieces.append('%s\t' % self._format_timestamp(start))
+            pieces.append('94ae 94ae 9420 9420 ')
+            pieces.append(code)
+            pieces.append('942c 942c 942f 942f\n\n')
+            if end is not None:
+                pieces.append(
+                    '%s\t942c 942c\n\n' % self._format_timestamp(end))
+
+        return ''.join(pieces)
+
+    # Wrap lines at 32 chars
+    @staticmethod
+    def _layout_line(caption):
+        """Returns the caption's text, with every line wrapped at 32 chars
+
+        :param caption: object exposing a `nodes` list of CaptionNode
+        :rtype: unicode
+        """
+        def caption_node_to_text(caption_node):
+            if caption_node.type_ == CaptionNode.TEXT:
+                return six.text_type(caption_node.content)
+            if caption_node.type_ == CaptionNode.BREAK:
+                return '\n'
+            # Any other node type contributes no text. Returning '' instead
+            # of None keeps the join below from raising a TypeError.
+            return ''
+        caption_text = ''.join(
+            [caption_node_to_text(node) for node in caption.nodes])
+        inner_lines = caption_text.split('\n')
+        inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
+        return '\n'.join(inner_lines_laid_out)
+
+    @staticmethod
+    def _maybe_align(code):
+        """Pads a pending half-word, so the code ends on a full word"""
+        # Finish a half-word with a no-op so we can move to a full word
+        if len(code) % 5 == 2:
+            code += '80 '
+        return code
+
+    @staticmethod
+    def _maybe_space(code):
+        """Appends the separating space once a 4 character word is complete"""
+        if len(code) % 5 == 4:
+            code += ' '
+        return code
+
+    def _print_character(self, code, char):
+        """Returns the code extended with the encoding of the given char
+
+        :param code: the code generated so far
+        :param char: a single character
+        :rtype: unicode
+        """
+        try:
+            char_code = CHARACTER_TO_CODE[char]
+        except KeyError:
+            try:
+                char_code = SPECIAL_OR_EXTENDED_CHAR_TO_CODE[char]
+            except KeyError:
+                char_code = '91b6'  # Use £ as "unknown character" symbol
+
+        if len(char_code) == 2:
+            return code + char_code
+        elif len(char_code) == 4:
+            # a 2 byte code must start on a word boundary
+            return self._maybe_align(code) + char_code
+        else:
+            # This should not happen!
+            return code
+
+    def _text_to_code(self, s):
+        """Encodes a caption as SCC words, one positioned row per text line
+
+        :param s: the caption to encode
+        :rtype: unicode
+        """
+        code = ''
+        lines = self._layout_line(s).split('\n')
+        for row, line in enumerate(lines):
+            # shift the rows so that the last line lands on row 15
+            row += 16 - len(lines)
+            # Move cursor to column 0 of the destination row
+            for _ in range(2):
+                code += ('%s%s ' % (PAC_HIGH_BYTE_BY_ROW[row],
+                                    PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]))
+            # Print the line using the SCC encoding
+            for char in line:
+                code = self._print_character(code, char)
+                code = self._maybe_space(code)
+            code = self._maybe_align(code)
+        return code
+
+    @staticmethod
+    def _format_timestamp(microseconds):
+        """Formats a time in microseconds as an 'HH:MM:SS:FF' timecode,
+        counting 30 frames per timecode second
+        """
+        seconds_float = microseconds / 1000.0 / 1000.0
+        # Convert to non-drop-frame timecode
+        seconds_float *= 1000.0 / 1001.0
+        hours = math.floor(seconds_float / 3600)
+        seconds_float -= hours * 3600
+        minutes = math.floor(seconds_float / 60)
+        seconds_float -= minutes * 60
+        seconds = math.floor(seconds_float)
+        seconds_float -= seconds
+        frames = math.floor(seconds_float * 30)
+        return '%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frames)
+
+
+class _SccTimeTranslator(object):
+    """Turns SCC timestamps into microseconds, while counting the frames
+    consumed since the timestamp was set
+    """
+    def __init__(self):
+        # number of frames passed since the last start_at()
+        self._frames = 0
+        self._time = '00:00:00;00'
+
+        # microseconds, subtracted from every computed time
+        self.offset = 0
+
+    def get_time(self):
+        """Returns the current time in microseconds: the stored timestamp,
+        advanced by the frames passed and reduced by the offset
+
+        :rtype: int
+        """
+        prefix, frame_field = self._time[:-2], self._time[-2:]
+        frame_count = int(frame_field) + self._frames
+        stamp = prefix + six.text_type(frame_count)
+        return self._translate_time(stamp, self.offset)
+
+    @staticmethod
+    def _translate_time(stamp, offset):
+        """
+        :param stamp: timestamp with 4 fields separated by ':' or ';'
+        :type offset: int
+        :param offset: Subtract this many microseconds from the calculated time
+            Helpful for when the captions are off by some time interval.
+        :rtype: int
+        """
+        # Drop-frame timebase (marked by ';') runs at the same rate as wall
+        # clock. Non-drop-frame timebase runs "slow": 1 second of timecode
+        # is longer than an actual second (1.001s)
+        seconds_per_timestamp_second = (
+            1.0 if ';' in stamp else 1001.0 / 1000.0)
+
+        fields = stamp.replace(';', ':').split(':')
+        hours = int(fields[0])
+        minutes = int(fields[1])
+        whole_seconds = int(fields[2])
+        frames = int(fields[3])
+
+        timestamp_seconds = (hours * 3600 +
+                             minutes * 60 +
+                             whole_seconds +
+                             frames / 30.0)
+
+        seconds = timestamp_seconds * seconds_per_timestamp_second
+        microseconds = seconds * 1000 * 1000 - offset
+
+        # never report a negative time
+        return 0 if microseconds < 0 else microseconds
+
+    def start_at(self, timespec):
+        """Restart counting from the given timestamp
+
+        :type timespec: unicode
+        """
+        self._frames = 0
+        self._time = timespec
+
+    def increment_frames(self):
+        """Accounts for one more frame; called once per processed word
+        """
+        self._frames = self._frames + 1
+
+
+def _is_pac_command(word):
+    """Tells whether the word is a Preamble Address Code [PAC] command,
+    i.e. whether its 2 bytes are found in PAC_BYTES_TO_POSITIONING_MAP
+
+    :type word: unicode
+    :param word: 4 letter unicode command
+
+    :rtype: bool
+    """
+    # anything that isn't exactly 4 characters long can't be a PAC
+    if not (word and len(word) == 4):
+        return False
+
+    high_byte = word[:2]
+    low_byte = word[2:]
+
+    try:
+        PAC_BYTES_TO_POSITIONING_MAP[high_byte][low_byte]
+    except KeyError:
+        return False
+
+    return True
--- a/utils/modules/pycaption/scc/pycache/init.cpython-36.pyc
+++ b/utils/modules/pycaption/scc/pycache/init.cpython-36.pyc
--- a/utils/modules/pycaption/scc/pycache/init.cpython-37.pyc
+++ b/utils/modules/pycaption/scc/pycache/init.cpython-37.pyc
--- a/utils/modules/pycaption/scc/pycache/init.cpython-38.pyc
+++ b/utils/modules/pycaption/scc/pycache/init.cpython-38.pyc
--- a/utils/modules/pycaption/scc/pycache/init.cpython-39.pyc
+++ b/utils/modules/pycaption/scc/pycache/init.cpython-39.pyc
--- a/utils/modules/pycaption/scc/pycache/constants.cpython-36.pyc
+++ b/utils/modules/pycaption/scc/pycache/constants.cpython-36.pyc
--- a/utils/modules/pycaption/scc/pycache/constants.cpython-37.pyc
+++ b/utils/modules/pycaption/scc/pycache/constants.cpython-37.pyc
--- a/utils/modules/pycaption/scc/pycache/constants.cpython-38.pyc
+++ b/utils/modules/pycaption/scc/pycache/constants.cpython-38.pyc
--- a/utils/modules/pycaption/scc/pycache/constants.cpython-39.pyc
+++ b/utils/modules/pycaption/scc/pycache/constants.cpython-39.pyc
--- a/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-36.pyc
+++ b/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-36.pyc
--- a/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-37.pyc
+++ b/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-37.pyc
--- a/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-38.pyc
+++ b/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-38.pyc
--- a/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-39.pyc
+++ b/utils/modules/pycaption/scc/pycache/specialized_collections.cpython-39.pyc
--- a/utils/modules/pycaption/scc/pycache/state_machines.cpython-36.pyc
+++ b/utils/modules/pycaption/scc/pycache/state_machines.cpython-36.pyc
--- a/utils/modules/pycaption/scc/pycache/state_machines.cpython-37.pyc
+++ b/utils/modules/pycaption/scc/pycache/state_machines.cpython-37.pyc
--- a/utils/modules/pycaption/scc/pycache/state_machines.cpython-38.pyc
+++ b/utils/modules/pycaption/scc/pycache/state_machines.cpython-38.pyc
--- a/utils/modules/pycaption/scc/pycache/state_machines.cpython-39.pyc
+++ b/utils/modules/pycaption/scc/pycache/state_machines.cpython-39.pyc
--- a/utils/modules/pycaption/scc/constants.py
+++ b/utils/modules/pycaption/scc/constants.py
@@ -0,0 +1,984 @@
+# -*- coding: utf-8 -*-
+
+from itertools import product
+from future.utils import viewitems
+
+# SCC control codes (two-byte words written as four lowercase hex digits),
+# each mapped to the marker string associated with it:
+#   ''                          - no marker for this code
+#   '<$>{break}<$>'             - a break marker
+#   '<$>{break}<$>{italic}<$>'  - a break marker followed by an italic marker
+#   '<$>{italic}<$>'            - an italic marker
+#   '<$>{end-italic}<$>'        - an end-of-italic marker
+# Every key must be exactly four hex digits. The entries '1562', '1362' and
+# '1662' used to be spelled with five digits ('15462', '13462', '16462'),
+# which no two-byte word can ever match.
+COMMANDS = {
+    '9420': '',
+    '9429': '',
+    '9425': '',
+    '9426': '',
+    '94a7': '',
+    '942a': '',
+    '94ab': '',
+    '942c': '',
+    '94ae': '',
+    '942f': '',
+    '9779': '<$>{break}<$>',
+    '9775': '<$>{break}<$>',
+    '9776': '<$>{break}<$>',
+    '9770': '<$>{break}<$>',
+    '9773': '<$>{break}<$>',
+    '10c8': '<$>{break}<$>',
+    '10c2': '<$>{break}<$>',
+    '166e': '<$>{break}<$>{italic}<$>',
+    '166d': '<$>{break}<$>',
+    '166b': '<$>{break}<$>',
+    '10c4': '<$>{break}<$>',
+    '9473': '<$>{break}<$>',
+    '977f': '<$>{break}<$>',
+    '977a': '<$>{break}<$>',
+    '1668': '<$>{break}<$>',
+    '1667': '<$>{break}<$>',
+    '1664': '<$>{break}<$>',
+    '1661': '<$>{break}<$>',
+    '10ce': '<$>{break}<$>{italic}<$>',
+    '94c8': '<$>{break}<$>',
+    '94c7': '<$>{break}<$>',
+    '94c4': '<$>{break}<$>',
+    '94c2': '<$>{break}<$>',
+    '94c1': '<$>{break}<$>',
+    '915e': '<$>{break}<$>',
+    '915d': '<$>{break}<$>',
+    '915b': '<$>{break}<$>',
+    '925d': '<$>{break}<$>',
+    '925e': '<$>{break}<$>',
+    '925b': '<$>{break}<$>',
+    '97e6': '<$>{break}<$>',
+    '97e5': '<$>{break}<$>',
+    '97e3': '<$>{break}<$>',
+    '97e0': '<$>{break}<$>',
+    '97e9': '<$>{break}<$>',
+    '9154': '<$>{break}<$>',
+    '9157': '<$>{break}<$>',
+    '9151': '<$>{break}<$>',
+    '9258': '<$>{break}<$>',
+    '9152': '<$>{break}<$>',
+    '9257': '<$>{break}<$>',
+    '9254': '<$>{break}<$>',
+    '9252': '<$>{break}<$>',
+    '9158': '<$>{break}<$>',
+    '9251': '<$>{break}<$>',
+    '94cd': '<$>{break}<$>',
+    '94ce': '<$>{break}<$>{italic}<$>',
+    '94cb': '<$>{break}<$>',
+    '97ef': '<$>{break}<$>{italic}<$>',
+    '1373': '<$>{break}<$>',
+    '97ec': '<$>{break}<$>',
+    '97ea': '<$>{break}<$>',
+    '15c7': '<$>{break}<$>',
+    '974f': '<$>{break}<$>{italic}<$>',
+    '10c1': '<$>{break}<$>',
+    '974a': '<$>{break}<$>',
+    '974c': '<$>{break}<$>',
+    '10c7': '<$>{break}<$>',
+    '976d': '<$>{break}<$>',
+    '15d6': '<$>{break}<$>',
+    '15d5': '<$>{break}<$>',
+    '15d3': '<$>{break}<$>',
+    '15d0': '<$>{break}<$>',
+    '15d9': '<$>{break}<$>',
+    '9745': '<$>{break}<$>',
+    '9746': '<$>{break}<$>',
+    '9740': '<$>{break}<$>',
+    '9743': '<$>{break}<$>',
+    '9749': '<$>{break}<$>',
+    '15df': '<$>{break}<$>',
+    '15dc': '<$>{break}<$>',
+    '15da': '<$>{break}<$>',
+    '15f8': '<$>{break}<$>',
+    '94fe': '<$>{break}<$>',
+    '94fd': '<$>{break}<$>',
+    '94fc': '<$>{break}<$>',
+    '94fb': '<$>{break}<$>',
+    '944f': '<$>{break}<$>{italic}<$>',
+    '944c': '<$>{break}<$>',
+    '944a': '<$>{break}<$>',
+    '92fc': '<$>{break}<$>',
+    '1051': '<$>{break}<$>',
+    '1052': '<$>{break}<$>',
+    '1054': '<$>{break}<$>',
+    '92fe': '<$>{break}<$>',
+    '92fd': '<$>{break}<$>',
+    '1058': '<$>{break}<$>',
+    '157a': '<$>{break}<$>',
+    '157f': '<$>{break}<$>',
+    '9279': '<$>{break}<$>',
+    '94f4': '<$>{break}<$>',
+    '94f7': '<$>{break}<$>',
+    '94f1': '<$>{break}<$>',
+    '9449': '<$>{break}<$>',
+    '92fb': '<$>{break}<$>',
+    '9446': '<$>{break}<$>',
+    '9445': '<$>{break}<$>',
+    '9443': '<$>{break}<$>',
+    '94f8': '<$>{break}<$>',
+    '9440': '<$>{break}<$>',
+    '1057': '<$>{break}<$>',
+    '9245': '<$>{break}<$>',
+    '92f2': '<$>{break}<$>',
+    '1579': '<$>{break}<$>',
+    '92f7': '<$>{break}<$>',
+    '105e': '<$>{break}<$>',
+    '92f4': '<$>{break}<$>',
+    '1573': '<$>{break}<$>',
+    '1570': '<$>{break}<$>',
+    '1576': '<$>{break}<$>',
+    '1575': '<$>{break}<$>',
+    '16c1': '<$>{break}<$>',
+    '16c2': '<$>{break}<$>',
+    '9168': '<$>{break}<$>',
+    '16c7': '<$>{break}<$>',
+    '9164': '<$>{break}<$>',
+    '9167': '<$>{break}<$>',
+    '9161': '<$>{break}<$>',
+    '9162': '<$>{break}<$>',
+    '947f': '<$>{break}<$>',
+    '91c2': '<$>{break}<$>',
+    '91c1': '<$>{break}<$>',
+    '91c7': '<$>{break}<$>',
+    '91c4': '<$>{break}<$>',
+    '13e3': '<$>{break}<$>',
+    '91c8': '<$>{break}<$>',
+    '91d0': '<$>{break}<$>',
+    '13e5': '<$>{break}<$>',
+    '13c8': '<$>{break}<$>',
+    '16cb': '<$>{break}<$>',
+    '16cd': '<$>{break}<$>',
+    '16ce': '<$>{break}<$>{italic}<$>',
+    '916d': '<$>{break}<$>',
+    '916e': '<$>{break}<$>{italic}<$>',
+    '916b': '<$>{break}<$>',
+    '91d5': '<$>{break}<$>',
+    '137a': '<$>{break}<$>',
+    '91cb': '<$>{break}<$>',
+    '91ce': '<$>{break}<$>{italic}<$>',
+    '91cd': '<$>{break}<$>',
+    '13ec': '<$>{break}<$>',
+    '13c1': '<$>{break}<$>',
+    '13ea': '<$>{break}<$>',
+    '13ef': '<$>{break}<$>{italic}<$>',
+    '94f2': '<$>{break}<$>',
+    '97fb': '<$>{break}<$>',
+    '97fc': '<$>{break}<$>',
+    '1658': '<$>{break}<$>',
+    '97fd': '<$>{break}<$>',
+    '97fe': '<$>{break}<$>',
+    '1652': '<$>{break}<$>',
+    '1651': '<$>{break}<$>',
+    '1657': '<$>{break}<$>',
+    '1654': '<$>{break}<$>',
+    '10cb': '<$>{break}<$>',
+    '97f2': '<$>{break}<$>',
+    '97f1': '<$>{break}<$>',
+    '97f7': '<$>{break}<$>',
+    '97f4': '<$>{break}<$>',
+    '165b': '<$>{break}<$>',
+    '97f8': '<$>{break}<$>',
+    '165d': '<$>{break}<$>',
+    '165e': '<$>{break}<$>',
+    '15cd': '<$>{break}<$>',
+    '10cd': '<$>{break}<$>',
+    '9767': '<$>{break}<$>',
+    '9249': '<$>{break}<$>',
+    '1349': '<$>{break}<$>',
+    '91d9': '<$>{break}<$>',
+    '1340': '<$>{break}<$>',
+    '91d3': '<$>{break}<$>',
+    '9243': '<$>{break}<$>',
+    '1343': '<$>{break}<$>',
+    '91d6': '<$>{break}<$>',
+    '1345': '<$>{break}<$>',
+    '1346': '<$>{break}<$>',
+    '9246': '<$>{break}<$>',
+    '94e9': '<$>{break}<$>',
+    '94e5': '<$>{break}<$>',
+    '94e6': '<$>{break}<$>',
+    '94e0': '<$>{break}<$>',
+    '94e3': '<$>{break}<$>',
+    '15ea': '<$>{break}<$>',
+    '15ec': '<$>{break}<$>',
+    '15ef': '<$>{break}<$>{italic}<$>',
+    '16fe': '<$>{break}<$>',
+    '16fd': '<$>{break}<$>',
+    '16fc': '<$>{break}<$>',
+    '16fb': '<$>{break}<$>',
+    '1367': '<$>{break}<$>',
+    '94ef': '<$>{break}<$>{italic}<$>',
+    '94ea': '<$>{break}<$>',
+    '94ec': '<$>{break}<$>',
+    '924a': '<$>{break}<$>',
+    '91dc': '<$>{break}<$>',
+    '924c': '<$>{break}<$>',
+    '91da': '<$>{break}<$>',
+    '91df': '<$>{break}<$>',
+    '134f': '<$>{break}<$>{italic}<$>',
+    '924f': '<$>{break}<$>{italic}<$>',
+    '16f8': '<$>{break}<$>',
+    '16f7': '<$>{break}<$>',
+    '16f4': '<$>{break}<$>',
+    '16f2': '<$>{break}<$>',
+    '16f1': '<$>{break}<$>',
+    '15e0': '<$>{break}<$>',
+    '15e3': '<$>{break}<$>',
+    '15e5': '<$>{break}<$>',
+    '15e6': '<$>{break}<$>',
+    '15e9': '<$>{break}<$>',
+    '9757': '<$>{break}<$>',
+    '9754': '<$>{break}<$>',
+    '9752': '<$>{break}<$>',
+    '9751': '<$>{break}<$>',
+    '9758': '<$>{break}<$>',
+    '92f1': '<$>{break}<$>',
+    '104c': '<$>{break}<$>',
+    '104a': '<$>{break}<$>',
+    '104f': '<$>{break}<$>{italic}<$>',
+    '105d': '<$>{break}<$>',
+    '92f8': '<$>{break}<$>',
+    '975e': '<$>{break}<$>',
+    '975d': '<$>{break}<$>',
+    '975b': '<$>{break}<$>',
+    '1043': '<$>{break}<$>',
+    '1040': '<$>{break}<$>',
+    '1046': '<$>{break}<$>',
+    '1045': '<$>{break}<$>',
+    '1049': '<$>{break}<$>',
+    '9479': '<$>{break}<$>',
+    '917f': '<$>{break}<$>',
+    '9470': '<$>{break}<$>',
+    '9476': '<$>{break}<$>',
+    '917a': '<$>{break}<$>',
+    '9475': '<$>{break}<$>',
+    '927a': '<$>{break}<$>',
+    '927f': '<$>{break}<$>',
+    '134a': '<$>{break}<$>',
+    '15fb': '<$>{break}<$>',
+    '15fc': '<$>{break}<$>',
+    '15fd': '<$>{break}<$>',
+    '15fe': '<$>{break}<$>',
+    '1546': '<$>{break}<$>',
+    '1545': '<$>{break}<$>',
+    '1543': '<$>{break}<$>',
+    '1540': '<$>{break}<$>',
+    '1549': '<$>{break}<$>',
+    '13fd': '<$>{break}<$>',
+    '13fe': '<$>{break}<$>',
+    '13fb': '<$>{break}<$>',
+    '13fc': '<$>{break}<$>',
+    '92e9': '<$>{break}<$>',
+    '92e6': '<$>{break}<$>',
+    '9458': '<$>{break}<$>',
+    '92e5': '<$>{break}<$>',
+    '92e3': '<$>{break}<$>',
+    '92e0': '<$>{break}<$>',
+    '9270': '<$>{break}<$>',
+    '9273': '<$>{break}<$>',
+    '9275': '<$>{break}<$>',
+    '9276': '<$>{break}<$>',
+    '15f1': '<$>{break}<$>',
+    '15f2': '<$>{break}<$>',
+    '15f4': '<$>{break}<$>',
+    '15f7': '<$>{break}<$>',
+    '9179': '<$>{break}<$>',
+    '9176': '<$>{break}<$>',
+    '9175': '<$>{break}<$>',
+    '947a': '<$>{break}<$>',
+    '9173': '<$>{break}<$>',
+    '9170': '<$>{break}<$>',
+    '13f7': '<$>{break}<$>',
+    '13f4': '<$>{break}<$>',
+    '13f2': '<$>{break}<$>',
+    '13f1': '<$>{break}<$>',
+    '92ef': '<$>{break}<$>{italic}<$>',
+    '92ec': '<$>{break}<$>',
+    '13f8': '<$>{break}<$>',
+    '92ea': '<$>{break}<$>',
+    '154f': '<$>{break}<$>{italic}<$>',
+    '154c': '<$>{break}<$>',
+    '154a': '<$>{break}<$>',
+    '16c4': '<$>{break}<$>',
+    '16c8': '<$>{break}<$>',
+    '97c8': '<$>{break}<$>',
+    '164f': '<$>{break}<$>{italic}<$>',
+    '164a': '<$>{break}<$>',
+    '164c': '<$>{break}<$>',
+    '1645': '<$>{break}<$>',
+    '1646': '<$>{break}<$>',
+    '1640': '<$>{break}<$>',
+    '1643': '<$>{break}<$>',
+    '1649': '<$>{break}<$>',
+    '94df': '<$>{break}<$>',
+    '94dc': '<$>{break}<$>',
+    '94da': '<$>{break}<$>',
+    '135b': '<$>{break}<$>',
+    '135e': '<$>{break}<$>',
+    '135d': '<$>{break}<$>',
+    '1370': '<$>{break}<$>',
+    '9240': '<$>{break}<$>',
+    '13e9': '<$>{break}<$>',
+    '1375': '<$>{break}<$>',
+    '1679': '<$>{break}<$>',
+    '1358': '<$>{break}<$>',
+    '1352': '<$>{break}<$>',
+    '1351': '<$>{break}<$>',
+    '1376': '<$>{break}<$>',
+    '1357': '<$>{break}<$>',
+    '1354': '<$>{break}<$>',
+    '1379': '<$>{break}<$>',
+    '94d9': '<$>{break}<$>',
+    '94d6': '<$>{break}<$>',
+    '94d5': '<$>{break}<$>',
+    '1562': '<$>{break}<$>',
+    '94d3': '<$>{break}<$>',
+    '94d0': '<$>{break}<$>',
+    '13e0': '<$>{break}<$>',
+    '13e6': '<$>{break}<$>',
+    '976b': '<$>{break}<$>',
+    '15c4': '<$>{break}<$>',
+    '15c2': '<$>{break}<$>',
+    '15c1': '<$>{break}<$>',
+    '976e': '<$>{break}<$>{italic}<$>',
+    '134c': '<$>{break}<$>',
+    '15c8': '<$>{break}<$>',
+    '92c8': '<$>{break}<$>',
+    '16e9': '<$>{break}<$>',
+    '16e3': '<$>{break}<$>',
+    '16e0': '<$>{break}<$>',
+    '16e6': '<$>{break}<$>',
+    '16e5': '<$>{break}<$>',
+    '91e5': '<$>{break}<$>',
+    '91e6': '<$>{break}<$>',
+    '91e0': '<$>{break}<$>',
+    '91e3': '<$>{break}<$>',
+    '13c4': '<$>{break}<$>',
+    '13c7': '<$>{break}<$>',
+    '91e9': '<$>{break}<$>',
+    '13c2': '<$>{break}<$>',
+    '9762': '<$>{break}<$>',
+    '15ce': '<$>{break}<$>{italic}<$>',
+    '9761': '<$>{break}<$>',
+    '15cb': '<$>{break}<$>',
+    '9764': '<$>{break}<$>',
+    '9768': '<$>{break}<$>',
+    '91ef': '<$>{break}<$>{italic}<$>',
+    '91ea': '<$>{break}<$>',
+    '91ec': '<$>{break}<$>',
+    '13ce': '<$>{break}<$>{italic}<$>',
+    '13cd': '<$>{break}<$>',
+    '97da': '<$>{break}<$>',
+    '13cb': '<$>{break}<$>',
+    '1362': '<$>{break}<$>',
+    '16ec': '<$>{break}<$>',
+    '16ea': '<$>{break}<$>',
+    '16ef': '<$>{break}<$>{italic}<$>',
+    '97c1': '<$>{break}<$>',
+    '97c2': '<$>{break}<$>',
+    '97c4': '<$>{break}<$>',
+    '97c7': '<$>{break}<$>',
+    '92cd': '<$>{break}<$>',
+    '92ce': '<$>{break}<$>{italic}<$>',
+    '92cb': '<$>{break}<$>',
+    '92da': '<$>{break}<$>',
+    '92dc': '<$>{break}<$>',
+    '92df': '<$>{break}<$>',
+    '97df': '<$>{break}<$>',
+    '155b': '<$>{break}<$>',
+    '155e': '<$>{break}<$>',
+    '155d': '<$>{break}<$>',
+    '97dc': '<$>{break}<$>',
+    '1675': '<$>{break}<$>',
+    '1676': '<$>{break}<$>',
+    '1670': '<$>{break}<$>',
+    '1673': '<$>{break}<$>',
+    '1662': '<$>{break}<$>',
+    '97cb': '<$>{break}<$>',
+    '97ce': '<$>{break}<$>{italic}<$>',
+    '97cd': '<$>{break}<$>',
+    '92c4': '<$>{break}<$>',
+    '92c7': '<$>{break}<$>',
+    '92c1': '<$>{break}<$>',
+    '92c2': '<$>{break}<$>',
+    '1551': '<$>{break}<$>',
+    '97d5': '<$>{break}<$>',
+    '97d6': '<$>{break}<$>',
+    '1552': '<$>{break}<$>',
+    '97d0': '<$>{break}<$>',
+    '1554': '<$>{break}<$>',
+    '1557': '<$>{break}<$>',
+    '97d3': '<$>{break}<$>',
+    '1558': '<$>{break}<$>',
+    '167f': '<$>{break}<$>',
+    '137f': '<$>{break}<$>',
+    '167a': '<$>{break}<$>',
+    '92d9': '<$>{break}<$>',
+    '92d0': '<$>{break}<$>',
+    '92d3': '<$>{break}<$>',
+    '92d5': '<$>{break}<$>',
+    '92d6': '<$>{break}<$>',
+    '10dc': '<$>{break}<$>',
+    '9262': '<$>{break}<$>',
+    '9261': '<$>{break}<$>',
+    '91f8': '<$>{break}<$>',
+    '10df': '<$>{break}<$>',
+    '9264': '<$>{break}<$>',
+    '91f4': '<$>{break}<$>',
+    '91f7': '<$>{break}<$>',
+    '91f1': '<$>{break}<$>',
+    '91f2': '<$>{break}<$>',
+    '97d9': '<$>{break}<$>',
+    '9149': '<$>{break}<$>',
+    '9143': '<$>{break}<$>',
+    '9140': '<$>{break}<$>',
+    '9146': '<$>{break}<$>',
+    '9145': '<$>{break}<$>',
+    '9464': '<$>{break}<$>',
+    '9467': '<$>{break}<$>',
+    '9461': '<$>{break}<$>',
+    '9462': '<$>{break}<$>',
+    '9468': '<$>{break}<$>',
+    '914c': '<$>{break}<$>',
+    '914a': '<$>{break}<$>',
+    '914f': '<$>{break}<$>{italic}<$>',
+    '10d3': '<$>{break}<$>',
+    '926b': '<$>{break}<$>',
+    '10d0': '<$>{break}<$>',
+    '10d6': '<$>{break}<$>',
+    '926e': '<$>{break}<$>{italic}<$>',
+    '926d': '<$>{break}<$>',
+    '91fd': '<$>{break}<$>',
+    '91fe': '<$>{break}<$>',
+    '10d9': '<$>{break}<$>',
+    '91fb': '<$>{break}<$>',
+    '91fc': '<$>{break}<$>',
+    '946e': '<$>{break}<$>{italic}<$>',
+    '946d': '<$>{break}<$>',
+    '946b': '<$>{break}<$>',
+    '10da': '<$>{break}<$>',
+    '10d5': '<$>{break}<$>',
+    '9267': '<$>{break}<$>',
+    '9268': '<$>{break}<$>',
+    '16df': '<$>{break}<$>',
+    '16da': '<$>{break}<$>',
+    '16dc': '<$>{break}<$>',
+    '9454': '<$>{break}<$>',
+    '9457': '<$>{break}<$>',
+    '9451': '<$>{break}<$>',
+    '9452': '<$>{break}<$>',
+    '136d': '<$>{break}<$>',
+    '136e': '<$>{break}<$>{italic}<$>',
+    '136b': '<$>{break}<$>',
+    '13d9': '<$>{break}<$>',
+    '13da': '<$>{break}<$>',
+    '13dc': '<$>{break}<$>',
+    '13df': '<$>{break}<$>',
+    '1568': '<$>{break}<$>',
+    '1561': '<$>{break}<$>',
+    '1564': '<$>{break}<$>',
+    '1567': '<$>{break}<$>',
+    '16d5': '<$>{break}<$>',
+    '16d6': '<$>{break}<$>',
+    '16d0': '<$>{break}<$>',
+    '16d3': '<$>{break}<$>',
+    '945d': '<$>{break}<$>',
+    '945e': '<$>{break}<$>',
+    '16d9': '<$>{break}<$>',
+    '945b': '<$>{break}<$>',
+    '156b': '<$>{break}<$>',
+    '156d': '<$>{break}<$>',
+    '156e': '<$>{break}<$>{italic}<$>',
+    '105b': '<$>{break}<$>',
+    '1364': '<$>{break}<$>',
+    '1368': '<$>{break}<$>',
+    '1361': '<$>{break}<$>',
+    '13d0': '<$>{break}<$>',
+    '13d3': '<$>{break}<$>',
+    '13d5': '<$>{break}<$>',
+    '13d6': '<$>{break}<$>',
+    '97a1': '',
+    '97a2': '',
+    '9723': '',
+    '94a1': '',
+    '94a4': '',
+    '94ad': '',
+    '1020': '',
+    '10a1': '',
+    '10a2': '',
+    '1023': '',
+    '10a4': '',
+    '1025': '',
+    '1026': '',
+    '10a7': '',
+    '10a8': '',
+    '1029': '',
+    '102a': '',
+    '10ab': '',
+    '102c': '',
+    '10ad': '',
+    '10ae': '',
+    '102f': '',
+    '97ad': '',
+    '97a4': '',
+    '9725': '',
+    '9726': '',
+    '97a7': '',
+    '97a8': '',
+    '9729': '',
+    '972a': '',
+    '9120': '<$>{end-italic}<$>',
+    '91a1': '',
+    '91a2': '',
+    '9123': '',
+    '91a4': '',
+    '9125': '',
+    '9126': '',
+    '91a7': '',
+    '91a8': '',
+    '9129': '',
+    '912a': '',
+    '91ab': '',
+    '912c': '',
+    '91ad': '',
+    '97ae': '',
+    '972f': '',
+    '91ae': '<$>{italic}<$>',
+    '912f': '<$>{italic}<$>',
+    '94a8': '',
+    '9423': '',
+    '94a2': '',
+}
+
+
+# Single-byte character codes (two hex digits) -> the character they stand for.
+# For most entries the low seven bits follow ASCII, and the top bit is set
+# where needed to give the byte odd parity (0x21 '!' is stored as 'a1').
+# Several codes stand for accented letters instead of their ASCII symbol
+# ('2a' is 'á', 'dc' is 'é', ...), and '7f' and '80' map to the empty string.
+CHARACTERS = {
+    '20': ' ',
+    'a1': '!',
+    'a2': '"',
+    '23': '#',
+    'a4': '$',
+    '25': '%',
+    '26': '&',
+    'a7': '\'',
+    'a8': '(',
+    '29': ')',
+    '2a': 'á',
+    'ab': '+',
+    '2c': ',',
+    'ad': '-',
+    'ae': '.',
+    '2f': '/',
+    'b0': '0',
+    '31': '1',
+    '32': '2',
+    'b3': '3',
+    '34': '4',
+    'b5': '5',
+    'b6': '6',
+    '37': '7',
+    '38': '8',
+    'b9': '9',
+    'ba': ':',
+    '3b': ';',
+    'bc': '<',
+    '3d': '=',
+    '3e': '>',
+    'bf': '?',
+    '40': '@',
+    'c1': 'A',
+    'c2': 'B',
+    '43': 'C',
+    'c4': 'D',
+    '45': 'E',
+    '46': 'F',
+    'c7': 'G',
+    'c8': 'H',
+    '49': 'I',
+    '4a': 'J',
+    'cb': 'K',
+    '4c': 'L',
+    'cd': 'M',
+    'ce': 'N',
+    '4f': 'O',
+    'd0': 'P',
+    '51': 'Q',
+    '52': 'R',
+    'd3': 'S',
+    '54': 'T',
+    'd5': 'U',
+    'd6': 'V',
+    '57': 'W',
+    '58': 'X',
+    'd9': 'Y',
+    'da': 'Z',
+    '5b': '[',
+    'dc': 'é',
+    '5d': ']',
+    '5e': 'í',
+    'df': 'ó',
+    'e0': 'ú',
+    '61': 'a',
+    '62': 'b',
+    'e3': 'c',
+    '64': 'd',
+    'e5': 'e',
+    'e6': 'f',
+    '67': 'g',
+    '68': 'h',
+    'e9': 'i',
+    'ea': 'j',
+    '6b': 'k',
+    'ec': 'l',
+    '6d': 'm',
+    '6e': 'n',
+    'ef': 'o',
+    '70': 'p',
+    'f1': 'q',
+    'f2': 'r',
+    '73': 's',
+    'f4': 't',
+    '75': 'u',
+    '76': 'v',
+    'f7': 'w',
+    'f8': 'x',
+    '79': 'y',
+    '7a': 'z',
+    'fb': 'ç',
+    '7c': '÷',
+    'fd': 'Ñ',
+    'fe': 'ñ',
+    '7f': '',
+    '80': ''
+}
+
+
+# Two-byte codes (four hex digits, all with high byte '91') -> the special
+# character each one stands for.
+SPECIAL_CHARS = {
+    '91b0': '®',
+    '9131': '°',
+    '9132': '½',
+    '91b3': '¿',
+    '91b4': '™',
+    '91b5': '¢',
+    '91b6': '£',
+    '9137': '♪',
+    '9138': 'à',
+    '91b9': ' ',
+    '91ba': 'è',
+    '913b': 'â',
+    '91bc': 'ê',
+    '913d': 'î',
+    '913e': 'ô',
+    '91bf': 'û'
+}
+
+
+# Two-byte codes (four hex digits, high byte '92' or '13') -> the extended
+# character each one stands for: accented letters, typographic punctuation,
+# a few ASCII symbols and box-drawing corners.
+EXTENDED_CHARS = {
+    '9220': 'Á',
+    '92a1': 'É',
+    '92a2': 'Ó',
+    '9223': 'Ú',
+    '92a4': 'Ü',
+    '9225': 'ü',
+    '9226': '‘',
+    '92a7': '¡',
+    '92a8': '*',
+    '9229': '’',
+    '922a': '—',
+    '92ab': '©',
+    '922c': '℠',
+    '92ad': '•',
+    '92ae': '“',
+    '922f': '”',
+    '92b0': 'À',
+    '9231': 'Â',
+    '9232': 'Ç',
+    '92b3': 'È',
+    '9234': 'Ê',
+    '92b5': 'Ë',
+    '92b6': 'ë',
+    '9237': 'Î',
+    '9238': 'Ï',
+    '92b9': 'ï',
+    '92ba': 'Ô',
+    '923b': 'Ù',
+    '92bc': 'ù',
+    '923d': 'Û',
+    '923e': '«',
+    '92bf': '»',
+    '1320': 'Ã',
+    '13a1': 'ã',
+    '13a2': 'Í',
+    '1323': 'Ì',
+    '13a4': 'ì',
+    '1325': 'Ò',
+    '1326': 'ò',
+    '13a7': 'Õ',
+    '13a8': 'õ',
+    '1329': '{',
+    '132a': '}',
+    '13ab': '\\',
+    '132c': '^',
+    '13ad': '_',
+    '13ae': '¦',
+    '132f': '~',
+    '13b0': 'Ä',
+    '1331': 'ä',
+    '1332': 'Ö',
+    '13b3': 'ö',
+    '1334': 'ß',
+    '13b5': '¥',
+    '13b6': '¤',
+    '1337': '|',
+    '1338': 'Å',
+    '13b9': 'å',
+    '13ba': 'Ø',
+    '133b': 'ø',
+    '13bc': '┌',
+    '133d': '┐',
+    '133e': '└',
+    '13bf': '┘',
+}
+
+
+# Cursor positioning codes.
+# High byte of the positioning code for each caption row. The list is indexed
+# by row number; rows start at 1, so slot 0 only holds the 'xx' placeholder.
+PAC_HIGH_BYTE_BY_ROW = [
+    'xx',                    # index 0: placeholder, not a row
+    '91', '91', '92', '92',  # rows 1-4
+    '15', '15', '16', '16',  # rows 5-8
+    '97', '97', '10',        # rows 9-11
+    '13', '13', '94', '94',  # rows 12-15
+]
+# One low byte per caption row, indexed by row number like the high-byte list
+# (slot 0 is the 'xx' placeholder). Each entry is the first low byte that
+# PAC_BYTES_TO_POSITIONING_MAP lists for column 0 of that row.
+PAC_LOW_BYTE_BY_ROW_RESTRICTED = [
+    'xx',                    # index 0: placeholder, not a row
+    'd0', '70', 'd0', '70',  # rows 1-4
+    'd0', '70', 'd0', '70',  # rows 5-8
+    'd0', '70', 'd0',        # rows 9-11
+    'd0', '70', 'd0', '70',  # rows 12-15
+]
+
+# Positioning code bytes -> cursor position.
+# Outer keys are the high-order byte; each inner key is a tuple of low-order
+# bytes. Any of the low bytes in that tuple, coupled with the high-order byte,
+# maps to the same (row, column) tuple.
+# This literal is only the compact authoring form: further down the module the
+# name is rebound to a flattened dict, so that lookups read like
+# PAC_BYTES_TO_POSITIONING_MAP['91']['d6'] == (1, 12).
+# High byte '10' addresses a single row (11); every other high byte covers two.
+PAC_BYTES_TO_POSITIONING_MAP = {
+    '91': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (1, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (2, 0),  # noqa
+        ('52', 'd3'): (1, 4),
+        ('54', 'd5'): (1, 8),
+        ('d6', '57'): (1, 12),
+        ('58', 'd9'): (1, 16),
+        ('da', '5b'): (1, 20),
+        ('dc', '5d'): (1, 24),
+        ('5e', 'df'): (1, 28),
+
+        ('f2', '73'): (2, 4),
+        ('f4', '75'): (2, 8),
+        ('76', 'f7'): (2, 12),
+        ('f8', '79'): (2, 16),
+        ('7a', 'fb'): (2, 20),
+        ('7c', 'fd'): (2, 24),
+        ('fe', '7f'): (2, 28)
+    },
+    '92': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (3, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (4, 0),  # noqa
+        ('52', 'd3'): (3, 4),
+        ('54', 'd5'): (3, 8),
+        ('d6', '57'): (3, 12),
+        ('58', 'd9'): (3, 16),
+        ('da', '5b'): (3, 20),
+        ('dc', '5d'): (3, 24),
+        ('5e', 'df'): (3, 28),
+
+        ('f2', '73'): (4, 4),
+        ('f4', '75'): (4, 8),
+        ('76', 'f7'): (4, 12),
+        ('f8', '79'): (4, 16),
+        ('7a', 'fb'): (4, 20),
+        ('7c', 'fd'): (4, 24),
+        ('fe', '7f'): (4, 28)
+    },
+    '15': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (5, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (6, 0),  # noqa
+        ('52', 'd3'): (5, 4),
+        ('54', 'd5'): (5, 8),
+        ('d6', '57'): (5, 12),
+        ('58', 'd9'): (5, 16),
+        ('da', '5b'): (5, 20),
+        ('dc', '5d'): (5, 24),
+        ('5e', 'df'): (5, 28),
+
+        ('f2', '73'): (6, 4),
+        ('f4', '75'): (6, 8),
+        ('76', 'f7'): (6, 12),
+        ('f8', '79'): (6, 16),
+        ('7a', 'fb'): (6, 20),
+        ('7c', 'fd'): (6, 24),
+        ('fe', '7f'): (6, 28)
+    },
+    '16': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (7, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (8, 0),  # noqa
+        ('52', 'd3'): (7, 4),
+        ('54', 'd5'): (7, 8),
+        ('d6', '57'): (7, 12),
+        ('58', 'd9'): (7, 16),
+        ('da', '5b'): (7, 20),
+        ('dc', '5d'): (7, 24),
+        ('5e', 'df'): (7, 28),
+
+        ('f2', '73'): (8, 4),
+        ('f4', '75'): (8, 8),
+        ('76', 'f7'): (8, 12),
+        ('f8', '79'): (8, 16),
+        ('7a', 'fb'): (8, 20),
+        ('7c', 'fd'): (8, 24),
+        ('fe', '7f'): (8, 28)
+    },
+    '97': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (9, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (10, 0),  # noqa
+        ('52', 'd3'): (9, 4),
+        ('54', 'd5'): (9, 8),
+        ('d6', '57'): (9, 12),
+        ('58', 'd9'): (9, 16),
+        ('da', '5b'): (9, 20),
+        ('dc', '5d'): (9, 24),
+        ('5e', 'df'): (9, 28),
+
+        ('f2', '73'): (10, 4),
+        ('f4', '75'): (10, 8),
+        ('76', 'f7'): (10, 12),
+        ('f8', '79'): (10, 16),
+        ('7a', 'fb'): (10, 20),
+        ('7c', 'fd'): (10, 24),
+        ('fe', '7f'): (10, 28)
+    },
+    '10': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (11, 0),  # noqa
+        ('52', 'd3'): (11, 4),
+        ('54', 'd5'): (11, 8),
+        ('d6', '57'): (11, 12),
+        ('58', 'd9'): (11, 16),
+        ('da', '5b'): (11, 20),
+        ('dc', '5d'): (11, 24),
+        ('5e', 'df'): (11, 28),
+    },
+    '13': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (12, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (13, 0),  # noqa
+        ('52', 'd3'): (12, 4),
+        ('54', 'd5'): (12, 8),
+        ('d6', '57'): (12, 12),
+        ('58', 'd9'): (12, 16),
+        ('da', '5b'): (12, 20),
+        ('dc', '5d'): (12, 24),
+        ('5e', 'df'): (12, 28),
+
+        ('f2', '73'): (13, 4),
+        ('f4', '75'): (13, 8),
+        ('76', 'f7'): (13, 12),
+        ('f8', '79'): (13, 16),
+        ('7a', 'fb'): (13, 20),
+        ('7c', 'fd'): (13, 24),
+        ('fe', '7f'): (13, 28)
+    },
+    '94': {
+        ('d0', '51', 'c2', '43', 'c4', '45', '46', 'c7', 'c8', '49', '4a', 'cb', '4c', 'cd'): (14, 0),  # noqa
+        ('70', 'f1', '62', 'e3', '64', 'e5', 'e6', '67', '68', 'e9', 'ea', '6b', 'ec', '6d'): (15, 0),  # noqa
+        ('52', 'd3'): (14, 4),
+        ('54', 'd5'): (14, 8),
+        ('d6', '57'): (14, 12),
+        ('58', 'd9'): (14, 16),
+        ('da', '5b'): (14, 20),
+        ('dc', '5d'): (14, 24),
+        ('5e', 'df'): (14, 28),
+
+        ('f2', '73'): (15, 4),
+        ('f4', '75'): (15, 8),
+        ('76', 'f7'): (15, 12),
+        ('f8', '79'): (15, 16),
+        ('7a', 'fb'): (15, 20),
+        ('7c', 'fd'): (15, 24),
+        ('fe', '7f'): (15, 28)
+    }
+}
+
+
+def _create_position_to_bytes_map(bytes_to_pos):
+    """Invert a "bytes -> position" mapping into "position -> bytes".
+
+    :param bytes_to_pos: dict shaped like
+        {high_byte: {(low_byte, ...): (row, column)}}
+    :returns: dict shaped like
+        {row: {column: ((high_byte, low_byte), ...)}}, holding for each
+        position every byte pair that addresses it, in the order the low
+        bytes appear in the input tuple
+    """
+    position_to_bytes = {}
+    for high, positions_by_lows in bytes_to_pos.items():
+        for lows, position in positions_by_lows.items():
+            row, column = position[0], position[1]
+            # Pair the shared high byte with each alternative low byte.
+            byte_pairs = tuple((high, low) for low in lows)
+            position_to_bytes.setdefault(row, {})[column] = byte_pairs
+    return position_to_bytes
+
+# (Almost) the reverse of PAC_BYTES_TO_POSITIONING_MAP: index it as
+# POSITIONING_TO_PAC_MAP[row][column] to get every (high byte, low byte) pair
+# that addresses that position, e.g. [15][4] gives
+# (('94', 'f2'), ('94', '73')).
+# NOTE: this has to be computed here, while PAC_BYTES_TO_POSITIONING_MAP still
+# has its tuple-keyed form; that name is rebound to a flattened dict below.
+POSITIONING_TO_PAC_MAP = _create_position_to_bytes_map(
+    PAC_BYTES_TO_POSITIONING_MAP
+)
+
+
+def _restructure_bytes_to_position_map(byte_to_pos_map):
+    """Flatten the tuple-keyed inner dicts into one entry per low byte.
+
+    :param byte_to_pos_map: dict shaped like
+        {high_byte: {(low_byte, ...): position}}
+    :returns: new dict shaped like {high_byte: {low_byte: position}}, where
+        every low byte of a tuple maps to that tuple's position
+    """
+    flattened = {}
+    for high_byte, positions_by_lows in byte_to_pos_map.items():
+        per_low_byte = {}
+        for low_bytes, position in positions_by_lows.items():
+            for low_byte in low_bytes:
+                per_low_byte[low_byte] = position
+        flattened[high_byte] = per_low_byte
+    return flattened
+
+# Rebind the name to the flattened form, so that from here on it is indexed
+# directly with single bytes, e.g.
+# PAC_BYTES_TO_POSITIONING_MAP['91']['75'] == (2, 8).
+PAC_BYTES_TO_POSITIONING_MAP = _restructure_bytes_to_position_map(
+    PAC_BYTES_TO_POSITIONING_MAP)
+
+
+# Inverted character lookups: character -> code.
+# Characters that appear under more than one code (CHARACTERS maps both '7f'
+# and '80' to '') collapse to a single entry.
+CHARACTER_TO_CODE = {
+    glyph: byte_code for byte_code, glyph in viewitems(CHARACTERS)
+}
+
+# Extended characters are entered first, then the special characters.
+SPECIAL_OR_EXTENDED_CHAR_TO_CODE = {
+    glyph: word
+    for table in (EXTENDED_CHARS, SPECIAL_CHARS)
+    for word, glyph in viewitems(table)
+}
+
+# Time to transmit a single codeword, in microseconds: one second divided by
+# 29.97 (exactly 30000/1001) codewords per second, i.e. roughly 33366.67.
+MICROSECONDS_PER_CODEWORD = 1000.0 * 1000.0 / (30.0 * 1000.0 / 1001.0)
+
+
+# Format signature string; presumably the first line of an SCC file --
+# TODO confirm against the reader/writer code.
+HEADER = 'Scenarist_SCC V1.0'
--- a/utils/modules/pycaption/scc/specialized_collections.py
+++ b/utils/modules/pycaption/scc/specialized_collections.py
@@ -0,0 +1,823 @@
+from ..base import CaptionList, Caption, CaptionNode
+from ..geometry import (UnitEnum, Size, Layout, Point, Alignment,
+                        VerticalAlignmentEnum, HorizontalAlignmentEnum)
+
+from .constants import PAC_BYTES_TO_POSITIONING_MAP, COMMANDS
+import collections
+
+
+class PreCaption(object):
+    """
+    Mutable holder for the data of a caption that is still being assembled.
+
+    Caption instances must be treated as immutable, but part of the code in
+    this module was written when they could still be modified in place. This
+    class keeps that code working: the data is collected (and changed) here,
+    and an actual Caption is only instantiated at the end, by calling
+    `to_real_caption`.
+    """
+
+    def __init__(self, start=0, end=0):
+        self.start, self.end = start, end
+        self.nodes = []
+        self.style = {}
+        self.layout_info = None
+
+    def to_real_caption(self):
+        """Instantiate a Caption from the data currently stored
+
+        :rtype: Caption
+        """
+        caption_args = (
+            self.start, self.end, self.nodes, self.style, self.layout_info)
+        return Caption(*caption_args)
+
+
+class TimingCorrectingCaptionList(list):
+    """List of captions that fixes the end times of its items as it grows.
+
+    Whenever captions get added, the captions stored by the previous call
+    (a single one for `append`, possibly several for `extend`) have their end
+    time set to the start time of the first newly added caption, as long as
+    that end time was still unset (0).
+
+    None values and captions without nodes are never stored.
+    """
+    def __init__(self, *args, **kwargs):
+        super(TimingCorrectingCaptionList, self).__init__(*args, **kwargs)
+        # the captions stored by the latest append/extend call
+        self._last_batch = ()
+
+    def append(self, p_object):
+        """Store the caption, after giving an end time to the previously
+        stored batch. Nones and captions without nodes are ignored.
+
+        :type p_object: Caption | None
+        """
+        if p_object is not None and p_object.nodes:
+            self._update_last_batch(self._last_batch, p_object)
+            self._last_batch = (p_object,)
+            super(TimingCorrectingCaptionList, self).append(p_object)
+
+    def extend(self, iterable):
+        """Store all the usable captions of the iterable. The start time of
+        the first of them becomes the end time of the previously stored
+        caption(s)
+
+        :param iterable: an iterable of Caption instances
+        """
+        accepted = []
+        for candidate in iterable:
+            if candidate and candidate.nodes:
+                accepted.append(candidate)
+
+        self._update_last_batch(self._last_batch, *accepted)
+        # an iterable without usable captions leaves an empty batch behind
+        self._last_batch = tuple(accepted)
+        super(TimingCorrectingCaptionList, self).extend(accepted)
+
+    @staticmethod
+    def _update_last_batch(batch, *new_captions):
+        """Ends every caption of the batch when the first of the
+        *new_captions starts. Nothing happens unless the last caption in the
+        batch still has its end time set to 0.
+
+        The start time of the first caption in new_captions should never be 0.
+        This means an invalid SCC file.
+
+        :type batch: tuple[Caption]
+        :type new_captions: tuple[Caption]
+        """
+        first_new = new_captions[0] if new_captions else None
+        if not first_new or not first_new.nodes:
+            return
+
+        if not batch or batch[-1].end != 0:
+            return
+
+        for previous in batch:
+            previous.end = first_new.start
+
+
+class NotifyingDict(dict):
+    """Dictionary-like object, that treats one key as 'active',
+    and notifies observers if the active key changed
+    """
+    # Need an unhashable object as initial value for the active key.
+    # That way we're sure this was never a key in the dict.
+    _guard = {}
+
+    def __init__(self, *args, **kwargs):
+        super(NotifyingDict, self).__init__(*args, **kwargs)
+        self.active_key = self._guard
+        # callables invoked as observer(old_key, new_key), in the order
+        # in which they were registered
+        self.observers = []
+
+    def set_active(self, key):
+        """Sets the active key, notifying the observers if it differs from
+        the previous active key
+
+        :param key: any hashable object
+
+        :raise ValueError: if the key is not present in the dict
+        """
+        if key not in self:
+            raise ValueError('No such key present')
+
+        # Notify observers of the change
+        if key != self.active_key:
+            for observer in self.observers:
+                observer(self.active_key, key)
+
+        self.active_key = key
+
+    def get_active(self):
+        """Returns the value corresponding to the active key
+
+        :raise KeyError: if no key was set as active yet
+        """
+        if self.active_key is self._guard:
+            raise KeyError('No active key set')
+
+        return self[self.active_key]
+
+    def add_change_observer(self, observer):
+        """Receives a callable function, which it will call if the active
+        element changes.
+
+        The observer will receive 2 positional arguments: the old and new key
+
+        :param observer: any callable that can be called with 2 positional
+            arguments
+
+        :raise TypeError: if the observer is not callable
+        """
+        # The builtin is used because the `collections.Callable` alias no
+        # longer exists as of Python 3.10
+        if not callable(observer):
+            raise TypeError('The observer should be callable')
+
+        self.observers.append(observer)
+
+
+class CaptionCreator(object):
+    """Builds captions out of instruction nodes, and keeps them stored
+    """
+    def __init__(self):
+        # subset of self._collection;
+        # the captions of the latest create_and_store call, which are still
+        # susceptible to time corrections
+        self._still_editing = []
+
+        self._collection = TimingCorrectingCaptionList()
+
+    def correct_last_timing(self, end_time, force=False):
+        """Sets the end time of the caption(s) that are still being edited.
+        Unless forced, this only happens if the last of them has no end
+        time (0) yet.
+
+        :type force: bool
+        :param force: Set the end time even if there's already an end time
+
+        :type end_time: float
+        :param end_time: microseconds; the end of the caption;
+        """
+        pending = self._still_editing
+        if not pending:
+            return
+
+        # an end time that was already set is only overwritten on demand
+        if not force and pending[-1].end != 0:
+            return
+
+        for caption in pending:
+            caption.end = end_time
+
+    def create_and_store(self, node_buffer, start):
+        """Interpreter method; converts the buffer into one or more captions
+        and stores them internally. A new caption is begun for every
+        repositioning instruction found in the buffer.
+
+        The InstructionNodeCreator is trusted to have generated proper
+        _InstructionNodes, so they get converted almost 1:1 to CaptionNodes
+
+        :type node_buffer: InstructionNodeCreator
+
+        :type start: float
+        :param start: the start time in microseconds
+        """
+        if node_buffer.is_empty():
+            return
+
+        # the end time (0) is not yet known; it is filled in later
+        current = PreCaption(start=start, end=0)
+        self._still_editing = [current]
+
+        for instruction in node_buffer:
+            # empty elements have nothing to contribute
+            if instruction.is_empty():
+                continue
+
+            # a position change starts another caption, with the same start
+            if instruction.requires_repositioning():
+                current = PreCaption(start=start, end=0)
+                self._still_editing.append(current)
+                continue
+
+            if instruction.is_explicit_break():
+                layout = _get_layout_from_tuple(instruction.position)
+                current.nodes.append(
+                    CaptionNode.create_break(layout_info=layout))
+
+            elif instruction.sets_italics_on():
+                layout = _get_layout_from_tuple(instruction.position)
+                opening = CaptionNode.create_style(
+                    True, {'italics': True}, layout_info=layout)
+                current.nodes.append(opening)
+
+            elif instruction.sets_italics_off():
+                layout = _get_layout_from_tuple(instruction.position)
+                closing = CaptionNode.create_style(
+                    False, {'italics': True}, layout_info=layout)
+                current.nodes.append(closing)
+
+            elif instruction.is_text_node():
+                layout = _get_layout_from_tuple(instruction.position)
+                text_node = CaptionNode.create_text(
+                    instruction.get_text(), layout_info=layout)
+                current.nodes.append(text_node)
+                # the caption takes over the layout of its latest text node
+                current.layout_info = layout
+
+        self._collection.extend(self._still_editing)
+
+    def get_all(self):
+        """Converts every stored caption to a real Caption, returning them
+        as a CaptionList
+
+        :rtype: CaptionList
+        """
+        captions = CaptionList()
+        for pre_caption in self._collection:
+            captions.append(pre_caption.to_real_caption())
+        return captions
+
+
+class InstructionNodeCreator(object):
+    """Creates _InstructionNode instances from characters and commands, storing
+    them internally
+    """
+    def __init__(self, collection=None, position_tracker=None):
+        """
+        :param collection: an optional collection of nodes; when it is
+            missing or empty, a new empty list is used instead
+
+        :param position_tracker: object interrogated about the current
+            positioning, and about pending line breaks or repositioning
+        """
+        if not collection:
+            self._collection = []
+        else:
+            self._collection = collection
+
+        self._position_tracer = position_tracker
+
+    def is_empty(self):
+        """Whether the buffer holds no text at all (True when none of the
+        stored nodes has any text)
+
+        :rtype: bool
+        """
+        return not any(element.text for element in self._collection)
+
+    def add_chars(self, *chars):
+        """Adds characters to a text node (last text node, or a new one)
+
+        If the position tracker reports a pending line break, or a pending
+        repositioning, the corresponding node is stored before the text, and
+        the tracker is told that the change was consumed.
+
+        :param chars: tuple containing text (unicode)
+        """
+        if not chars:
+            return
+
+        current_position = self._position_tracer.get_current_position()
+
+        # get or create a usable node
+        if (self._collection and self._collection[-1].is_text_node()
+                and not self._position_tracer.is_repositioning_required()):
+            node = self._collection[-1]
+        else:
+            # create first node
+            node = _InstructionNode(position=current_position)
+            self._collection.append(node)
+
+        # handle a simple line break
+        if self._position_tracer.is_linebreak_required():
+            # must insert a line break here
+            self._collection.append(_InstructionNode.create_break(
+                position=current_position))
+            node = _InstructionNode.create_text(current_position)
+            self._collection.append(node)
+            self._position_tracer.acknowledge_linebreak_consumed()
+
+        # handle completely new positioning
+        elif self._position_tracer.is_repositioning_required():
+            self._collection.append(
+                _InstructionNode.create_repositioning_command(
+                    current_position
+                )
+            )
+            node = _InstructionNode.create_text(current_position)
+            self._collection.append(node)
+            self._position_tracer.acknowledge_position_changed()
+
+        node.add_chars(*chars)
+
+    def interpret_command(self, command):
+        """Given a command determines whether to turn italics on or off,
+        or to set the positioning
+
+        This is mostly used to convert from the legacy-style commands
+
+        :type command: unicode
+        """
+        self._update_positioning(command)
+
+        # commands missing from COMMANDS are treated as having no description
+        text = COMMANDS.get(command, '')
+
+        if 'italic' in text:
+            if 'end' not in text:
+                self._collection.append(
+                    _InstructionNode.create_italics_style(
+                        self._position_tracer.get_current_position())
+                )
+            else:
+                self._collection.append(
+                    _InstructionNode.create_italics_style(
+                        self._position_tracer.get_current_position(),
+                        turn_on=False
+                    )
+                )
+
+    def _update_positioning(self, command):
+        """Sets the positioning information to use for the next nodes
+
+        Commands that are not 4 characters long, or whose 2 bytes are not
+        found in PAC_BYTES_TO_POSITIONING_MAP, are ignored.
+
+        :type command: unicode
+        """
+        if len(command) != 4:
+            return
+
+        first, second = command[:2], command[2:]
+
+        try:
+            positioning = PAC_BYTES_TO_POSITIONING_MAP[first][second]
+        except KeyError:
+            # not a positioning command
+            pass
+        else:
+            self._position_tracer.update_positioning(positioning)
+
+    def __iter__(self):
+        """Iterates over the stored nodes, after their italics nodes were
+        normalized by _format_italics
+        """
+        return iter(_format_italics(self._collection))
+
+    @classmethod
+    def from_list(cls, stash_list, position_tracker):
+        """Having received a list of instances of this class, creates a new
+        instance that contains all the nodes of the previous instances
+        (basically concatenates the many stashes into one)
+
+        :type stash_list: list[InstructionNodeCreator]
+        :param stash_list: a list of instances of this class
+
+        :type position_tracker: .state_machines.DefaultProvidingPositionTracker
+        :param position_tracker: state machine to be interrogated about the
+            positioning when creating a node
+
+        :rtype: InstructionNodeCreator
+        """
+        instance = cls(position_tracker=position_tracker)
+        new_collection = instance._collection
+
+        for idx, stash in enumerate(stash_list):
+            new_collection.extend(stash._collection)
+
+            # use space to separate the stashes, but don't add final space
+            if idx < len(stash_list) - 1:
+                try:
+                    instance._collection[-1].add_chars(' ')
+                except (AttributeError, IndexError):
+                    # IndexError: nothing was collected so far (the stashes
+                    # seen up to now were empty), so there is no node that
+                    # could take the separator.
+                    pass
+
+        return instance
+
+
+def _get_layout_from_tuple(position_tuple):
+    """Convert a (row, column) positioning into a Layout object, whose origin
+    is expressed in percents, and is aligned to the left and top
+
+    The row can have a value from 1 to 15 inclusive. (vertical positioning)
+    The column can have a value from 0 to 31 inclusive. (horizontal)
+
+    :param position_tuple: a tuple of ints (row, col)
+    :type position_tuple: tuple
+    :rtype: Layout
+    :return: the Layout, or None if no positioning was supplied
+    """
+    if position_tuple:
+        row, column = position_tuple
+
+        # the column is scaled against 32 columns, the row against 15 rows
+        x_offset = Size(100 * column / 32.0, UnitEnum.PERCENT)
+        y_offset = Size(100 * (row - 1) / 15.0, UnitEnum.PERCENT)
+
+        origin = Point(x_offset, y_offset)
+        alignment = Alignment(HorizontalAlignmentEnum.LEFT,
+                              VerticalAlignmentEnum.TOP)
+        return Layout(origin=origin, alignment=alignment)
+
+    return None
+
+
+class _InstructionNode(object):
+    """Value object holding either text, or an interpretable command (an
+    explicit line break, italics being switched on/off, or a change of
+    position), together with the positioning it applies to.
+
+    These nodes will be aggregated into a RepresentableNode, which will then
+    be easily converted to a CaptionNode.
+    """
+    TEXT = 0
+    BREAK = 1
+    ITALICS_ON = 2
+    ITALICS_OFF = 3
+    CHANGE_POSITION = 4
+
+    def __init__(self, text=None, position=None, type_=0):
+        """
+        :type text: unicode
+        :param position: a tuple of ints (row, column)
+        :param type_: one of the type constants of this class; the default
+            value is the one of self.TEXT
+        :type type_: int
+        """
+        self._type = type_
+        self.position = position
+        self.text = text
+
+    def add_chars(self, *args):
+        """Append the given characters to the text of this node. A node
+        without text (None) starts from the empty string.
+
+        :param args:
+        :type args: tuple[unicode]
+        :return:
+        """
+        if self.text is None:
+            self.text = ''
+
+        self.text = self.text + ''.join(args)
+
+    def is_text_node(self):
+        """
+        :rtype: bool
+        """
+        return self.TEXT == self._type
+
+    def is_empty(self):
+        """Only text nodes can be empty, and only if they have no text
+
+        :rtype: bool
+        """
+        return self._type == self.TEXT and not self.text
+
+    def is_explicit_break(self):
+        """
+        :rtype: bool
+        """
+        return self.BREAK == self._type
+
+    def sets_italics_on(self):
+        """
+        :rtype: bool
+        """
+        return self.ITALICS_ON == self._type
+
+    def sets_italics_off(self):
+        """
+        :rtype: bool
+        """
+        return self.ITALICS_OFF == self._type
+
+    def is_italics_node(self):
+        """Whether the node switches the italics either on or off
+
+        :rtype: bool
+        """
+        italics_types = (self.ITALICS_OFF, self.ITALICS_ON)
+        return self._type in italics_types
+
+    def requires_repositioning(self):
+        """Whether the node must be interpreted as a change in positioning
+
+        :rtype: bool
+        """
+        return self.CHANGE_POSITION == self._type
+
+    def get_text(self):
+        """A little legacy code. Returns the text stripped, with every run of
+        whitespace collapsed to a single space.
+        """
+        words = self.text.split()
+        return ' '.join(words)
+
+    @classmethod
+    def create_break(cls, position):
+        """Create a node, interpretable as an explicit line break
+
+        :type position: tuple[int]
+        :param position: a tuple (row, col) containing the positioning info
+
+        :rtype: _InstructionNode
+        """
+        return cls(position=position, type_=cls.BREAK)
+
+    @classmethod
+    def create_text(cls, position, *chars):
+        """Create a node interpretable as text
+
+        :type position: tuple[int]
+        :param position: a tuple (row, col) to mark the positioning
+
+        :type chars: tuple[unicode]
+        :param chars: characters to add to the text
+
+        :rtype: _InstructionNode
+        """
+        initial_text = ''.join(chars)
+        return cls(initial_text, position=position)
+
+    @classmethod
+    def create_italics_style(cls, position, turn_on=True):
+        """Create a node, interpretable as a command to switch italics on/off
+
+        :type position: tuple[int]
+        :param position: a tuple (row, col) to mark the positioning
+
+        :type turn_on: bool
+        :param turn_on: whether to turn the italics on or off
+
+        :rtype: _InstructionNode
+        """
+        if turn_on:
+            italics_type = cls.ITALICS_ON
+        else:
+            italics_type = cls.ITALICS_OFF
+
+        return cls(position=position, type_=italics_type)
+
+    @classmethod
+    def create_repositioning_command(cls, position=None):
+        """Create node interpretable as a command to change the current
+        position
+
+        :type position:
+        """
+        return cls(position=position, type_=cls.CHANGE_POSITION)
+
+    def __repr__(self):         # pragma: no cover
+        if self.is_text_node():
+            extra = '"{}"'.format(self.text)
+        elif self.is_explicit_break():
+            extra = 'BR'
+        elif self.is_italics_node():
+            extra = 'italics {}'.format(
+                'on' if self.sets_italics_on() else 'off'
+            )
+        else:
+            extra = 'change position'
+
+        return '<INode: {extra} >'.format(extra=extra)
+
+
+def _format_italics(collection):
+    """Given a raw list of _InstructionNodes, returns a new equivalent list
+    where all the italics nodes properly close and open.
+
+    Equivalent means that, as far as italics are concerned, the SCC commands
+    that would have generated the returned list would have had the exact same
+    visual effect as the ones that generated the received list.
+
+    The raw commands read from the SCC can't be used as they are by the
+    writers of the other formats, which require the list of CaptionNodes to
+    be formatted in a certain way.
+
+    Note: Using state machines to manage the italics didn't work well because
+    we're using state machines already to track the position, and their
+    interactions got crazy.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    # The steps are order dependent: each one relies on what the previous
+    # ones guarantee.
+    steps = (
+        _skip_initial_italics_off_nodes,
+        _skip_empty_text_nodes,
+        # from here on, the italics nodes are guaranteed to alternate
+        _skip_redundant_italics_nodes,
+        # from here on, the italics are contained within their context
+        _close_italics_before_repositioning,
+        # from here on, every opened italics is also closed
+        _ensure_final_italics_node_closes,
+        # finally, drop the italics pairs with no noticeable effect
+        _remove_noop_italics,
+    )
+
+    nodes = collection
+    for step in steps:
+        nodes = step(nodes)
+
+    return nodes
+
+
+def _remove_noop_on_off_italics(collection):
+    """Return a list like `collection`, but without the italics nodes that
+    come in the order: on, off, with no other node between them.
+
+    An "on" node is held back until the next node is seen. Note that an "on"
+    node still held back when the collection ends is not part of the result.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    kept = []
+    pending_on = None
+
+    for node in collection:
+        is_italics = node.is_italics_node()
+
+        if is_italics and node.sets_italics_on():
+            pending_on = node
+            continue
+
+        if is_italics and node.sets_italics_off():
+            if pending_on:
+                # an on/off pair surrounding nothing; drop both nodes
+                pending_on = None
+                continue
+        elif pending_on:
+            kept.append(pending_on)
+            pending_on = None
+
+        kept.append(node)
+
+    return kept
+
+
+def _remove_noon_off_on_italics(collection):
+    """Return a list like `collection`, but without the italics nodes that
+    come in the order: off, on, with no other node between them.
+
+    An "off" node is held back until the next node is seen; if the collection
+    ends while one is held back, that node is kept.
+
+    :type collection: list[_InstructionNode]
+    :return: list[_InstructionNode]
+    """
+    kept = []
+    pending_off = None
+
+    for node in collection:
+        is_italics = node.is_italics_node()
+
+        if is_italics and node.sets_italics_off():
+            pending_off = node
+            continue
+
+        if is_italics and node.sets_italics_on():
+            if pending_off:
+                # an off/on pair surrounding nothing; drop both nodes
+                pending_off = None
+                continue
+        elif pending_off:
+            kept.append(pending_off)
+            pending_off = None
+
+        kept.append(node)
+
+    # a trailing "off" node still has to close the italics
+    if pending_off:
+        kept.append(pending_off)
+
+    return kept
+
+
+def _remove_noop_italics(collection):
+    """Return an equivalent list to `collection`, without the pairs of
+    adjacent italics nodes that cancel each other out. The on/off pairs are
+    removed first, then the off/on pairs.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    without_on_off_pairs = _remove_noop_on_off_italics(collection)
+    return _remove_noon_off_on_italics(without_on_off_pairs)
+
+
+def _skip_initial_italics_off_nodes(collection):
+    """Return a list like the collection given, leaving out every
+    <Italics OFF> node found before the first <Italics ON> node
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    kept = []
+    italics_were_opened = False
+
+    for node in collection:
+        if not node.is_italics_node():
+            kept.append(node)
+            continue
+
+        if node.sets_italics_on():
+            italics_were_opened = True
+            kept.append(node)
+        elif italics_were_opened:
+            kept.append(node)
+
+    return kept
+
+
+def _skip_empty_text_nodes(collection):
+    """Return a list with all the nodes of the collection, in the same
+    order, leaving out the text nodes that are empty
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    kept = []
+    for node in collection:
+        if node.is_text_node() and node.is_empty():
+            continue
+        kept.append(node)
+    return kept
+
+
+def _skip_redundant_italics_nodes(collection):
+    """Return a list where the italics nodes alternate: an <Italics ON> node
+    is only kept if the previously kept italics node was <Italics OFF>, and
+    vice versa. The first italics node is always kept, and so are all the
+    nodes of other types.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    kept = []
+    # None until the first italics node; afterwards whether italics are on
+    italics_state = None
+
+    for node in collection:
+        if node.is_italics_node():
+            turns_on = node.sets_italics_on()
+            # skip the nodes that repeat the current state
+            if italics_state is not None and turns_on is italics_state:
+                continue
+            italics_state = turns_on
+
+        kept.append(node)
+
+    return kept
+
+
+def _close_italics_before_repositioning(collection):
+    """Make sure the italics don't span across a change of position.
+
+    For every repositioning node found while the italics are on, a closing
+    italics node is inserted before it, and an opening italics node right
+    after it.
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    result = []
+    # the latest <Italics ON> node, for as long as the italics stay on
+    open_italics_node = None
+
+    for node in collection:
+        if node.is_italics_node():
+            if node.sets_italics_on():
+                open_italics_node = node
+            if node.sets_italics_off():
+                open_italics_node = None
+
+        if node.requires_repositioning() and open_italics_node is not None:
+            # the closing node takes the position of the node that turned
+            # the italics on
+            closing = _InstructionNode.create_italics_style(
+                position=open_italics_node.position,
+                turn_on=False
+            )
+            # the reopening node takes the position of the repositioning
+            reopening = _InstructionNode.create_italics_style(
+                position=node.position
+            )
+            result.extend((closing, node, reopening))
+        else:
+            result.append(node)
+
+    return result
+
+
+def _ensure_final_italics_node_closes(collection):
+    """Return a copy of the collection, with a closing italics node appended
+    if the italics were still on at the end of it
+
+    :type collection: list[_InstructionNode]
+    :rtype: list[_InstructionNode]
+    """
+    # the latest <Italics ON> node, for as long as the italics stay on
+    open_italics_node = None
+
+    for node in collection:
+        if node.is_italics_node():
+            if node.sets_italics_on():
+                open_italics_node = node
+            if node.sets_italics_off():
+                open_italics_node = None
+
+    closed_collection = list(collection)
+
+    if open_italics_node is not None:
+        # the closing node takes the position of the node it closes
+        closed_collection.append(
+            _InstructionNode.create_italics_style(
+                position=open_italics_node.position,
+                turn_on=False
+            )
+        )
+    return closed_collection
--- a/utils/modules/pycaption/scc/state_machines.py
+++ b/utils/modules/pycaption/scc/state_machines.py
@@ -0,0 +1,128 @@
+from ..exceptions import CaptionReadSyntaxError
+
+
+class _PositioningTracker(object):
+    """Keeps track of the positioning-related commands, to help determine
+    the positioning of a node.
+    """
+    def __init__(self, positioning=None):
+        """
+        :param positioning: positioning information (row, column)
+        :type positioning: tuple[int]
+        """
+        self._repositioning_required = False
+        self._break_required = False
+        self._positions = [positioning]
+
+    def update_positioning(self, positioning):
+        """Being notified of a position change, updates the internal state,
+        so as to be able to tell if it was a trivial change (a simple line
+        break) or not.
+
+        :type positioning: tuple[int]
+        :param positioning: a tuple (row, col)
+        """
+        latest = self._positions[-1]
+
+        if not latest:
+            # nothing was tracked yet; set the positioning for the first time
+            if positioning:
+                self._positions = [positioning]
+            return
+
+        row, col = latest
+        new_row, _ = positioning
+
+        if new_row != row + 1:
+            # not simply one line below; reset the "current" position
+            # altogether.
+            self._positions = [positioning]
+            self._repositioning_required = True
+            return
+
+        # one line below: the column of the previous position is kept
+        self._positions.append((new_row, col))
+        self._break_required = True
+
+    def get_current_position(self):
+        """Returns the current usable position, which is the first of the
+        tracked positions
+
+        :rtype: tuple[int]
+
+        :raise: CaptionReadSyntaxError
+        """
+        if any(self._positions):
+            return self._positions[0]
+
+        raise CaptionReadSyntaxError(
+            'No Preamble Address Code [PAC] was provided'
+        )
+
+    def is_repositioning_required(self):
+        """Determines whether the current positioning has changed non-trivially
+
+        Trivial would mean that a line break should suffice.
+        :rtype: bool
+        """
+        return self._repositioning_required
+
+    def acknowledge_position_changed(self):
+        """Tell the position tracker that the position change was handled
+        """
+        self._repositioning_required = False
+
+    def is_linebreak_required(self):
+        """If the current position is simply one line below the previous.
+        :rtype: bool
+        """
+        return self._break_required
+
+    def acknowledge_linebreak_consumed(self):
+        """Call to acknowledge that the required line break was consumed
+        """
+        self._break_required = False
+
+
+class DefaultProvidingPositionTracker(_PositioningTracker):
+    """A _PositioningTracker that never fails to provide a position: if none
+    is currently tracked, it falls back on the last positioning value set
+    anywhere in the document, or on a default value, (14, 0)
+    """
+    default = (14, 0)
+
+    def __init__(self, positioning=None, default=None):
+        """
+        :type positioning: tuple[int]
+        :param positioning: a tuple of ints (row, column)
+
+        :type default: tuple[int]
+        :param default: a tuple of ints (row, column) to use as fallback
+        """
+        parent = super(DefaultProvidingPositionTracker, self)
+        parent.__init__(positioning)
+
+        # without one, the class level default remains in effect
+        if default:
+            self.default = default
+
+    def get_current_position(self):
+        """Returns the currently tracked positioning, the last positioning that
+        was set (anywhere), or the default it was initiated with
+
+        :rtype: tuple[int]
+        """
+        parent = super(DefaultProvidingPositionTracker, self)
+        try:
+            return parent.get_current_position()
+        except CaptionReadSyntaxError:
+            return self.default
+
+    def update_positioning(self, positioning):
+        """If called, sets this positioning as the default, then delegates
+        to the super class.
+
+        :param positioning: a tuple of ints (row, col)
+        :type positioning: tuple[int]
+        """
+        if positioning:
+            self.default = positioning
+
+        parent = super(DefaultProvidingPositionTracker, self)
+        parent.update_positioning(positioning)