upload
This commit is contained in:
696
utils/modules/pycaption/scc/__init__.py
Normal file
696
utils/modules/pycaption/scc/__init__.py
Normal file
@@ -0,0 +1,696 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
3 types of SCC captions:
|
||||
Roll-Up
|
||||
Paint-On
|
||||
Pop-On
|
||||
|
||||
Commands:
|
||||
94ae - [ENM] - Erase Non-displayed(buffer) Memory
|
||||
942c - [EDM] - Erase Displayed Memory
|
||||
9420 - [RCL] - Resume Caption Loading
|
||||
9429 - [RDC] - Resume Direct Captioning
|
||||
|
||||
9425, 9426, 94a7 - [RU2], [RU3], [RU4] (roll up captions 2,3 or 4 rows)
|
||||
- these commands set the number of expected lines
|
||||
|
||||
94ad - (in CEA-608-E: 142d) - [CR] carriage return.
|
||||
- This actually rolls the captions up as many rows as specified by
|
||||
[RU2], [RU3], or [RU4]
|
||||
|
||||
80 - no-op char. Doesn't do anything, but must be used with other
|
||||
characters, to make a 2 byte word
|
||||
|
||||
97a1, 97a2, 9723 - [TO] move 1, 2 or 3 columns - Tab Over command
|
||||
- this moves the positioning 1, 2, or 3 columns to the right
|
||||
- Nothing regarding this is implemented.
|
||||
|
||||
942f - [EOC] - display the buffer on the screen - End Of Caption
|
||||
... - [PAC] - Preamble address code (can set positioning and style)
|
||||
- All the PACs are specified by the first and second byte combined
|
||||
from pycaption.scc.constants.PAC_BYTES_TO_POSITIONING_MAP
|
||||
|
||||
9429 - [RDC] - Resume Direct Captioning
|
||||
94a4 - (in CEA-608-E: 1424) - [DER] Delete to End of Row
|
||||
|
||||
|
||||
Pop-On:
|
||||
The commands should usually appear in this order. Not strict though, and
|
||||
the commands don't necessarily have to be on the same row.
|
||||
|
||||
1. 94ae [ENM] (erase non displayed memory)
|
||||
2. 9420 [RCL] (resume caption loading => this command here means we're using Pop-On captions)
|
||||
2.1? [ENM] - if step 1 was skipped?
|
||||
3. [PAC] Positioning/ styling command (can position on columns divisible by 4)
|
||||
The control chars is called Preamble Address Code [PAC].
|
||||
4. If positioning needs to be on columns not divisible by 4, use a [TO] command
|
||||
5. text
|
||||
6. 942c [EDM] - optionally, erase the currently displayed caption
|
||||
7. 942f [EOC] display the caption
|
||||
|
||||
|
||||
Roll-Up:
|
||||
1. [RU2], [RU3] or [RU4] - sets Roll-Up style and depth
|
||||
- these set the Roll-Up style: (characteristic command)
|
||||
2. [CR] to roll the display up 1 row...lol?
|
||||
3. [PAC] - sets the indent of the base row
|
||||
|
||||
|
||||
Paint-On:
|
||||
1. [RDC] - sets the Paint-On style (characteristic command)
|
||||
2. [PAC]
|
||||
3. text
|
||||
4. [PAC]
|
||||
5. text or [DER]
|
||||
|
||||
There are some rules regarding the parity of the commands.
|
||||
|
||||
This resource:
|
||||
http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/SCC_FORMAT.HTML
|
||||
specifies that there are interpreters which only work if the commands have an
|
||||
odd parity. This however is not consistent, and we might not handle well
|
||||
these cases. Odd parity of a command means that converting the word into
|
||||
binary, should result in an odd number of '1's. The PAC commands obey this
|
||||
rule, but some do not. Some commands that do not are found in the COMMANDS
|
||||
dictionary. This is legacy logic, that I didn't know how to handle, and
|
||||
just carried over when implementing positioning.
|
||||
"""
|
||||
|
||||
import re
|
||||
import math
|
||||
import textwrap
|
||||
from copy import deepcopy
|
||||
|
||||
import six
|
||||
|
||||
from pycaption.base import (
|
||||
BaseReader, BaseWriter, CaptionSet, CaptionNode,
|
||||
)
|
||||
from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError
|
||||
from .constants import (
|
||||
HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS,
|
||||
MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
|
||||
SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP,
|
||||
PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED,
|
||||
)
|
||||
from .specialized_collections import (
|
||||
TimingCorrectingCaptionList, NotifyingDict, CaptionCreator,
|
||||
InstructionNodeCreator)
|
||||
from .state_machines import DefaultProvidingPositionTracker
|
||||
|
||||
|
||||
class NodeCreatorFactory(object):
    """Builds node creator instances that all share one position tracker.

    The reader keeps a single factory per instance, so the shared state lives
    on the factory (and goes away with it) rather than at class level on the
    node creator itself.
    """

    def __init__(self, position_tracker,
                 node_creator=InstructionNodeCreator):
        """
        :param position_tracker: object handed to every creator built here
        :param node_creator: class (or callable) used to build the creators
        """
        self.node_creator = node_creator
        self.position_tracker = position_tracker

    def new_creator(self):
        """Return a fresh ``self.node_creator`` instance bound to the
        factory's position tracker.
        """
        creator_cls = self.node_creator
        return creator_cls(position_tracker=self.position_tracker)

    def from_list(self, roll_rows):
        """Delegate to the node creator's own ``from_list``.

        :param roll_rows: list of node_creator instances

        :return: a node_creator instance
        """
        tracker = self.position_tracker
        return self.node_creator.from_list(roll_rows, position_tracker=tracker)
|
||||
|
||||
|
||||
def get_corrected_end_time(caption):
    """Return the caption's end time, defaulting to 4 seconds after its start.

    A falsy ``caption.end`` (for example 0 or None) is treated as "never
    explicitly ended".

    :param Caption caption: the last caption
    :rtype: int
    """
    # Times are expressed in microseconds: 4 s == 4 * 1000 * 1000 us.
    fallback_duration = 4 * 1000 * 1000
    return caption.end if caption.end else caption.start + fallback_duration
|
||||
|
||||
|
||||
class SCCReader(BaseReader):
    """Converts a given unicode string to a CaptionSet.

    This can be then later used for converting into any other supported formats
    """
    # Splits a line into its timestamp, the separating whitespace and the
    # remaining code words. Compiled once instead of once per input line.
    _LINE_PATTERN = re.compile(r"([0-9:;]*)([\s\t]*)((.)*)")

    def __init__(self, *args, **kw):
        # Finished captions are accumulated here
        self.caption_stash = CaptionCreator()
        self.time_translator = _SccTimeTranslator()

        self.node_creator_factory = NodeCreatorFactory(
            DefaultProvidingPositionTracker()
        )

        # Remembered so that a command repeated twice in a row is applied once
        self.last_command = ''

        # One buffer per caption style; exactly one of them is active
        self.buffer_dict = NotifyingDict()

        self.buffer_dict['pop'] = self.node_creator_factory.new_creator()
        self.buffer_dict['paint'] = self.node_creator_factory.new_creator()
        self.buffer_dict['roll'] = self.node_creator_factory.new_creator()

        # Call this method when the active key changes
        self.buffer_dict.add_change_observer(self._flush_implicit_buffers)
        self.buffer_dict.set_active('pop')

        self.roll_rows = []
        self.roll_rows_expected = 0
        self.simulate_roll_up = False

        # Start time (microseconds) used for the next stored caption
        self.time = 0

    def detect(self, content):
        """Checks whether the given content is a proper SCC file

        :type content: unicode

        :rtype: bool
        """
        lines = content.splitlines()
        # Empty content has no first line: report "not SCC" rather than
        # failing with an IndexError.
        return bool(lines) and lines[0] == HEADER

    def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
        """Converts the unicode string into a CaptionSet

        :type content: six.text_type
        :param content: The SCC content to be converted to a CaptionSet

        :type lang: six.text_type
        :param lang: The language of the caption

        :type simulate_roll_up: bool
        :param simulate_roll_up: If True, when converting to other formats,
            the resulting captions will contain all the rows that were visible
            on the screen when the captions were rolling up.

        :type offset: int
        :param offset: number of seconds to subtract from every computed
            time (it is converted to microseconds before being applied)

        :rtype: CaptionSet

        :raises InvalidInputError: if content is not a text (unicode) string
        :raises ValueError: if a caption is shorter than 0.05 seconds
        :raises CaptionReadNoCaptions: if no caption could be read
        """
        if not isinstance(content, six.text_type):
            raise InvalidInputError('The content is not a unicode string.')

        self.simulate_roll_up = simulate_roll_up
        self.time_translator.offset = offset * 1000000
        # split lines
        lines = content.splitlines()

        # loop through each line except the first (the header)
        for line in lines[1:]:
            self._translate_line(line)

        # Whatever is still buffered after the last line must not be lost
        self._flush_implicit_buffers()

        captions = CaptionSet({lang: self.caption_stash.get_all()})

        # check captions for incorrect lengths
        for cap in captions.get_captions(lang):
            # if there's an end time on a caption and the difference is
            # less than .05s reject the input (this is likely caused by a
            # standalone EOC marker in the SCC file)
            if 0 < cap.end - cap.start < 50000:
                raise ValueError('unsupported length found in SCC input file: ' + str(cap))

        if captions.is_empty():
            raise CaptionReadNoCaptions("empty caption file")
        else:
            last_caption = captions.get_captions(lang)[-1]
            last_caption.end = get_corrected_end_time(last_caption)

        return captions

    def _fix_last_timing(self, timing):
        """HACK HACK: Certain Paint-On captions don't specify the 942f [EOC]
        (End Of Caption) command on the same line.
        If this is a 942f line, also simulate a 942c (Erase Displayed Memory)
        to properly set the timing on the last caption.

        This method needs some serious attention, because it proves the timing
        calculation is not done well for Pop-On captions

        :param timing: the timestamp found at the beginning of the 942f line
        """
        # Calculate the end time from the current line
        time_translator = _SccTimeTranslator()
        time_translator.start_at(timing)
        time_translator.offset = self.time_translator.offset

        # But use the current time translator for the start time
        self.caption_stash.create_and_store(
            self.buffer, self.time_translator.get_time())

        self.caption_stash.correct_last_timing(time_translator.get_time())
        # Go through the factory so the replacement buffer shares the
        # position tracker, like every other buffer reset in this class.
        self.buffer = self.node_creator_factory.new_creator()

    def _flush_implicit_buffers(self, old_key=None, *args):
        """Convert to Captions those buffers whose behavior is implicit.

        The Pop-On buffer is explicit. New captions are created from it
        with the command 'End Of Caption' [EOC], '942f'

        The other 2 buffers, Roll-Up and Paint-On we treat as "more" implicit,
        meaning that they can be displayed by a command on the next row.
        If they're on the last row however, or if the caption type is changing,
        we make sure to convert the buffers to text, so we don't lose any info.

        :param old_key: the buffer key that was active before the change, or
            None when called directly at the end of .read()
        """
        if old_key == 'pop':
            return

        elif old_key is None or old_key == 'roll':
            if not self.buffer.is_empty():
                self._roll_up()

        # NOTE(review): `old_key is None` can never be true here, because the
        # branch above already captures that case. Kept as-is, since tests
        # depend on this legacy behavior.
        elif old_key is None or old_key == 'paint':
            # xxx - perhaps the self.buffer property is sufficient
            if not self.buffer_dict['paint'].is_empty():
                self.caption_stash.create_and_store(
                    self.buffer_dict['paint'], self.time)

    def _translate_line(self, line):
        """Interpret one line of the file: a timestamp followed by
        space separated code words.
        """
        # ignore blank lines
        if line.strip() == '':
            return

        # split line in timestamp and words
        parts = self._LINE_PATTERN.findall(line.lower())

        # XXX!!!!!! THESE 2 LINES ARE A HACK
        if parts[0][2].strip() == '942f':
            self._fix_last_timing(timing=parts[0][0])

        self.time_translator.start_at(parts[0][0])

        # loop through each word
        for word in parts[0][2].split(' '):
            # ignore empty results
            if word.strip() != '':
                self._translate_word(word)

    def _translate_word(self, word):
        """Dispatch a single code word to the matching handler."""
        # count frames for timing
        self.time_translator.increment_frames()

        # first check if word is a command
        # TODO - check that all the positioning commands are here, or use
        # some other strategy to determine if the word is a command.
        if word in COMMANDS or _is_pac_command(word):
            self._translate_command(word)

        # second, check if word is a special character
        elif word in SPECIAL_CHARS:
            self._translate_special_char(word)

        elif word in EXTENDED_CHARS:
            self._translate_extended_char(word)

        # third, try to convert word into 2 characters
        else:
            self._translate_characters(word)

    def _handle_double_command(self, word):
        """Return True if the word repeats the previous one and must be
        skipped; otherwise remember it and return False.
        """
        # ensure we don't accidentally use the same command twice
        if word == self.last_command:
            self.last_command = ''
            return True
        else:
            self.last_command = word
            return False

    def _translate_special_char(self, word):
        """Add the special character encoded by the word to the buffer."""
        # XXX - this looks highly buggy. Why should special chars be ignored
        # when printed 2 times one after another?
        if self._handle_double_command(word):
            return

        self.buffer.add_chars(SPECIAL_CHARS[word])

    def _translate_extended_char(self, word):
        """Add the extended character encoded by the word to the buffer."""
        # XXX - this looks highly buggy. Why would a special char be ignored
        # if it's printed 2 times one after another?
        if self._handle_double_command(word):
            return

        # add to buffer
        self.buffer.add_chars(EXTENDED_CHARS[word])

    def _translate_command(self, word):
        """Apply a control command: switch the caption style, flush or clear
        buffers, or forward the command to the active buffer.
        """
        if self._handle_double_command(word):
            return

        # if command is pop_up
        if word == '9420':
            self.buffer_dict.set_active('pop')

        # command is paint_on [Resume Direct Captioning]
        elif word == '9429':
            self.buffer_dict.set_active('paint')

            self.roll_rows_expected = 1
            if not self.buffer.is_empty():
                self.caption_stash.create_and_store(
                    self.buffer, self.time
                )
                self.buffer = self.node_creator_factory.new_creator()

            self.time = self.time_translator.get_time()

        # if command is roll_up 2, 3 or 4 rows
        elif word in ('9425', '9426', '94a7'):
            self.buffer_dict.set_active('roll')

            # count how many lines are expected
            if word == '9425':
                self.roll_rows_expected = 2
            elif word == '9426':
                self.roll_rows_expected = 3
            elif word == '94a7':
                self.roll_rows_expected = 4

            # if content is in the queue, turn it into a caption
            if not self.buffer.is_empty():
                self.caption_stash.create_and_store(
                    self.buffer, self.time)
                self.buffer = self.node_creator_factory.new_creator()

            # set rows to empty, configure start time for caption
            self.roll_rows = []
            self.time = self.time_translator.get_time()

        # clear pop_on buffer
        elif word == '94ae':
            self.buffer = self.node_creator_factory.new_creator()

        # display pop_on buffer [End Of Caption]
        elif word == '942f':
            self.time = self.time_translator.get_time()
            self.caption_stash.create_and_store(self.buffer, self.time)
            self.buffer = self.node_creator_factory.new_creator()

        # roll up captions [Carriage Return]
        elif word == '94ad':
            # display roll-up buffer
            if not self.buffer.is_empty():
                self._roll_up()

        # clear screen
        elif word == '942c':
            self.roll_rows = []

            # XXX - The 942c command has nothing to do with paint-ons
            # This however is legacy code, and will break lots of tests if
            # the proper buffer (self.buffer) is used.
            # Most likely using `self.buffer` instead of the paint buffer
            # is the right thing to do, but this needs some further attention.
            if not self.buffer_dict['paint'].is_empty():
                self.caption_stash.create_and_store(
                    self.buffer_dict['paint'], self.time)
                self.buffer = self.node_creator_factory.new_creator()

            # attempt to add proper end time to last caption(s)
            self.caption_stash.correct_last_timing(
                self.time_translator.get_time())

        # if command not one of the aforementioned, add to buffer
        else:
            self.buffer.interpret_command(word)

    def _translate_characters(self, word):
        """Treat the word as 2 plain characters and add them to the buffer.

        Words containing an unrecognized byte are silently dropped.
        """
        # split word into the 2 bytes
        byte1 = word[:2]
        byte2 = word[2:]

        # check to see if the bytes are recognized characters
        if byte1 not in CHARACTERS or byte2 not in CHARACTERS:
            return

        self.buffer.add_chars(CHARACTERS[byte1], CHARACTERS[byte2])

    @property
    def buffer(self):
        """Returns the currently active buffer
        """
        return self.buffer_dict.get_active()

    @buffer.setter
    def buffer(self, value):
        """Sets a new value to the active key

        :param value: any object
        """
        try:
            key = self.buffer_dict.active_key
            self.buffer_dict[key] = value
        except TypeError:
            # Deliberate best effort: a TypeError raised by the assignment
            # is ignored and the buffer is left unchanged.
            pass

    def _roll_up(self):
        """Store the active buffer as a caption and start a new one.

        When roll-up simulation is on, the caption contains the last
        `roll_rows_expected` rows instead of only the newest one.
        """
        # We expect the active buffer to be the roll buffer
        if self.simulate_roll_up:
            if self.roll_rows_expected > 1:
                # Drop the oldest row once the window is full
                if len(self.roll_rows) >= self.roll_rows_expected:
                    self.roll_rows.pop(0)

                self.roll_rows.append(self.buffer)
                self.buffer = self.node_creator_factory.from_list(
                    self.roll_rows)

        # convert buffer and empty
        self.caption_stash.create_and_store(self.buffer, self.time)
        self.buffer = self.node_creator_factory.new_creator()

        # configure time
        self.time = self.time_translator.get_time()

        # try to insert the proper ending time for the previous caption
        self.caption_stash.correct_last_timing(self.time, force=True)
|
||||
|
||||
|
||||
class SCCWriter(BaseWriter):
    """Serializes a CaptionSet as SCC text, using Pop-On captions."""

    def __init__(self, *args, **kw):
        super(SCCWriter, self).__init__(*args, **kw)

    def write(self, caption_set):
        """Return the SCC representation of the given caption set.

        Only the first language of the set is written. An empty set yields
        just the header followed by a blank line.

        :param caption_set: the captions to serialize (it is not modified; a
            deep copy is used)
        :return: the SCC document, as a string
        """
        if caption_set.is_empty():
            return HEADER + '\n\n'

        caption_set = deepcopy(caption_set)

        # Only support one language.
        lang = list(caption_set.get_languages())[0]
        captions = caption_set.get_captions(lang)

        # PASS 1: compute codes for each caption
        codes = [(self._text_to_code(caption), caption.start, caption.end)
                 for caption in captions]

        # PASS 2:
        # Advance start times so as to have time to write to the pop-on
        # buffer; possibly remove the previous clear-screen command
        for index, (code, start, end) in enumerate(codes):
            # Every code word takes 5 chars ("xxxx "), so this is a whole
            # number of words; floor division keeps it an integer on
            # Python 3 too. 8 more words are written around each caption.
            code_words = len(code) // 5 + 8
            code_time_microseconds = code_words * MICROSECONDS_PER_CODEWORD
            code_start = start - code_time_microseconds
            if index == 0:
                continue
            previous_code, previous_start, previous_end = codes[index-1]
            # Not enough room for the previous caption's clear-screen line
            if previous_end + 3 * MICROSECONDS_PER_CODEWORD >= code_start:
                codes[index-1] = (previous_code, previous_start, None)
            codes[index] = (code, code_start, end)

        # PASS 3:
        # Write captions.
        chunks = [HEADER + '\n\n']
        for (code, start, end) in codes:
            chunks.append('%s\t' % self._format_timestamp(start))
            chunks.append('94ae 94ae 9420 9420 ')
            chunks.append(code)
            chunks.append('942c 942c 942f 942f\n\n')
            if end is not None:
                chunks.append(
                    '%s\t942c 942c\n\n' % self._format_timestamp(end))

        return ''.join(chunks)

    # Wrap lines at 32 chars
    @staticmethod
    def _layout_line(caption):
        """Return the caption's text, with every line wrapped at 32 chars.

        :param caption: object exposing a `nodes` list of CaptionNode
        :return: the laid out text, lines separated by '\\n'
        """
        def caption_node_to_text(caption_node):
            if caption_node.type_ == CaptionNode.TEXT:
                return six.text_type(caption_node.content)
            elif caption_node.type_ == CaptionNode.BREAK:
                return '\n'
            # Any other node type has no text of its own. Returning None
            # here would make the join below raise a TypeError.
            return ''
        caption_text = ''.join(
            [caption_node_to_text(node) for node in caption.nodes])
        inner_lines = caption_text.split('\n')
        inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
        return '\n'.join(inner_lines_laid_out)

    @staticmethod
    def _maybe_align(code):
        """Pad a dangling single byte with the '80' no-op byte."""
        # Finish a half-word with a no-op so we can move to a full word
        if len(code) % 5 == 2:
            code += '80 '
        return code

    @staticmethod
    def _maybe_space(code):
        """Append the separating space once a 2 byte word is complete."""
        if len(code) % 5 == 4:
            code += ' '
        return code

    def _print_character(self, code, char):
        """Return `code` extended with the encoding of `char`.

        Characters without a known encoding are written as '91b6'.
        """
        try:
            char_code = CHARACTER_TO_CODE[char]
        except KeyError:
            try:
                char_code = SPECIAL_OR_EXTENDED_CHAR_TO_CODE[char]
            except KeyError:
                char_code = '91b6'  # Use £ as "unknown character" symbol

        if len(char_code) == 2:
            return code + char_code
        elif len(char_code) == 4:
            # A 2 byte code must start on a word boundary
            return self._maybe_align(code) + char_code
        else:
            # This should not happen!
            return code

    def _text_to_code(self, s):
        """Encode a caption as SCC code words.

        The rows are placed so that the last one lands on row 15, each one
        preceded by its (doubled) positioning code.

        :param s: the caption to encode
        :return: string of space separated code words, ending with a space
        """
        code = ''
        lines = self._layout_line(s).split('\n')
        for row, line in enumerate(lines):
            row += 16 - len(lines)
            # Move cursor to column 0 of the destination row
            for _ in range(2):
                code += ('%s%s ' % (PAC_HIGH_BYTE_BY_ROW[row],
                                    PAC_LOW_BYTE_BY_ROW_RESTRICTED[row]))
            # Print the line using the SCC encoding
            for char in line:
                code = self._print_character(code, char)
                code = self._maybe_space(code)
            code = self._maybe_align(code)
        return code

    @staticmethod
    def _format_timestamp(microseconds):
        """Format a time in microseconds as 'HH:MM:SS:FF', counting 30
        frames per timecode second.
        """
        seconds_float = microseconds / 1000.0 / 1000.0
        # Convert to non-drop-frame timecode
        seconds_float *= 1000.0 / 1001.0
        hours = math.floor(seconds_float / 3600)
        seconds_float -= hours * 3600
        minutes = math.floor(seconds_float / 60)
        seconds_float -= minutes * 60
        seconds = math.floor(seconds_float)
        seconds_float -= seconds
        frames = math.floor(seconds_float * 30)
        return '%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frames)
|
||||
|
||||
|
||||
class _SccTimeTranslator(object):
    """Turns SCC timestamps into microseconds, while counting the frames
    elapsed since the last timestamp.
    """

    def __init__(self):
        # Last timestamp seen, as text
        self._time = '00:00:00;00'

        # microseconds. The offset from which we begin the time calculation
        self.offset = 0
        # Frames counted since `_time` was set
        self._frames = 0

    def get_time(self):
        """Return the current time in microseconds: the last timestamp plus
        the frames counted since, minus the offset.

        :rtype: int
        """
        prefix, frame_field = self._time[:-2], self._time[-2:]
        elapsed_frames = int(frame_field) + self._frames
        stamp = prefix + six.text_type(elapsed_frames)
        return self._translate_time(stamp, self.offset)

    @staticmethod
    def _translate_time(stamp, offset):
        """Convert a timestamp to microseconds (never negative).

        :param stamp: timestamp made of 4 numeric fields (hours, minutes,
            seconds, frames) separated by ':' or ';'
        :type offset: int
        :param offset: Subtract this many microseconds from the calculated time
            Helpful for when the captions are off by some time interval.
        :rtype: int
        """
        # Drop-frame timebase (';') runs at the same rate as wall clock.
        # Non-drop-frame timebase runs "slow": 1 second of timecode is
        # longer than an actual second (1.001s)
        rate = 1.0 if ';' in stamp else 1001.0 / 1000.0

        fields = stamp.replace(';', ':').split(':')
        hours = int(fields[0])
        minutes = int(fields[1])
        whole_seconds = int(fields[2])
        frames = int(fields[3])

        # 30 frames per timecode second
        timestamp_seconds = (hours * 3600 +
                             minutes * 60 +
                             whole_seconds +
                             frames / 30.0)

        microseconds = timestamp_seconds * rate * 1000 * 1000 - offset

        # Clamp: an offset larger than the timestamp yields 0
        return 0 if microseconds < 0 else microseconds

    def start_at(self, timespec):
        """Restart counting from the given timestamp

        :type timespec: unicode
        """
        self._frames = 0
        self._time = timespec

    def increment_frames(self):
        """Count one more frame; called once per processed word."""
        self._frames = self._frames + 1
|
||||
|
||||
|
||||
def _is_pac_command(word):
    """Tell whether the word is a Preamble Address Code [PAC] command, i.e.
    whether its 2 bytes are found in PAC_BYTES_TO_POSITIONING_MAP.

    :type word: unicode
    :param word: 4 letter unicode command

    :rtype: bool
    """
    # Anything that isn't exactly 4 characters can't be a PAC
    if not word or len(word) != 4:
        return False

    high_byte = word[:2]
    low_byte = word[2:]

    try:
        # Only the lookup matters; the positioning itself is not needed
        PAC_BYTES_TO_POSITIONING_MAP[high_byte][low_byte]
    except KeyError:
        return False
    return True
|
||||
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-36.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-36.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-37.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-37.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-38.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-38.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-39.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/__init__.cpython-39.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-36.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-36.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-37.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-37.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-38.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-38.pyc
Normal file
Binary file not shown.
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-39.pyc
Normal file
BIN
utils/modules/pycaption/scc/__pycache__/constants.cpython-39.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
984
utils/modules/pycaption/scc/constants.py
Normal file
984
utils/modules/pycaption/scc/constants.py
Normal file
@@ -0,0 +1,984 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from itertools import product
|
||||
from future.utils import viewitems
|
||||
|
||||
# Two-byte SCC control words, written as four lowercase hex digits, mapped to
# the markup they contribute to the caption text:
#   ''                    - no markup of its own
#   '<$>{break}<$>'       - line break marker
#   '<$>{italic}<$>'      - italics start marker
#   '<$>{end-italic}<$>'  - italics end marker
# Every key must be exactly four hex digits, otherwise it can never match a
# word read from an SCC file.
COMMANDS = {
    '9420': '',
    '9429': '',
    '9425': '',
    '9426': '',
    '94a7': '',
    '942a': '',
    '94ab': '',
    '942c': '',
    '94ae': '',
    '942f': '',
    '9779': '<$>{break}<$>',
    '9775': '<$>{break}<$>',
    '9776': '<$>{break}<$>',
    '9770': '<$>{break}<$>',
    '9773': '<$>{break}<$>',
    '10c8': '<$>{break}<$>',
    '10c2': '<$>{break}<$>',
    '166e': '<$>{break}<$>{italic}<$>',
    '166d': '<$>{break}<$>',
    '166b': '<$>{break}<$>',
    '10c4': '<$>{break}<$>',
    '9473': '<$>{break}<$>',
    '977f': '<$>{break}<$>',
    '977a': '<$>{break}<$>',
    '1668': '<$>{break}<$>',
    '1667': '<$>{break}<$>',
    '1664': '<$>{break}<$>',
    '1661': '<$>{break}<$>',
    '10ce': '<$>{break}<$>{italic}<$>',
    '94c8': '<$>{break}<$>',
    '94c7': '<$>{break}<$>',
    '94c4': '<$>{break}<$>',
    '94c2': '<$>{break}<$>',
    '94c1': '<$>{break}<$>',
    '915e': '<$>{break}<$>',
    '915d': '<$>{break}<$>',
    '915b': '<$>{break}<$>',
    '925d': '<$>{break}<$>',
    '925e': '<$>{break}<$>',
    '925b': '<$>{break}<$>',
    '97e6': '<$>{break}<$>',
    '97e5': '<$>{break}<$>',
    '97e3': '<$>{break}<$>',
    '97e0': '<$>{break}<$>',
    '97e9': '<$>{break}<$>',
    '9154': '<$>{break}<$>',
    '9157': '<$>{break}<$>',
    '9151': '<$>{break}<$>',
    '9258': '<$>{break}<$>',
    '9152': '<$>{break}<$>',
    '9257': '<$>{break}<$>',
    '9254': '<$>{break}<$>',
    '9252': '<$>{break}<$>',
    '9158': '<$>{break}<$>',
    '9251': '<$>{break}<$>',
    '94cd': '<$>{break}<$>',
    '94ce': '<$>{break}<$>{italic}<$>',
    '94cb': '<$>{break}<$>',
    '97ef': '<$>{break}<$>{italic}<$>',
    '1373': '<$>{break}<$>',
    '97ec': '<$>{break}<$>',
    '97ea': '<$>{break}<$>',
    '15c7': '<$>{break}<$>',
    '974f': '<$>{break}<$>{italic}<$>',
    '10c1': '<$>{break}<$>',
    '974a': '<$>{break}<$>',
    '974c': '<$>{break}<$>',
    '10c7': '<$>{break}<$>',
    '976d': '<$>{break}<$>',
    '15d6': '<$>{break}<$>',
    '15d5': '<$>{break}<$>',
    '15d3': '<$>{break}<$>',
    '15d0': '<$>{break}<$>',
    '15d9': '<$>{break}<$>',
    '9745': '<$>{break}<$>',
    '9746': '<$>{break}<$>',
    '9740': '<$>{break}<$>',
    '9743': '<$>{break}<$>',
    '9749': '<$>{break}<$>',
    '15df': '<$>{break}<$>',
    '15dc': '<$>{break}<$>',
    '15da': '<$>{break}<$>',
    '15f8': '<$>{break}<$>',
    '94fe': '<$>{break}<$>',
    '94fd': '<$>{break}<$>',
    '94fc': '<$>{break}<$>',
    '94fb': '<$>{break}<$>',
    '944f': '<$>{break}<$>{italic}<$>',
    '944c': '<$>{break}<$>',
    '944a': '<$>{break}<$>',
    '92fc': '<$>{break}<$>',
    '1051': '<$>{break}<$>',
    '1052': '<$>{break}<$>',
    '1054': '<$>{break}<$>',
    '92fe': '<$>{break}<$>',
    '92fd': '<$>{break}<$>',
    '1058': '<$>{break}<$>',
    '157a': '<$>{break}<$>',
    '157f': '<$>{break}<$>',
    '9279': '<$>{break}<$>',
    '94f4': '<$>{break}<$>',
    '94f7': '<$>{break}<$>',
    '94f1': '<$>{break}<$>',
    '9449': '<$>{break}<$>',
    '92fb': '<$>{break}<$>',
    '9446': '<$>{break}<$>',
    '9445': '<$>{break}<$>',
    '9443': '<$>{break}<$>',
    '94f8': '<$>{break}<$>',
    '9440': '<$>{break}<$>',
    '1057': '<$>{break}<$>',
    '9245': '<$>{break}<$>',
    '92f2': '<$>{break}<$>',
    '1579': '<$>{break}<$>',
    '92f7': '<$>{break}<$>',
    '105e': '<$>{break}<$>',
    '92f4': '<$>{break}<$>',
    '1573': '<$>{break}<$>',
    '1570': '<$>{break}<$>',
    '1576': '<$>{break}<$>',
    '1575': '<$>{break}<$>',
    '16c1': '<$>{break}<$>',
    '16c2': '<$>{break}<$>',
    '9168': '<$>{break}<$>',
    '16c7': '<$>{break}<$>',
    '9164': '<$>{break}<$>',
    '9167': '<$>{break}<$>',
    '9161': '<$>{break}<$>',
    '9162': '<$>{break}<$>',
    '947f': '<$>{break}<$>',
    '91c2': '<$>{break}<$>',
    '91c1': '<$>{break}<$>',
    '91c7': '<$>{break}<$>',
    '91c4': '<$>{break}<$>',
    '13e3': '<$>{break}<$>',
    '91c8': '<$>{break}<$>',
    '91d0': '<$>{break}<$>',
    '13e5': '<$>{break}<$>',
    '13c8': '<$>{break}<$>',
    '16cb': '<$>{break}<$>',
    '16cd': '<$>{break}<$>',
    '16ce': '<$>{break}<$>{italic}<$>',
    '916d': '<$>{break}<$>',
    '916e': '<$>{break}<$>{italic}<$>',
    '916b': '<$>{break}<$>',
    '91d5': '<$>{break}<$>',
    '137a': '<$>{break}<$>',
    '91cb': '<$>{break}<$>',
    '91ce': '<$>{break}<$>{italic}<$>',
    '91cd': '<$>{break}<$>',
    '13ec': '<$>{break}<$>',
    '13c1': '<$>{break}<$>',
    '13ea': '<$>{break}<$>',
    '13ef': '<$>{break}<$>{italic}<$>',
    '94f2': '<$>{break}<$>',
    '97fb': '<$>{break}<$>',
    '97fc': '<$>{break}<$>',
    '1658': '<$>{break}<$>',
    '97fd': '<$>{break}<$>',
    '97fe': '<$>{break}<$>',
    '1652': '<$>{break}<$>',
    '1651': '<$>{break}<$>',
    '1657': '<$>{break}<$>',
    '1654': '<$>{break}<$>',
    '10cb': '<$>{break}<$>',
    '97f2': '<$>{break}<$>',
    '97f1': '<$>{break}<$>',
    '97f7': '<$>{break}<$>',
    '97f4': '<$>{break}<$>',
    '165b': '<$>{break}<$>',
    '97f8': '<$>{break}<$>',
    '165d': '<$>{break}<$>',
    '165e': '<$>{break}<$>',
    '15cd': '<$>{break}<$>',
    '10cd': '<$>{break}<$>',
    '9767': '<$>{break}<$>',
    '9249': '<$>{break}<$>',
    '1349': '<$>{break}<$>',
    '91d9': '<$>{break}<$>',
    '1340': '<$>{break}<$>',
    '91d3': '<$>{break}<$>',
    '9243': '<$>{break}<$>',
    '1343': '<$>{break}<$>',
    '91d6': '<$>{break}<$>',
    '1345': '<$>{break}<$>',
    '1346': '<$>{break}<$>',
    '9246': '<$>{break}<$>',
    '94e9': '<$>{break}<$>',
    '94e5': '<$>{break}<$>',
    '94e6': '<$>{break}<$>',
    '94e0': '<$>{break}<$>',
    '94e3': '<$>{break}<$>',
    '15ea': '<$>{break}<$>',
    '15ec': '<$>{break}<$>',
    '15ef': '<$>{break}<$>{italic}<$>',
    '16fe': '<$>{break}<$>',
    '16fd': '<$>{break}<$>',
    '16fc': '<$>{break}<$>',
    '16fb': '<$>{break}<$>',
    '1367': '<$>{break}<$>',
    '94ef': '<$>{break}<$>{italic}<$>',
    '94ea': '<$>{break}<$>',
    '94ec': '<$>{break}<$>',
    '924a': '<$>{break}<$>',
    '91dc': '<$>{break}<$>',
    '924c': '<$>{break}<$>',
    '91da': '<$>{break}<$>',
    '91df': '<$>{break}<$>',
    '134f': '<$>{break}<$>{italic}<$>',
    '924f': '<$>{break}<$>{italic}<$>',
    '16f8': '<$>{break}<$>',
    '16f7': '<$>{break}<$>',
    '16f4': '<$>{break}<$>',
    '16f2': '<$>{break}<$>',
    '16f1': '<$>{break}<$>',
    '15e0': '<$>{break}<$>',
    '15e3': '<$>{break}<$>',
    '15e5': '<$>{break}<$>',
    '15e6': '<$>{break}<$>',
    '15e9': '<$>{break}<$>',
    '9757': '<$>{break}<$>',
    '9754': '<$>{break}<$>',
    '9752': '<$>{break}<$>',
    '9751': '<$>{break}<$>',
    '9758': '<$>{break}<$>',
    '92f1': '<$>{break}<$>',
    '104c': '<$>{break}<$>',
    '104a': '<$>{break}<$>',
    '104f': '<$>{break}<$>{italic}<$>',
    '105d': '<$>{break}<$>',
    '92f8': '<$>{break}<$>',
    '975e': '<$>{break}<$>',
    '975d': '<$>{break}<$>',
    '975b': '<$>{break}<$>',
    '1043': '<$>{break}<$>',
    '1040': '<$>{break}<$>',
    '1046': '<$>{break}<$>',
    '1045': '<$>{break}<$>',
    '1049': '<$>{break}<$>',
    '9479': '<$>{break}<$>',
    '917f': '<$>{break}<$>',
    '9470': '<$>{break}<$>',
    '9476': '<$>{break}<$>',
    '917a': '<$>{break}<$>',
    '9475': '<$>{break}<$>',
    '927a': '<$>{break}<$>',
    '927f': '<$>{break}<$>',
    '134a': '<$>{break}<$>',
    '15fb': '<$>{break}<$>',
    '15fc': '<$>{break}<$>',
    '15fd': '<$>{break}<$>',
    '15fe': '<$>{break}<$>',
    '1546': '<$>{break}<$>',
    '1545': '<$>{break}<$>',
    '1543': '<$>{break}<$>',
    '1540': '<$>{break}<$>',
    '1549': '<$>{break}<$>',
    '13fd': '<$>{break}<$>',
    '13fe': '<$>{break}<$>',
    '13fb': '<$>{break}<$>',
    '13fc': '<$>{break}<$>',
    '92e9': '<$>{break}<$>',
    '92e6': '<$>{break}<$>',
    '9458': '<$>{break}<$>',
    '92e5': '<$>{break}<$>',
    '92e3': '<$>{break}<$>',
    '92e0': '<$>{break}<$>',
    '9270': '<$>{break}<$>',
    '9273': '<$>{break}<$>',
    '9275': '<$>{break}<$>',
    '9276': '<$>{break}<$>',
    '15f1': '<$>{break}<$>',
    '15f2': '<$>{break}<$>',
    '15f4': '<$>{break}<$>',
    '15f7': '<$>{break}<$>',
    '9179': '<$>{break}<$>',
    '9176': '<$>{break}<$>',
    '9175': '<$>{break}<$>',
    '947a': '<$>{break}<$>',
    '9173': '<$>{break}<$>',
    '9170': '<$>{break}<$>',
    '13f7': '<$>{break}<$>',
    '13f4': '<$>{break}<$>',
    '13f2': '<$>{break}<$>',
    '13f1': '<$>{break}<$>',
    '92ef': '<$>{break}<$>{italic}<$>',
    '92ec': '<$>{break}<$>',
    '13f8': '<$>{break}<$>',
    '92ea': '<$>{break}<$>',
    '154f': '<$>{break}<$>{italic}<$>',
    '154c': '<$>{break}<$>',
    '154a': '<$>{break}<$>',
    '16c4': '<$>{break}<$>',
    '16c8': '<$>{break}<$>',
    '97c8': '<$>{break}<$>',
    '164f': '<$>{break}<$>{italic}<$>',
    '164a': '<$>{break}<$>',
    '164c': '<$>{break}<$>',
    '1645': '<$>{break}<$>',
    '1646': '<$>{break}<$>',
    '1640': '<$>{break}<$>',
    '1643': '<$>{break}<$>',
    '1649': '<$>{break}<$>',
    '94df': '<$>{break}<$>',
    '94dc': '<$>{break}<$>',
    '94da': '<$>{break}<$>',
    '135b': '<$>{break}<$>',
    '135e': '<$>{break}<$>',
    '135d': '<$>{break}<$>',
    '1370': '<$>{break}<$>',
    '9240': '<$>{break}<$>',
    '13e9': '<$>{break}<$>',
    '1375': '<$>{break}<$>',
    '1679': '<$>{break}<$>',
    '1358': '<$>{break}<$>',
    '1352': '<$>{break}<$>',
    '1351': '<$>{break}<$>',
    '1376': '<$>{break}<$>',
    '1357': '<$>{break}<$>',
    '1354': '<$>{break}<$>',
    '1379': '<$>{break}<$>',
    '94d9': '<$>{break}<$>',
    '94d6': '<$>{break}<$>',
    '94d5': '<$>{break}<$>',
    # Was the five-character key '15462', which no two-byte word can match.
    '1562': '<$>{break}<$>',
    '94d3': '<$>{break}<$>',
    '94d0': '<$>{break}<$>',
    '13e0': '<$>{break}<$>',
    '13e6': '<$>{break}<$>',
    '976b': '<$>{break}<$>',
    '15c4': '<$>{break}<$>',
    '15c2': '<$>{break}<$>',
    '15c1': '<$>{break}<$>',
    '976e': '<$>{break}<$>{italic}<$>',
    '134c': '<$>{break}<$>',
    '15c8': '<$>{break}<$>',
    '92c8': '<$>{break}<$>',
    '16e9': '<$>{break}<$>',
    '16e3': '<$>{break}<$>',
    '16e0': '<$>{break}<$>',
    '16e6': '<$>{break}<$>',
    '16e5': '<$>{break}<$>',
    '91e5': '<$>{break}<$>',
    '91e6': '<$>{break}<$>',
    '91e0': '<$>{break}<$>',
    '91e3': '<$>{break}<$>',
    '13c4': '<$>{break}<$>',
    '13c7': '<$>{break}<$>',
    '91e9': '<$>{break}<$>',
    '13c2': '<$>{break}<$>',
    '9762': '<$>{break}<$>',
    '15ce': '<$>{break}<$>{italic}<$>',
    '9761': '<$>{break}<$>',
    '15cb': '<$>{break}<$>',
    '9764': '<$>{break}<$>',
    '9768': '<$>{break}<$>',
    '91ef': '<$>{break}<$>{italic}<$>',
    '91ea': '<$>{break}<$>',
    '91ec': '<$>{break}<$>',
    '13ce': '<$>{break}<$>{italic}<$>',
    '13cd': '<$>{break}<$>',
    '97da': '<$>{break}<$>',
    '13cb': '<$>{break}<$>',
    # Was the five-character key '13462', which no two-byte word can match.
    '1362': '<$>{break}<$>',
    '16ec': '<$>{break}<$>',
    '16ea': '<$>{break}<$>',
    '16ef': '<$>{break}<$>{italic}<$>',
    '97c1': '<$>{break}<$>',
    '97c2': '<$>{break}<$>',
    '97c4': '<$>{break}<$>',
    '97c7': '<$>{break}<$>',
    '92cd': '<$>{break}<$>',
    '92ce': '<$>{break}<$>{italic}<$>',
    '92cb': '<$>{break}<$>',
    '92da': '<$>{break}<$>',
    '92dc': '<$>{break}<$>',
    '92df': '<$>{break}<$>',
    '97df': '<$>{break}<$>',
    '155b': '<$>{break}<$>',
    '155e': '<$>{break}<$>',
    '155d': '<$>{break}<$>',
    '97dc': '<$>{break}<$>',
    '1675': '<$>{break}<$>',
    '1676': '<$>{break}<$>',
    '1670': '<$>{break}<$>',
    '1673': '<$>{break}<$>',
    # Was the five-character key '16462', which no two-byte word can match.
    '1662': '<$>{break}<$>',
    '97cb': '<$>{break}<$>',
    '97ce': '<$>{break}<$>{italic}<$>',
    '97cd': '<$>{break}<$>',
    '92c4': '<$>{break}<$>',
    '92c7': '<$>{break}<$>',
    '92c1': '<$>{break}<$>',
    '92c2': '<$>{break}<$>',
    '1551': '<$>{break}<$>',
    '97d5': '<$>{break}<$>',
    '97d6': '<$>{break}<$>',
    '1552': '<$>{break}<$>',
    '97d0': '<$>{break}<$>',
    '1554': '<$>{break}<$>',
    '1557': '<$>{break}<$>',
    '97d3': '<$>{break}<$>',
    '1558': '<$>{break}<$>',
    '167f': '<$>{break}<$>',
    '137f': '<$>{break}<$>',
    '167a': '<$>{break}<$>',
    '92d9': '<$>{break}<$>',
    '92d0': '<$>{break}<$>',
    '92d3': '<$>{break}<$>',
    '92d5': '<$>{break}<$>',
    '92d6': '<$>{break}<$>',
    '10dc': '<$>{break}<$>',
    '9262': '<$>{break}<$>',
    '9261': '<$>{break}<$>',
    '91f8': '<$>{break}<$>',
    '10df': '<$>{break}<$>',
    '9264': '<$>{break}<$>',
    '91f4': '<$>{break}<$>',
    '91f7': '<$>{break}<$>',
    '91f1': '<$>{break}<$>',
    '91f2': '<$>{break}<$>',
    '97d9': '<$>{break}<$>',
    '9149': '<$>{break}<$>',
    '9143': '<$>{break}<$>',
    '9140': '<$>{break}<$>',
    '9146': '<$>{break}<$>',
    '9145': '<$>{break}<$>',
    '9464': '<$>{break}<$>',
    '9467': '<$>{break}<$>',
    '9461': '<$>{break}<$>',
    '9462': '<$>{break}<$>',
    '9468': '<$>{break}<$>',
    '914c': '<$>{break}<$>',
    '914a': '<$>{break}<$>',
    '914f': '<$>{break}<$>{italic}<$>',
    '10d3': '<$>{break}<$>',
    '926b': '<$>{break}<$>',
    '10d0': '<$>{break}<$>',
    '10d6': '<$>{break}<$>',
    '926e': '<$>{break}<$>{italic}<$>',
    '926d': '<$>{break}<$>',
    '91fd': '<$>{break}<$>',
    '91fe': '<$>{break}<$>',
    '10d9': '<$>{break}<$>',
    '91fb': '<$>{break}<$>',
    '91fc': '<$>{break}<$>',
    '946e': '<$>{break}<$>{italic}<$>',
    '946d': '<$>{break}<$>',
    '946b': '<$>{break}<$>',
    '10da': '<$>{break}<$>',
    '10d5': '<$>{break}<$>',
    '9267': '<$>{break}<$>',
    '9268': '<$>{break}<$>',
    '16df': '<$>{break}<$>',
    '16da': '<$>{break}<$>',
    '16dc': '<$>{break}<$>',
    '9454': '<$>{break}<$>',
    '9457': '<$>{break}<$>',
    '9451': '<$>{break}<$>',
    '9452': '<$>{break}<$>',
    '136d': '<$>{break}<$>',
    '136e': '<$>{break}<$>{italic}<$>',
    '136b': '<$>{break}<$>',
    '13d9': '<$>{break}<$>',
    '13da': '<$>{break}<$>',
    '13dc': '<$>{break}<$>',
    '13df': '<$>{break}<$>',
    '1568': '<$>{break}<$>',
    '1561': '<$>{break}<$>',
    '1564': '<$>{break}<$>',
    '1567': '<$>{break}<$>',
    '16d5': '<$>{break}<$>',
    '16d6': '<$>{break}<$>',
    '16d0': '<$>{break}<$>',
    '16d3': '<$>{break}<$>',
    '945d': '<$>{break}<$>',
    '945e': '<$>{break}<$>',
    '16d9': '<$>{break}<$>',
    '945b': '<$>{break}<$>',
    '156b': '<$>{break}<$>',
    '156d': '<$>{break}<$>',
    '156e': '<$>{break}<$>{italic}<$>',
    '105b': '<$>{break}<$>',
    '1364': '<$>{break}<$>',
    '1368': '<$>{break}<$>',
    '1361': '<$>{break}<$>',
    '13d0': '<$>{break}<$>',
    '13d3': '<$>{break}<$>',
    '13d5': '<$>{break}<$>',
    '13d6': '<$>{break}<$>',
    '97a1': '',
    '97a2': '',
    '9723': '',
    '94a1': '',
    '94a4': '',
    '94ad': '',
    '1020': '',
    '10a1': '',
    '10a2': '',
    '1023': '',
    '10a4': '',
    '1025': '',
    '1026': '',
    '10a7': '',
    '10a8': '',
    '1029': '',
    '102a': '',
    '10ab': '',
    '102c': '',
    '10ad': '',
    '10ae': '',
    '102f': '',
    '97ad': '',
    '97a4': '',
    '9725': '',
    '9726': '',
    '97a7': '',
    '97a8': '',
    '9729': '',
    '972a': '',
    '9120': '<$>{end-italic}<$>',
    '91a1': '',
    '91a2': '',
    '9123': '',
    '91a4': '',
    '9125': '',
    '9126': '',
    '91a7': '',
    '91a8': '',
    '9129': '',
    '912a': '',
    '91ab': '',
    '912c': '',
    '91ad': '',
    '97ae': '',
    '972f': '',
    '91ae': '<$>{italic}<$>',
    '912f': '<$>{italic}<$>',
    '94a8': '',
    '9423': '',
    '94a2': '',
}
|
||||
|
||||
|
||||
# Single-byte character codes (two lowercase hex digits) mapped to the text
# they produce.  Many keys have the top bit set (e.g. 'a1' for '!'), which
# looks like a parity bit -- NOTE(review): confirm against the SCC spec.
# The last two codes, '7f' and '80', map to the empty string.
CHARACTERS = {
    # codes 0x20 - 0x2f
    '20': ' ', 'a1': '!', 'a2': '"', '23': '#',
    'a4': '$', '25': '%', '26': '&', 'a7': "'",
    'a8': '(', '29': ')', '2a': 'á', 'ab': '+',
    '2c': ',', 'ad': '-', 'ae': '.', '2f': '/',
    # codes 0x30 - 0x3f
    'b0': '0', '31': '1', '32': '2', 'b3': '3',
    '34': '4', 'b5': '5', 'b6': '6', '37': '7',
    '38': '8', 'b9': '9', 'ba': ':', '3b': ';',
    'bc': '<', '3d': '=', '3e': '>', 'bf': '?',
    # codes 0x40 - 0x4f
    '40': '@', 'c1': 'A', 'c2': 'B', '43': 'C',
    'c4': 'D', '45': 'E', '46': 'F', 'c7': 'G',
    'c8': 'H', '49': 'I', '4a': 'J', 'cb': 'K',
    '4c': 'L', 'cd': 'M', 'ce': 'N', '4f': 'O',
    # codes 0x50 - 0x5f
    'd0': 'P', '51': 'Q', '52': 'R', 'd3': 'S',
    '54': 'T', 'd5': 'U', 'd6': 'V', '57': 'W',
    '58': 'X', 'd9': 'Y', 'da': 'Z', '5b': '[',
    'dc': 'é', '5d': ']', '5e': 'í', 'df': 'ó',
    # codes 0x60 - 0x6f
    'e0': 'ú', '61': 'a', '62': 'b', 'e3': 'c',
    '64': 'd', 'e5': 'e', 'e6': 'f', '67': 'g',
    '68': 'h', 'e9': 'i', 'ea': 'j', '6b': 'k',
    'ec': 'l', '6d': 'm', '6e': 'n', 'ef': 'o',
    # codes 0x70 - 0x7f
    '70': 'p', 'f1': 'q', 'f2': 'r', '73': 's',
    'f4': 't', '75': 'u', '76': 'v', 'f7': 'w',
    'f8': 'x', '79': 'y', '7a': 'z', 'fb': 'ç',
    '7c': '÷', 'fd': 'Ñ', 'fe': 'ñ', '7f': '',
    # filler byte
    '80': '',
}
|
||||
|
||||
|
||||
# Two-byte "special character" words (high byte '91') mapped to the character
# they produce.
SPECIAL_CHARS = {
    '91b0': '®', '9131': '°', '9132': '½', '91b3': '¿',
    '91b4': '™', '91b5': '¢', '91b6': '£', '9137': '♪',
    '9138': 'à', '91b9': ' ', '91ba': 'è', '913b': 'â',
    '91bc': 'ê', '913d': 'î', '913e': 'ô', '91bf': 'û',
}
|
||||
|
||||
|
||||
# Two-byte "extended character" words mapped to the character they produce.
# The table has two halves, distinguished by the high byte ('92' or '13').
EXTENDED_CHARS = {
    # high byte '92'
    '9220': 'Á', '92a1': 'É', '92a2': 'Ó', '9223': 'Ú',
    '92a4': 'Ü', '9225': 'ü', '9226': '‘', '92a7': '¡',
    '92a8': '*', '9229': '’', '922a': '—', '92ab': '©',
    '922c': '℠', '92ad': '•', '92ae': '“', '922f': '”',
    '92b0': 'À', '9231': 'Â', '9232': 'Ç', '92b3': 'È',
    '9234': 'Ê', '92b5': 'Ë', '92b6': 'ë', '9237': 'Î',
    '9238': 'Ï', '92b9': 'ï', '92ba': 'Ô', '923b': 'Ù',
    '92bc': 'ù', '923d': 'Û', '923e': '«', '92bf': '»',
    # high byte '13'
    '1320': 'Ã', '13a1': 'ã', '13a2': 'Í', '1323': 'Ì',
    '13a4': 'ì', '1325': 'Ò', '1326': 'ò', '13a7': 'Õ',
    '13a8': 'õ', '1329': '{', '132a': '}', '13ab': '\\',
    '132c': '^', '13ad': '_', '13ae': '¦', '132f': '~',
    '13b0': 'Ä', '1331': 'ä', '1332': 'Ö', '13b3': 'ö',
    '1334': 'ß', '13b5': '¥', '13b6': '¤', '1337': '|',
    '1338': 'Å', '13b9': 'å', '13ba': 'Ø', '133b': 'ø',
    '13bc': '┌', '133d': '┐', '133e': '└', '13bf': '┘',
}
|
||||
|
||||
|
||||
# Cursor positioning codes.
# Both lists are indexed by the 1-based row number (1..15); index 0 only
# holds the placeholder 'xx' so that the indices line up with the rows.
PAC_HIGH_BYTE_BY_ROW = [
    'xx',                    # unused index 0
    '91', '91',              # rows 1-2
    '92', '92',              # rows 3-4
    '15', '15',              # rows 5-6
    '16', '16',              # rows 7-8
    '97', '97',              # rows 9-10
    '10',                    # row 11
    '13', '13',              # rows 12-13
    '94', '94',              # rows 14-15
]
PAC_LOW_BYTE_BY_ROW_RESTRICTED = [
    'xx',                    # unused index 0
    'd0', '70',              # rows 1-2
    'd0', '70',              # rows 3-4
    'd0', '70',              # rows 5-6
    'd0', '70',              # rows 7-8
    'd0', '70',              # rows 9-10
    'd0',                    # row 11
    'd0', '70',              # rows 12-13
    'd0', '70',              # rows 14-15
]
|
||||
|
||||
# Preamble address codes: high byte -> {tuple of low bytes: (row, column)}.
# Any low byte in a tuple, coupled with the high byte, selects that
# (row, column).  Every high byte addresses a pair of rows with the same
# low-byte pattern, except '10', which only addresses row 11.
# This dictionary later gets transformed to a more suitable form for usage
# like PAC_BYTES_TO_POSITIONING_MAP['91']['d6'] = (1, 12)
def _build_pac_bytes_to_positioning_map():
    """Assemble the high byte -> {low byte tuple: (row, column)} table."""
    # Low bytes that put the cursor at column 0 of the first / second row
    # of a pair.
    first_row_column_0 = (
        'd0', '51', 'c2', '43', 'c4', '45', '46',
        'c7', 'c8', '49', '4a', 'cb', '4c', 'cd',
    )
    second_row_column_0 = (
        '70', 'f1', '62', 'e3', '64', 'e5', 'e6',
        '67', '68', 'e9', 'ea', '6b', 'ec', '6d',
    )
    # Low-byte pairs for columns 4, 8, ..., 28 (in that order).
    first_row_indents = (
        ('52', 'd3'), ('54', 'd5'), ('d6', '57'), ('58', 'd9'),
        ('da', '5b'), ('dc', '5d'), ('5e', 'df'),
    )
    second_row_indents = (
        ('f2', '73'), ('f4', '75'), ('76', 'f7'), ('f8', '79'),
        ('7a', 'fb'), ('7c', 'fd'), ('fe', '7f'),
    )
    # (high byte, first row, second row); None means "no second row".
    row_pairs = (
        ('91', 1, 2),
        ('92', 3, 4),
        ('15', 5, 6),
        ('16', 7, 8),
        ('97', 9, 10),
        ('10', 11, None),
        ('13', 12, 13),
        ('94', 14, 15),
    )

    pac_map = {}
    for high_byte, first_row, second_row in row_pairs:
        positions = {first_row_column_0: (first_row, 0)}
        if second_row is not None:
            positions[second_row_column_0] = (second_row, 0)
        for step, low_bytes in enumerate(first_row_indents, 1):
            positions[low_bytes] = (first_row, 4 * step)
        if second_row is not None:
            for step, low_bytes in enumerate(second_row_indents, 1):
                positions[low_bytes] = (second_row, 4 * step)
        pac_map[high_byte] = positions
    return pac_map


PAC_BYTES_TO_POSITIONING_MAP = _build_pac_bytes_to_positioning_map()
|
||||
|
||||
|
||||
def _create_position_to_bytes_map(bytes_to_pos):
|
||||
result = {}
|
||||
for high_byte, low_byte_dict in list(bytes_to_pos.items()):
|
||||
|
||||
# must contain mappings to column, to the tuple of possible values
|
||||
for low_byte_list in list(low_byte_dict.keys()):
|
||||
column = bytes_to_pos[high_byte][low_byte_list][1]
|
||||
|
||||
row = bytes_to_pos[high_byte][low_byte_list][0]
|
||||
if row not in result:
|
||||
result[row] = {}
|
||||
|
||||
result[row][column] = (
|
||||
tuple(product([high_byte], low_byte_list)))
|
||||
return result
|
||||
|
||||
# (Almost) the inverse of PAC_BYTES_TO_POSITIONING_MAP, keyed by row and then
# by column.  For example POSITIONING_TO_PAC_MAP[15][4] is the tuple
# (('94', 'f2'), ('94', '73')).
POSITIONING_TO_PAC_MAP = _create_position_to_bytes_map(
    PAC_BYTES_TO_POSITIONING_MAP)
|
||||
|
||||
|
||||
def _restructure_bytes_to_position_map(byte_to_pos_map):
|
||||
return {
|
||||
k_: {
|
||||
low_byte: byte_to_pos_map[k_][low_byte_list]
|
||||
for low_byte_list in list(v_.keys()) for low_byte in low_byte_list
|
||||
}
|
||||
for k_, v_ in list(byte_to_pos_map.items())
|
||||
}
|
||||
|
||||
# Rebind the name to the flattened table, so it can be indexed directly by
# high byte and then by a single low byte, e.g. ['91']['75'].
PAC_BYTES_TO_POSITIONING_MAP = _restructure_bytes_to_position_map(
    PAC_BYTES_TO_POSITIONING_MAP
)
|
||||
|
||||
|
||||
# Inverted character lookup: character -> code.  Characters that appear
# under more than one code in CHARACTERS keep whichever code is iterated
# last.
CHARACTER_TO_CODE = dict(
    (character, code) for code, character in viewitems(CHARACTERS)
)
|
||||
|
||||
# Inverted lookup for the two-byte characters: character -> code.  The
# extended table is loaded first and the special table second, so a character
# present in both would end up with its special-character code.
SPECIAL_OR_EXTENDED_CHAR_TO_CODE = dict(
    (character, code) for code, character in viewitems(EXTENDED_CHARS)
)
SPECIAL_OR_EXTENDED_CHAR_TO_CODE.update(
    (character, code) for code, character in viewitems(SPECIAL_CHARS)
)
|
||||
|
||||
# Time needed to transmit a single codeword, in microseconds: one second
# divided by the frame rate of 30000/1001 (~29.97) codewords per second.
MICROSECONDS_PER_CODEWORD = (1000.0 * 1000.0) / ((30.0 * 1000.0) / 1001.0)

# Header string of the Scenarist SCC format.
HEADER = 'Scenarist_SCC V1.0'
|
||||
823
utils/modules/pycaption/scc/specialized_collections.py
Normal file
823
utils/modules/pycaption/scc/specialized_collections.py
Normal file
@@ -0,0 +1,823 @@
|
||||
from ..base import CaptionList, Caption, CaptionNode
|
||||
from ..geometry import (UnitEnum, Size, Layout, Point, Alignment,
|
||||
VerticalAlignmentEnum, HorizontalAlignmentEnum)
|
||||
|
||||
from .constants import PAC_BYTES_TO_POSITIONING_MAP, COMMANDS
|
||||
import collections
|
||||
|
||||
|
||||
class PreCaption(object):
    """Mutable holder for caption data that is still being assembled.

    Caption instances must be treated as immutable, but some code in this
    module was written against mutable captions.  That code fills in a
    PreCaption instead and converts it with `to_real_caption` once done.
    """

    def __init__(self, start=0, end=0):
        """
        :param start: start time of the caption (default 0)
        :param end: end time of the caption (default 0)
        """
        self.start, self.end = start, end
        self.nodes = []
        self.style = {}
        self.layout_info = None

    def to_real_caption(self):
        """Return a Caption built from the data collected so far."""
        caption_args = (
            self.start,
            self.end,
            self.nodes,
            self.style,
            self.layout_info,
        )
        return Caption(*caption_args)
|
||||
|
||||
|
||||
class TimingCorrectingCaptionList(list):
    """List of captions that fills in missing end times as it grows.

    The captions stored by the most recent `append` or `extend` call form
    the "last batch".  When new captions are stored, the last batch ends at
    the start time of the first new caption (see `_update_last_batch`).

    None and captions without nodes are never stored.
    """

    def __init__(self, *args, **kwargs):
        super(TimingCorrectingCaptionList, self).__init__(*args, **kwargs)
        # Captions stored by the latest append/extend call.
        self._last_batch = ()

    def append(self, p_object):
        """Store one caption, closing the previously stored batch first.

        None and captions without nodes are ignored and leave the list
        (and the remembered batch) untouched.

        :type p_object: Caption | None
        """
        if p_object is not None and p_object.nodes:
            self._update_last_batch(self._last_batch, p_object)
            self._last_batch = (p_object,)
            super(TimingCorrectingCaptionList, self).append(p_object)

    def extend(self, iterable):
        """Store every usable caption of the iterable as one batch.

        The start time of the first usable caption is regarded as the end
        time of the previously stored batch.  Falsy items and captions
        without nodes are dropped.  Note that the remembered batch is
        replaced even when no usable caption is found.

        :param iterable: an iterable of Caption instances
        """
        usable = []
        for candidate in iterable:
            if candidate and candidate.nodes:
                usable.append(candidate)

        self._update_last_batch(self._last_batch, *usable)
        self._last_batch = tuple(usable)
        super(TimingCorrectingCaptionList, self).extend(usable)

    @staticmethod
    def _update_last_batch(batch, *new_captions):
        """End every caption in `batch` when the first new caption starts.

        Nothing happens unless the first new caption exists and has nodes,
        and the last caption of the batch still has an end time of 0.

        The start time of the first caption in new_captions should never
        be 0; that would mean an invalid SCC file.

        :type batch: tuple[Caption]
        :type new_captions: tuple[Caption]
        """
        first_new = new_captions[0] if new_captions else None
        if not first_new or not first_new.nodes:
            return

        # Only batches whose end is still unset (0) get corrected.
        if not batch or batch[-1].end != 0:
            return

        for previous in batch:
            previous.end = first_new.start
|
||||
|
||||
|
||||
class NotifyingDict(dict):
    """Dictionary-like object, that treats one key as 'active',
    and notifies observers if the active key changed
    """
    # Need an unhashable object as initial value for the active key.
    # That way we're sure this was never a key in the dict.
    _guard = {}

    def __init__(self, *args, **kwargs):
        """Accepts the same arguments as ``dict``; starts with no active key
        and no observers.
        """
        super(NotifyingDict, self).__init__(*args, **kwargs)
        self.active_key = self._guard
        self.observers = []

    def set_active(self, key):
        """Sets the active key

        Observers are called with (old_key, new_key) before the active key
        is replaced, and only if the key actually changes.

        :param key: any hashable object

        :raise ValueError: if the key is not present in the dict
        """
        if key not in self:
            raise ValueError('No such key present')

        # Notify observers of the change
        if key != self.active_key:
            for observer in self.observers:
                observer(self.active_key, key)

        self.active_key = key

    def get_active(self):
        """Returns the value corresponding to the active key

        :raise KeyError: if no key was made active yet
        """
        if self.active_key is self._guard:
            raise KeyError('No active key set')

        return self[self.active_key]

    def add_change_observer(self, observer):
        """Receives a callable function, which it will call if the active
        element changes.

        The observer will receive 2 positional arguments: the old and new key

        :param observer: any callable that can be called with 2 positional
            arguments

        :raise TypeError: if the observer is not callable
        """
        # The builtin callable() is used instead of collections.Callable,
        # which no longer exists on Python 3.10+.
        if not callable(observer):
            raise TypeError('The observer should be callable')

        self.observers.append(observer)
|
||||
|
||||
|
||||
class CaptionCreator(object):
    """Builds captions out of instruction buffers and keeps them in an
    internal collection.
    """
    def __init__(self):
        self._collection = TimingCorrectingCaptionList()

        # The captions produced by the latest create_and_store() call;
        # these are the ones whose end time can still be corrected.
        self._still_editing = []

    def correct_last_timing(self, end_time, force=False):
        """Set the end time on the caption(s) created last.

        Without ``force`` the end time is only written when the last of
        those captions still has an end time of 0.

        :type force: bool
        :param force: Set the end time even if there's already an end time

        :type end_time: float
        :param end_time: microseconds; the end of the caption;
        """
        pending = self._still_editing
        if not pending:
            return

        if not force and not pending[-1].end == 0:
            # The end time was already set and we were not asked to override
            return

        for caption in pending:
            caption.end = end_time

    def create_and_store(self, node_buffer, start):
        """Convert the buffer into one or more captions and store them.

        A new caption is started each time a repositioning instruction is
        encountered; every other non-empty instruction is converted into a
        CaptionNode appended to the caption being built.

        :type node_buffer: InstructionNodeCreator

        :type start: float
        :param start: the start time in microseconds
        """
        if node_buffer.is_empty():
            return

        current = PreCaption()
        current.start = start
        current.end = 0  # Not yet known; filled in later
        self._still_editing = [current]

        for instruction in node_buffer:
            # Nothing to convert for empty elements
            if instruction.is_empty():
                continue

            # A change of position starts a fresh caption
            if instruction.requires_repositioning():
                current = PreCaption()
                current.start = start
                current.end = 0
                self._still_editing.append(current)
                continue

            # Explicit line break
            if instruction.is_explicit_break():
                current.nodes.append(CaptionNode.create_break(
                    layout_info=_get_layout_from_tuple(instruction.position)
                ))
                continue

            # Italics opening
            if instruction.sets_italics_on():
                current.nodes.append(CaptionNode.create_style(
                    True, {'italics': True},
                    layout_info=_get_layout_from_tuple(instruction.position)
                ))
                continue

            # Italics closing
            if instruction.sets_italics_off():
                current.nodes.append(CaptionNode.create_style(
                    False, {'italics': True},
                    layout_info=_get_layout_from_tuple(instruction.position)
                ))
                continue

            # Plain text; the caption takes over the layout of its text
            if instruction.is_text_node():
                text_layout = _get_layout_from_tuple(instruction.position)
                current.nodes.append(CaptionNode.create_text(
                    instruction.get_text(), layout_info=text_layout
                ))
                current.layout_info = text_layout

        self._collection.extend(self._still_editing)

    def get_all(self):
        """Return every stored caption, converted with to_real_caption()

        :rtype: CaptionList
        """
        converted = CaptionList()
        for stored in self._collection:
            converted.append(stored.to_real_caption())
        return converted
|
||||
|
||||
|
||||
class InstructionNodeCreator(object):
    """Creates _InstructionNode instances from characters and commands, storing
    them internally
    """
    def __init__(self, collection=None, position_tracker=None):
        """
        :param collection: an optional collection of nodes; a falsy value
            (including an empty list) is replaced by a new empty list

        :param position_tracker: object interrogated about the current
            positioning whenever a node is created
        """
        if not collection:
            self._collection = []
        else:
            self._collection = collection

        self._position_tracer = position_tracker

    def is_empty(self):
        """Whether any text was added to the buffer

        :rtype: bool
        """
        return not any(element.text for element in self._collection)

    def add_chars(self, *chars):
        """Adds characters to a text node (last text node, or a new one)

        :param chars: tuple containing text (unicode)
        """
        if not chars:
            return

        current_position = self._position_tracer.get_current_position()

        # get or create a usable node
        if (self._collection and self._collection[-1].is_text_node()
                and not self._position_tracer.is_repositioning_required()):
            node = self._collection[-1]
        else:
            # create first node
            node = _InstructionNode(position=current_position)
            self._collection.append(node)

        # handle a simple line break
        if self._position_tracer.is_linebreak_required():
            # must insert a line break here
            self._collection.append(_InstructionNode.create_break(
                position=current_position))
            node = _InstructionNode.create_text(current_position)
            self._collection.append(node)
            self._position_tracer.acknowledge_linebreak_consumed()

        # handle completely new positioning
        elif self._position_tracer.is_repositioning_required():
            self._collection.append(
                _InstructionNode.create_repositioning_command(
                    current_position
                )
            )
            node = _InstructionNode.create_text(current_position)
            self._collection.append(node)
            self._position_tracer.acknowledge_position_changed()

        node.add_chars(*chars)

    def interpret_command(self, command):
        """Given a command determines whether to turn italics on or off,
        or to set the positioning

        This is mostly used to convert from the legacy-style commands

        :type command: unicode
        """
        self._update_positioning(command)

        text = COMMANDS.get(command, '')

        if 'italic' in text:
            if 'end' not in text:
                self._collection.append(
                    _InstructionNode.create_italics_style(
                        self._position_tracer.get_current_position())
                )
            else:
                self._collection.append(
                    _InstructionNode.create_italics_style(
                        self._position_tracer.get_current_position(),
                        turn_on=False
                    )
                )

    def _update_positioning(self, command):
        """Sets the positioning information to use for the next nodes

        Commands that are not 4 characters long, or that are not found in
        PAC_BYTES_TO_POSITIONING_MAP, leave the positioning untouched.

        :type command: unicode
        """
        if len(command) != 4:
            return

        first, second = command[:2], command[2:]

        try:
            positioning = PAC_BYTES_TO_POSITIONING_MAP[first][second]
        except KeyError:
            # Not a preamble address code; nothing to update
            pass
        else:
            self._position_tracer.update_positioning(positioning)

    def __iter__(self):
        """Iterates over the stored nodes after italics normalization"""
        return iter(_format_italics(self._collection))

    @classmethod
    def from_list(cls, stash_list, position_tracker):
        """Having received a list of instances of this class, creates a new
        instance that contains all the nodes of the previous instances
        (basically concatenates the many stashes into one)

        :type stash_list: list[InstructionNodeCreator]
        :param stash_list: a list of instances of this class

        :type position_tracker: .state_machines.DefaultProvidingPositionTracker
        :param position_tracker: state machine to be interrogated about the
            positioning when creating a node

        :rtype: InstructionNodeCreator
        """
        instance = cls(position_tracker=position_tracker)
        new_collection = instance._collection

        for idx, stash in enumerate(stash_list):
            new_collection.extend(stash._collection)

            # use space to separate the stashes, but don't add final space.
            # Nothing to separate while no node was collected yet (empty
            # leading stashes); indexing [-1] would raise IndexError.
            if idx < len(stash_list) - 1 and new_collection:
                try:
                    new_collection[-1].add_chars(' ')
                except AttributeError:
                    pass

        return instance
|
||||
|
||||
|
||||
def _get_layout_from_tuple(position_tuple):
|
||||
"""Create a Layout object from the positioning information given
|
||||
|
||||
The row can have a value from 1 to 15 inclusive. (vertical positioning)
|
||||
The column can have a value from 0 to 31 inclusive. (horizontal)
|
||||
|
||||
:param position_tuple: a tuple of ints (row, col)
|
||||
:type position_tuple: tuple
|
||||
:rtype: Layout
|
||||
"""
|
||||
if not position_tuple:
|
||||
return None
|
||||
|
||||
row, column = position_tuple
|
||||
|
||||
horizontal = Size(100 * column / 32.0, UnitEnum.PERCENT)
|
||||
vertical = Size(100 * (row - 1) / 15.0, UnitEnum.PERCENT)
|
||||
return Layout(origin=Point(horizontal, vertical),
|
||||
alignment=Alignment(HorizontalAlignmentEnum.LEFT,
|
||||
VerticalAlignmentEnum.TOP)
|
||||
)
|
||||
|
||||
|
||||
class _InstructionNode(object):
|
||||
"""Value object, that can contain text information, or interpretable
|
||||
commands (such as explicit line breaks or turning italics on/off).
|
||||
|
||||
These nodes will be aggregated into a RepresentableNode, which will then
|
||||
be easily converted to a CaptionNode.
|
||||
"""
|
||||
TEXT = 0
|
||||
BREAK = 1
|
||||
ITALICS_ON = 2
|
||||
ITALICS_OFF = 3
|
||||
CHANGE_POSITION = 4
|
||||
|
||||
def __init__(self, text=None, position=None, type_=0):
|
||||
"""
|
||||
:type text: unicode
|
||||
:param position: a tuple of ints (row, column)
|
||||
:param type_: self.TEXT | self.BREAK | self.ITALICS
|
||||
:type type_: int
|
||||
"""
|
||||
self.text = text
|
||||
self.position = position
|
||||
self._type = type_
|
||||
|
||||
def add_chars(self, *args):
|
||||
"""This being a text node, add characters to it.
|
||||
:param args:
|
||||
:type args: tuple[unicode]
|
||||
:return:
|
||||
"""
|
||||
if self.text is None:
|
||||
self.text = ''
|
||||
|
||||
self.text += ''.join(args)
|
||||
|
||||
def is_text_node(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type == self.TEXT
|
||||
|
||||
def is_empty(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
if self._type == self.TEXT:
|
||||
return not self.text
|
||||
|
||||
return False
|
||||
|
||||
def is_explicit_break(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type == self.BREAK
|
||||
|
||||
def sets_italics_on(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type == self.ITALICS_ON
|
||||
|
||||
def sets_italics_off(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type == self.ITALICS_OFF
|
||||
|
||||
def is_italics_node(self):
|
||||
"""
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type in (self.ITALICS_OFF, self.ITALICS_ON)
|
||||
|
||||
def requires_repositioning(self):
|
||||
"""Whether the node must be interpreted as a change in positioning
|
||||
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._type == self.CHANGE_POSITION
|
||||
|
||||
def get_text(self):
|
||||
"""A little legacy code.
|
||||
"""
|
||||
return ' '.join(self.text.split())
|
||||
|
||||
@classmethod
|
||||
def create_break(cls, position):
|
||||
"""Create a node, interpretable as an explicit line break
|
||||
|
||||
:type position: tuple[int]
|
||||
:param position: a tuple (row, col) containing the positioning info
|
||||
|
||||
:rtype: _InstructionNode
|
||||
"""
|
||||
return cls(type_=cls.BREAK, position=position)
|
||||
|
||||
@classmethod
|
||||
def create_text(cls, position, *chars):
|
||||
"""Create a node interpretable as text
|
||||
|
||||
:type position: tuple[int]
|
||||
:param position: a tuple (row, col) to mark the positioning
|
||||
|
||||
:type chars: tuple[unicode]
|
||||
:param chars: characters to add to the text
|
||||
|
||||
:rtype: _InstructionNode
|
||||
"""
|
||||
return cls(''.join(chars), position=position)
|
||||
|
||||
@classmethod
|
||||
def create_italics_style(cls, position, turn_on=True):
|
||||
"""Create a node, interpretable as a command to switch italics on/off
|
||||
|
||||
:type position: tuple[int]
|
||||
:param position: a tuple (row, col) to mark the positioning
|
||||
|
||||
:type turn_on: bool
|
||||
:param turn_on: whether to turn the italics on or off
|
||||
|
||||
:rtype: _InstructionNode
|
||||
"""
|
||||
return cls(
|
||||
position=position,
|
||||
type_=cls.ITALICS_ON if turn_on else cls.ITALICS_OFF
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def create_repositioning_command(cls, position=None):
|
||||
"""Create node interpretable as a command to change the current
|
||||
position
|
||||
|
||||
:type position:
|
||||
"""
|
||||
return cls(type_=cls.CHANGE_POSITION, position=position)
|
||||
|
||||
def __repr__(self): # pragma: no cover
|
||||
if self._type == self.BREAK:
|
||||
extra = 'BR'
|
||||
elif self._type == self.TEXT:
|
||||
extra = '"{}"'.format(self.text)
|
||||
elif self._type in (self.ITALICS_ON, self.ITALICS_OFF):
|
||||
extra = 'italics {}'.format(
|
||||
'on' if self._type == self.ITALICS_ON else 'off'
|
||||
)
|
||||
else:
|
||||
extra = 'change position'
|
||||
|
||||
return '<INode: {extra} >'.format(extra=extra)
|
||||
|
||||
|
||||
def _format_italics(collection):
    """Given a raw list of _InstructionNodes, returns a new equivalent list
    where all the italics nodes properly close and open.

    The list is equivalent in the sense that the SCC commands that would have
    generated the output list would have had the exact same visual effect
    as the ones that generated the input, as far as italics are concerned.

    This is useful because the raw commands read from the SCC can't be used
    the way they are by the writers for the other formats. Those other writers
    require the list of CaptionNodes to be formatted in a certain way.

    Note: Using state machines to manage the italics didn't work well because
    we're using state machines already to track the position, and their
    interactions got crazy.

    :type collection: list[_InstructionNode]
    :rtype: list[_InstructionNode]
    """
    # The passes run strictly in this order; each receives the output of
    # the previous one.
    passes = (
        _skip_initial_italics_off_nodes,
        _skip_empty_text_nodes,
        # after this step we're guaranteed a proper ordering of the nodes
        _skip_redundant_italics_nodes,
        # after this, we're guaranteed that the italics are properly
        # contained within their context
        _close_italics_before_repositioning,
        # all nodes will be closed after this step
        _ensure_final_italics_node_closes,
        # removes pairs of italics nodes that don't do anything noticeable
        _remove_noop_italics,
    )

    nodes = collection
    for run_pass in passes:
        nodes = run_pass(nodes)

    return nodes
|
||||
|
||||
|
||||
def _remove_noop_on_off_italics(collection):
|
||||
"""Return an equivalent list to `collection`. It removes the italics node
|
||||
pairs that don't surround text nodes, if those nodes are in the order:
|
||||
on, off
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = []
|
||||
to_commit = None
|
||||
|
||||
for node in collection:
|
||||
if node.is_italics_node() and node.sets_italics_on():
|
||||
to_commit = node
|
||||
continue
|
||||
|
||||
elif node.is_italics_node() and node.sets_italics_off():
|
||||
if to_commit:
|
||||
to_commit = None
|
||||
continue
|
||||
else:
|
||||
if to_commit:
|
||||
new_collection.append(to_commit)
|
||||
to_commit = None
|
||||
|
||||
new_collection.append(node)
|
||||
|
||||
return new_collection
|
||||
|
||||
|
||||
def _remove_noon_off_on_italics(collection):
|
||||
"""Removes pairs of off-on italics nodes, that don't surround any other
|
||||
node
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:return: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = []
|
||||
to_commit = None
|
||||
|
||||
for node in collection:
|
||||
if node.is_italics_node() and node.sets_italics_off():
|
||||
to_commit = node
|
||||
continue
|
||||
|
||||
elif node.is_italics_node() and node.sets_italics_on():
|
||||
if to_commit:
|
||||
to_commit = None
|
||||
continue
|
||||
else:
|
||||
if to_commit:
|
||||
new_collection.append(to_commit)
|
||||
to_commit = None
|
||||
|
||||
new_collection.append(node)
|
||||
|
||||
if to_commit:
|
||||
new_collection.append(to_commit)
|
||||
|
||||
return new_collection
|
||||
|
||||
|
||||
def _remove_noop_italics(collection):
    """Return a list equivalent to `collection`, without the italics node
    pairs that don't surround any other node.

    The on/off pairs are removed first, then the off/on pairs.

    :type collection: list[_InstructionNode]
    :rtype: list[_InstructionNode]
    """
    without_on_off_pairs = _remove_noop_on_off_italics(collection)
    return _remove_noon_off_on_italics(without_on_off_pairs)
|
||||
|
||||
|
||||
def _skip_initial_italics_off_nodes(collection):
|
||||
"""Return a collection like the one given, but without the
|
||||
initial <Italics OFF> nodes
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = []
|
||||
can_add_italics_off_nodes = False
|
||||
|
||||
for node in collection:
|
||||
if node.is_italics_node():
|
||||
if node.sets_italics_on():
|
||||
can_add_italics_off_nodes = True
|
||||
new_collection.append(node)
|
||||
elif can_add_italics_off_nodes:
|
||||
new_collection.append(node)
|
||||
else:
|
||||
new_collection.append(node)
|
||||
|
||||
return new_collection
|
||||
|
||||
|
||||
def _skip_empty_text_nodes(collection):
|
||||
"""Return an iterable containing all the nodes in the previous
|
||||
collection except for the empty text nodes
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
return [node for node in collection
|
||||
if not (node.is_text_node() and node.is_empty())]
|
||||
|
||||
|
||||
def _skip_redundant_italics_nodes(collection):
|
||||
"""Return a list where the <Italics On> nodes only appear after
|
||||
<Italics OFF>, and vice versa. This ignores the other node types, and
|
||||
only removes redundant italic nodes
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = []
|
||||
state = None
|
||||
|
||||
for node in collection:
|
||||
if node.is_italics_node():
|
||||
if state is None:
|
||||
state = node.sets_italics_on()
|
||||
new_collection.append(node)
|
||||
continue
|
||||
# skip the nodes that are like the previous
|
||||
if node.sets_italics_on() is state:
|
||||
continue
|
||||
else:
|
||||
state = node.sets_italics_on()
|
||||
new_collection.append(node)
|
||||
|
||||
return new_collection
|
||||
|
||||
|
||||
def _close_italics_before_repositioning(collection):
|
||||
"""Make sure that for every opened italic node, there's a corresponding
|
||||
closing node.
|
||||
|
||||
Will insert a closing italic node, before each repositioning node
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = []
|
||||
|
||||
italics_on = False
|
||||
last_italics_on_node = None
|
||||
|
||||
for idx, node in enumerate(collection):
|
||||
if node.is_italics_node() and node.sets_italics_on():
|
||||
italics_on = True
|
||||
last_italics_on_node = node
|
||||
if node.is_italics_node() and node.sets_italics_off():
|
||||
italics_on = False
|
||||
if node.requires_repositioning() and italics_on:
|
||||
# Append an italics closing node before the position change
|
||||
new_collection.append(
|
||||
_InstructionNode.create_italics_style(
|
||||
# The position info of this new node should be the same
|
||||
position=last_italics_on_node.position,
|
||||
turn_on=False
|
||||
)
|
||||
)
|
||||
new_collection.append(node)
|
||||
# Append an italics opening node after the positioning change
|
||||
new_collection.append(
|
||||
_InstructionNode.create_italics_style(
|
||||
position=node.position
|
||||
)
|
||||
)
|
||||
continue
|
||||
new_collection.append(node)
|
||||
|
||||
return new_collection
|
||||
|
||||
|
||||
def _ensure_final_italics_node_closes(collection):
|
||||
"""The final italics command needs to be closed
|
||||
|
||||
:type collection: list[_InstructionNode]
|
||||
:rtype: list[_InstructionNode]
|
||||
"""
|
||||
new_collection = list(collection)
|
||||
|
||||
italics_on = False
|
||||
last_italics_on_node = None
|
||||
|
||||
for node in collection:
|
||||
if node.is_italics_node() and node.sets_italics_on():
|
||||
italics_on = True
|
||||
last_italics_on_node = node
|
||||
if node.is_italics_node() and node.sets_italics_off():
|
||||
italics_on = False
|
||||
|
||||
if italics_on:
|
||||
new_collection.append(
|
||||
_InstructionNode.create_italics_style(
|
||||
position=last_italics_on_node.position,
|
||||
turn_on=False
|
||||
)
|
||||
)
|
||||
return new_collection
|
||||
128
utils/modules/pycaption/scc/state_machines.py
Normal file
128
utils/modules/pycaption/scc/state_machines.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from ..exceptions import CaptionReadSyntaxError
|
||||
|
||||
|
||||
class _PositioningTracker(object):
|
||||
"""Helps determine the positioning of a node, having kept track of
|
||||
positioning-related commands.
|
||||
"""
|
||||
def __init__(self, positioning=None):
|
||||
"""
|
||||
:param positioning: positioning information (row, column)
|
||||
:type positioning: tuple[int]
|
||||
"""
|
||||
self._positions = [positioning]
|
||||
self._break_required = False
|
||||
self._repositioning_required = False
|
||||
|
||||
def update_positioning(self, positioning):
|
||||
"""Being notified of a position change, updates the internal state,
|
||||
to as to be able to tell if it was a trivial change (a simple line
|
||||
break) or not.
|
||||
|
||||
:type positioning: tuple[int]
|
||||
:param positioning: a tuple (row, col)
|
||||
"""
|
||||
current = self._positions[-1]
|
||||
|
||||
if not current:
|
||||
if positioning:
|
||||
# set the positioning for the first time
|
||||
self._positions = [positioning]
|
||||
return
|
||||
|
||||
row, col = current
|
||||
new_row, _ = positioning
|
||||
|
||||
# is the new position simply one line below?
|
||||
if new_row == row + 1:
|
||||
self._positions.append((new_row, col))
|
||||
self._break_required = True
|
||||
else:
|
||||
# reset the "current" position altogether.
|
||||
self._positions = [positioning]
|
||||
self._repositioning_required = True
|
||||
|
||||
def get_current_position(self):
|
||||
"""Returns the current usable position
|
||||
|
||||
:rtype: tuple[int]
|
||||
|
||||
:raise: CaptionReadSyntaxError
|
||||
"""
|
||||
if not any(self._positions):
|
||||
raise CaptionReadSyntaxError(
|
||||
'No Preamble Address Code [PAC] was provided'
|
||||
)
|
||||
else:
|
||||
return self._positions[0]
|
||||
|
||||
def is_repositioning_required(self):
|
||||
"""Determines whether the current positioning has changed non-trivially
|
||||
|
||||
Trivial would be mean that a line break should suffice.
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._repositioning_required
|
||||
|
||||
def acknowledge_position_changed(self):
|
||||
"""Acknowledge the position tracer that the position was changed
|
||||
"""
|
||||
self._repositioning_required = False
|
||||
|
||||
def is_linebreak_required(self):
|
||||
"""If the current position is simply one line below the previous.
|
||||
:rtype: bool
|
||||
"""
|
||||
return self._break_required
|
||||
|
||||
def acknowledge_linebreak_consumed(self):
|
||||
"""Call to acknowledge that the line required was consumed
|
||||
"""
|
||||
self._break_required = False
|
||||
|
||||
|
||||
class DefaultProvidingPositionTracker(_PositioningTracker):
    """A _PositioningTracker that never fails to provide a position: when
    none is tracked it falls back to the last positioning it was notified
    of, or to a default value, initially (14, 0)
    """
    default = (14, 0)

    def __init__(self, positioning=None, default=None):
        """
        :type positioning: tuple[int]
        :param positioning: a tuple of ints (row, column)

        :type default: tuple[int]
        :param default: a tuple of ints (row, column) to use as fallback
        """
        super(DefaultProvidingPositionTracker, self).__init__(positioning)

        # Only override the class-level fallback if one was really given
        if default:
            self.default = default

    def get_current_position(self):
        """Returns the currently tracked positioning; if there is none,
        returns the fallback (the last positioning that was set, or the
        default this object was initiated with)

        :rtype: tuple[int]
        """
        parent = super(DefaultProvidingPositionTracker, self)
        try:
            position = parent.get_current_position()
        except CaptionReadSyntaxError:
            position = self.default
        return position

    def update_positioning(self, positioning):
        """Remembers a truthy positioning as the new fallback, then
        delegates to the super class.

        :param positioning: a tuple of ints (row, col)
        :type positioning: tuple[int]
        """
        if positioning:
            self.default = positioning

        parent = super(DefaultProvidingPositionTracker, self)
        parent.update_positioning(positioning)
|
||||
Reference in New Issue
Block a user