From 47b0c8697a39bbd64d5b922f81ad74ee4d2a3136 Mon Sep 17 00:00:00 2001 From: dirkf Date: Mon, 7 Feb 2022 13:28:21 +0000 Subject: [PATCH] [ARD] Back-port subtitle extraction from yt-dlp PR 2409 Authored by: fstirlitz Fixes #30543 Closes #17766 (thanks ngdio) --- youtube_dl/extractor/ard.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index d45a9fe52..a5b1f54d5 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -332,9 +332,24 @@ class ARDIE(InfoExtractor): formats.append(f) self._sort_formats(formats) + _SUB_FORMATS = ( + ('./dataTimedText', 'ttml'), + ('./dataTimedTextNoOffset', 'ttml'), + ('./dataTimedTextVtt', 'vtt'), + ) + + subtitles = {} + for subsel, subext in _SUB_FORMATS: + for node in video_node.findall(subsel): + subtitles.setdefault('de', []).append({ + 'url': node.attrib['url'], + 'ext': subext, + }) + return { 'id': xpath_text(video_node, './videoId', default=display_id), 'formats': formats, + 'subtitles': subtitles, 'display_id': display_id, 'title': video_node.find('./title').text, 'duration': parse_duration(video_node.find('./duration').text),