From 31f50c8194f12c27ac6fbfe336f1d515aa8677ae Mon Sep 17 00:00:00 2001 From: dirkf Date: Sun, 27 Aug 2023 19:08:28 +0100 Subject: [PATCH] [S4C] Add thumbnail extraction, extract series as playlist Based on https://github.com/yt-dlp/yt-dlp/pull/7776: thx ifan-t, bashonly --- youtube_dl/extractor/extractors.py | 5 ++- youtube_dl/extractor/s4c.py | 62 ++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cb39876c2..d9289e5bf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1087,7 +1087,10 @@ from .rutube import ( from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import RuvIE -from .s4c import S4CIE +from .s4c import ( + S4CIE, + S4CSeriesIE, +) from .safari import ( SafariIE, SafariApiIE, diff --git a/youtube_dl/extractor/s4c.py b/youtube_dl/extractor/s4c.py index 21d40c2d3..b152e6680 100644 --- a/youtube_dl/extractor/s4c.py +++ b/youtube_dl/extractor/s4c.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals +from functools import partial as partial_f + from .common import InfoExtractor from ..utils import ( float_or_none, @@ -9,6 +11,7 @@ from ..utils import ( T, traverse_obj, txt_or_none, + url_or_none, ) @@ -21,7 +24,8 @@ class S4CIE(InfoExtractor): 'ext': 'mp4', 'title': 'Y Swn', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', - 'duration': 5340 + 'duration': 5340, + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', }, }, { 'url': 'https://www.s4c.cymru/clic/programme/856636948', @@ -31,6 +35,7 @@ class S4CIE(InfoExtractor): 'title': 'Am Dro', 'duration': 2880, 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', }, }] @@ -43,7 +48,7 @@ class S4CIE(InfoExtractor): 'programme_id': video_id, }, fatal=False) - filename = self._download_json( + player_config = self._download_json( 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'programme_id': video_id, 'signed': '0', @@ -51,7 +56,8 @@ class S4CIE(InfoExtractor): 'mode': 'od', 'appId': 'clic', 'streamName': '', - }, note='Downloading player config JSON')['filename'] + }, note='Downloading player config JSON') + m3u8_url = self._download_json( 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'mode': 'od', @@ -59,18 +65,60 @@ class S4CIE(InfoExtractor): 'region': 'WW', 'extra': 'false', 'thirdParty': 'false', - 'filename': filename, + 'filename': player_config['filename'], }, note='Downloading streaming urls JSON')['hls'] - # ... self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') - formats, subtitles = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native'), {} + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', entry_protocol='m3u8_native') + self._sort_formats(formats) + + subtitles = {} + for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): + subtitles.setdefault(sub.get('3', 'en'), []).append({ + 'url': sub['0'], + 'name': sub.get('1'), + }) return merge_dicts({ 'id': video_id, 'formats': formats, 'subtitles': subtitles, + 'thumbnail': url_or_none(player_config.get('poster')), }, traverse_obj(details, ('full_prog_details', 0, { 'title': (('programme_title', 'series_title'), T(txt_or_none)), 'description': ('full_billing', T(txt_or_none)), - 'duration': ('duration', T(lambda x: float_or_none(x, invscale=60))), + 'duration': ('duration', T(partial_f(float_or_none, invscale=60))), }), get_all=False), rev=True) + + +class S4CSeriesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.s4c.cymru/clic/series/864982911', + 'playlist_mincount': 6, + 'info_dict': { + 'id': '864982911', + 'title': 'Iaith ar Daith', + }, + }, { + 'url': 'https://www.s4c.cymru/clic/series/866852587', + 'playlist_mincount': 8, + 'info_dict': { + 'id': '866852587', + 'title': 'FFIT Cymru', + }, + }] + + def _real_extract(self, url): + series_id = self._match_id(url) + series_details = self._download_json( + 'https://www.s4c.cymru/df/series_details', series_id, query={ + 'lang': 'e', + 'series_id': series_id, + 'show_prog_in_series': 'Y' + }, note='Downloading series details JSON') + + return self.playlist_result( + (self.url_result('https://www.s4c.cymru/clic/programme/' + episode_id, S4CIE, episode_id) + for episode_id in traverse_obj(series_details, ('other_progs_in_series', Ellipsis, 'id'))), + playlist_id=series_id, playlist_title=traverse_obj( + series_details, ('full_prog_details', 0, 'series_title', T(txt_or_none))))