[rtvs] Fixed extractor for Slovak television and radio

2024-12-22 22:35:21 +00:00 · 2022-01-15 21:08:06 +01:00 · 2022-01-15 21:08:06 +01:00 · 63af7465cc
commit 63af7465cc
parent 5014bd67c2
1 changed file with 69 additions and 8 deletions
--- a/youtube_dl/extractor/rtvs.py
+++ b/youtube_dl/extractor/rtvs.py
@ -1,7 +1,13 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    url_or_none,
+    determine_ext
+)


 class RTVSIE(InfoExtractor):
@ -26,7 +32,8 @@ class RTVSIE(InfoExtractor):
            'id': '63118',
            'ext': 'mp4',
            'title': 'Amaro Džives - Náš deň',
-            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
+            'description':
+            'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
        },
        'params': {
            'skip_download': True,
@ -36,12 +43,66 @@ class RTVSIE(InfoExtractor):
    def _real_extract(self, url):
+        """Return the info dict for an RTVS radio or TV archive URL."""
        video_id = self._match_id(url)

-        webpage = self._download_webpage(url, video_id)
+        if '/radio/' in url:
+            a1, a2 = url.split('/')[-2:]
+            embed = self._download_webpage(
+                'https://www.rtvs.sk/embed/radio/archive/%s/%s' % (a1, a2),
+                video_id)
+            # '.' and '?' are escaped so the literal query string is matched
+            audio_id = self._search_regex(
+                r'audio5f\.json\?id=([^"]+)', embed, 'audio id')
+            info = self._download_json(
+                'https://www.rtvs.sk/json/audio5f.json?id=%s' % audio_id,
+                audio_id)['playlist'][0]
+            return {
+                'id': audio_id,
+                'title': info.get('title'),
+                'thumbnail': info.get('image'),
+                'formats': [{
+                    'url': info['sources'][0]['src'],
+                    'vcodec': 'none',  # audio-only stream
+                }],
+            }

-        playlist_url = self._search_regex(
-            r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'playlist url', group='url')
+        # Any other URL is a TV archive clip; a missing clip yields no formats
+        clip = self._download_json(
+            'https://www.rtvs.sk/json/archive5f.json?id=%s' % video_id,
+            video_id).get('clip') or {}
+        formats = []
+        for format_id, format_list in clip.items():
+            if not isinstance(format_list, list):
+                format_list = [format_list]
+            for format_dict in format_list:
+                if not isinstance(format_dict, dict):
+                    continue
+                format_url = url_or_none(format_dict.get('src'))
+                if not format_url:
+                    continue
+                format_type = format_dict.get('type')
+                ext = determine_ext(format_url)
+                if (format_type == 'application/x-mpegURL'
+                        or format_id == 'HLS' or ext == 'm3u8'):
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, video_id, 'mp4',
+                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        fatal=False))
+                elif (format_type == 'application/dash+xml'
+                      or format_id == 'DASH' or ext == 'mpd'):
+                    continue  # DASH manifests are not extracted here
+                else:
+                    formats.append({'url': format_url})
+        # Formats may lack 'tbr', so sorting on that key alone could raise
+        self._sort_formats(formats)

-        data = self._download_json(
-            playlist_url, video_id, 'Downloading playlist')[0]
-        return self._parse_jwplayer_data(data, video_id=video_id)
+        title = clip.get('title')
+        created = clip.get('datetime_create')
+        if title and created:
+            title = '%s-%s' % (title, created[:10])
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': clip.get('image'),
+            'description': clip.get('description'),
+            'formats': formats,
+        }