[nrk] improve format extraction

This commit is contained in:
Remita Amine 2020-12-05 09:13:42 +01:00
parent 1b26bfd425
commit 4c93b2fd15

View File

@@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools import itertools
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@@ -22,13 +23,26 @@ from ..utils import (
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
_CDN_REPL_REGEX = r'''(?x)://
(?:
nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
nrk-od-no\.telenorcdn\.net|
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
)/'''
def _extract_nrk_formats(self, asset_url, video_id): def _extract_nrk_formats(self, asset_url, video_id):
if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url): if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
return self._extract_akamai_formats( return self._extract_akamai_formats(
re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id) re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url), video_id)
return self._extract_m3u8_formats( asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url), formats = self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native', fatal=False)
if not formats and re.search(self._CDN_REPL_REGEX, asset_url):
formats = self._extract_m3u8_formats(
re.sub(self._CDN_REPL_REGEX, '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99), asset_url),
video_id, 'mp4', 'm3u8_native', fatal=False) video_id, 'mp4', 'm3u8_native', fatal=False)
return formats
def _raise_error(self, data): def _raise_error(self, data):
MESSAGES = { MESSAGES = {
@@ -107,8 +121,10 @@ class NRKIE(NRKBaseIE):
def _extract_from_playback(self, video_id): def _extract_from_playback(self, video_id):
path_templ = 'playback/%s/' + video_id path_templ = 'playback/%s/' + video_id
call_playback_api = lambda x: self._call_api(path_templ % x, video_id, x) def call_playback_api(item, query=None):
manifest = call_playback_api('manifest') return self._call_api(path_templ % item, video_id, item, query=query)
# known values for preferredCdn: akamai, iponly, minicdn and telenor
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
if manifest.get('playability') == 'nonPlayable': if manifest.get('playability') == 'nonPlayable':
self._raise_error(manifest['nonPlayable']) self._raise_error(manifest['nonPlayable'])
@@ -195,7 +211,6 @@ class NRKTVIE(NRKBaseIE):
'series': '20 spørsmål', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
'skip': 'NoProgramRights',
}, { }, {
'url': 'https://tv.nrk.no/program/mdfp15000514', 'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': { 'info_dict': {
@@ -214,15 +229,15 @@ class NRKTVIE(NRKBaseIE):
# single playlist video # single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'info_dict': { 'info_dict': {
'id': 'MSPO40010515-part2', 'id': 'MSPO40010515AH',
'ext': 'flv', 'ext': 'mp4',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Video is geo restricted'], 'expected_warnings': ['Failed to download m3u8 information'],
'skip': 'particular part is not supported currently', 'skip': 'particular part is not supported currently',
}, { }, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
@@ -232,7 +247,7 @@ class NRKTVIE(NRKBaseIE):
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:c03aba1e917561eface5214020551b7a', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'skip': 'Video is geo restricted', 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
'info_dict': { 'info_dict': {
@@ -312,6 +327,7 @@ class NRKTVIE(NRKBaseIE):
asset_url = asset.get('url') asset_url = asset.get('url')
if not asset_url or asset_url in urls: if not asset_url or asset_url in urls:
continue continue
urls.append(asset_url)
formats = self._extract_nrk_formats(asset_url, video_id) formats = self._extract_nrk_formats(asset_url, video_id)
if not formats: if not formats:
continue continue