diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8f65c6499..fe30758ef 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -773,11 +773,20 @@ class YoutubeDL(object): def extract_info(self, url, download=True, ie_key=None, extra_info={}, process=True, force_generic_extractor=False): - ''' - Returns a list with a dictionary for each video we find. - If 'download', also downloads the videos. - extra_info is a dict containing the extra values to add to each result - ''' + """ + Return a list with a dictionary for each video extracted. + + Arguments: + url -- URL to extract + + Keyword arguments: + download -- whether to download videos during extraction + ie_key -- extractor key hint + extra_info -- dictionary containing the extra values to add to each result + process -- whether to resolve all unresolved references (URLs, playlist items), + must be True for download to work. + force_generic_extractor -- force using the generic extractor + """ if not ie_key and force_generic_extractor: ie_key = 'Generic' diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py deleted file mode 100644 index db5e12b21..000000000 --- a/youtube_dl/extractor/blinkx.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..utils import ( - remove_start, - int_or_none, -) - - -class BlinkxIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P[^?]+)' - IE_NAME = 'blinkx' - - _TEST = { - 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', - 'md5': '337cf7a344663ec79bf93a526a2e06c7', - 'info_dict': { - 'id': 'Da0Gw3xc', - 'ext': 'mp4', - 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', - 'uploader': 'IGN News', - 'upload_date': '20150217', - 'timestamp': 1424215740, - 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', - 'duration': 47.743333, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - display_id = video_id[:8] - - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' - + 'video=%s' % video_id) - data_json = self._download_webpage(api_url, display_id) - data = json.loads(data_json)['api']['results'][0] - duration = None - thumbnails = [] - formats = [] - for m in data['media']: - if m['type'] == 'jpg': - thumbnails.append({ - 'url': m['link'], - 'width': int(m['w']), - 'height': int(m['h']), - }) - elif m['type'] == 'original': - duration = float(m['d']) - elif m['type'] == 'youtube': - yt_id = m['link'] - self.to_screen('Youtube video detected: %s' % yt_id) - return self.url_result(yt_id, 'Youtube', video_id=yt_id) - elif m['type'] in ('flv', 'mp4'): - vcodec = remove_start(m['vcodec'], 'ff') - acodec = remove_start(m['acodec'], 'ff') - vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) - abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) - tbr = vbr + abr if vbr and abr else None - format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) - formats.append({ - 'format_id': format_id, - 'url': m['link'], - 'vcodec': vcodec, - 'acodec': acodec, - 'abr': abr, - 'vbr': vbr, - 'tbr': tbr, - 'width': int_or_none(m.get('w')), - 'height': int_or_none(m.get('h')), - }) - - self._sort_formats(formats) - - return { - 'id': display_id, - 'fullid': video_id, - 'title': data['title'], - 'formats': formats, - 'uploader': data['channel_name'], - 'timestamp': data['pubdate_epoch'], - 'description': data.get('description'), - 'thumbnails': thumbnails, - 'duration': duration, - } diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py index c345e0274..276fd4b09 100644 --- a/youtube_dl/extractor/dispeak.py +++ b/youtube_dl/extractor/dispeak.py @@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor): # From http://www.gdcvault.com/play/1013700/Advanced-Material 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', 'only_matching': True, + }, { + # From https://gdcvault.com/play/1016624, empty speakerVideo + 'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml', + 'info_dict': { + 'id': '201210-822101_1349794556671DDDD', + 'ext': 'flv', + 'title': 'Pre-launch - Preparing to Take the Plunge', + }, + }, { + # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo + 'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml', + 'only_matching': True, }] def _parse_mp4(self, metadata): @@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor): 'vcodec': 'none', 'format_id': audio.get('code'), }) - slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(slide_video_path, '.flv'), - 'ext': 'flv', - 'format_note': 'slide deck video', - 'quality': -2, - 'preference': -2, - 'format_id': 'slides', - }) - speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) - formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, - 'play_path': remove_end(speaker_video_path, '.flv'), - 'ext': 'flv', - 'format_note': 'speaker video', - 'quality': -1, - 'preference': -1, - 'format_id': 'speaker', - }) + for video_key, format_id, preference in ( + ('slide', 'slides', -2), ('speaker', 'speaker', -1)): + video_path = xpath_text(metadata, './%sVideo' % video_key) + if not video_path: + continue + formats.append({ + 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, + 'play_path': remove_end(video_path, '.flv'), + 'ext': 'flv', + 'format_note': '%s video' % video_key, + 'quality': preference, + 'preference': preference, + 'format_id': format_id, + }) return formats def _real_extract(self, url): diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 527632432..825d1c0f8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -132,7 +132,6 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) -from .blinkx import BlinkxIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py index c2e102a91..5a105614e 100644 --- a/youtube_dl/extractor/funimation.py +++ b/youtube_dl/extractor/funimation.py @@ -17,7 +17,7 @@ from ..utils import ( class FunimationIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P[^/?#&]+)' _NETRC_MACHINE = 'funimation' _TOKEN = None @@ -52,6 +52,10 @@ class FunimationIE(InfoExtractor): }, { 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'only_matching': True, + }, { + # with lang code + 'url': 'https://www.funimation.com/en/shows/hacksign/role-play/', + 'only_matching': True, }] def _login(self): diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 2f555c1d4..acc6478b8 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from .kaltura import KalturaIE from ..utils import ( HEADRequest, + remove_start, sanitized_Request, smuggle_url, urlencode_postdata, @@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor): 'format': 'mp4-408', }, }, + { + # Kaltura embed, whitespace between quote and embedded URL in iframe's src + 'url': 'https://www.gdcvault.com/play/1025699', + 'info_dict': { + 'id': '0_zagynv0a', + 'ext': 'mp4', + 'title': 'Tech Toolbox', + 'upload_date': '20190408', + 'uploader_id': 'joe@blazestreaming.com', + 'timestamp': 1554764629, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # HTML5 video + 'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru', + 'only_matching': True, + }, ] def _login(self, webpage_url, display_id): @@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor): xml_name = self._html_search_regex( r'