From 13267a2be37371bd3e8aea0bc7549f4ed2465c03 Mon Sep 17 00:00:00 2001
From: remitamine
Date: Mon, 11 Apr 2016 14:23:55 +0100
Subject: [PATCH] [openclassroom] Add new extractor(closes #9147)

---
 youtube_dl/extractor/extractors.py    |  1 +
 youtube_dl/extractor/openclassroom.py | 42 +++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 youtube_dl/extractor/openclassroom.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index c234ff127..21159f5a1 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -544,6 +544,7 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
+from .openclassroom import OpenClassRoomIE
 from .openload import OpenloadIE
 from .ora import OraTVIE
 from .orf import (
diff --git a/youtube_dl/extractor/openclassroom.py b/youtube_dl/extractor/openclassroom.py
new file mode 100644
index 000000000..bbc1af7c9
--- /dev/null
+++ b/youtube_dl/extractor/openclassroom.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_parse_qs
+from ..utils import (
+    ExtractorError,
+    xpath_text,
+    clean_html,
+)
+
+
+class OpenClassRoomIE(InfoExtractor):
+    _VALID_URL = r'https?://openclassroom\.stanford\.edu/MainFolder/VideoPage\.php\?(?P<query>.*)'
+    _TEST = {
+        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+        'md5': '544a9468546059d4e80d76265b0443b8',
+        'info_dict': {
+            'id': 'intro-environment',
+            'ext': 'mp4',
+            'title': 'Intro Environment',
+            'description': 'md5:7d57306c8649f814ca00bb80dada600e',
+        }
+    }
+    _URL_TEMPLATE = 'http://openclassroom.stanford.edu/MainFolder/courses/%s/videos/%s'
+
+    def _real_extract(self, url):
+        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
+        if not qs.get('course') or not qs.get('video'):
+            raise ExtractorError('Unsupported URL', expected=True)
+        video_id = qs['video'][0]
+        video_doc = self._download_xml(
+            self._URL_TEMPLATE % (qs['course'][0], video_id + '.xml'), video_id)
+        return {
+            'id': video_id,
+            'title': xpath_text(video_doc, 'title', 'title', True),
+            'url': self._URL_TEMPLATE % (qs['course'][0], xpath_text(
+                video_doc, 'videoFile', 'video url', True)),
+            'description': clean_html(xpath_text(video_doc, 'text')),
+        }