diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index aba8b4537..7e504b75c 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -52,6 +52,7 @@ __authors__ = ( 'Juan C. Olivares', 'Mattias Harrysson', 'phaer', + 'Sainyam Kapoor', ) __license__ = 'Public Domain' diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 66f71edf6..65d97d94f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -62,6 +62,7 @@ from .dotsub import DotsubIE from .dreisat import DreiSatIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE +from .divxstage import DivxStageIE from .dropbox import DropboxIE from .ebaumsworld import EbaumsWorldIE from .ehow import EHowIE @@ -156,6 +157,7 @@ from .mofosex import MofosexIE from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motorsport import MotorsportIE +from .movshare import MovShareIE from .mtv import ( MTVIE, MTVIggyIE, @@ -276,6 +278,7 @@ from .videodetective import VideoDetectiveIE from .videolecturesnet import VideoLecturesNetIE from .videofyme import VideofyMeIE from .videopremium import VideoPremiumIE +from .videoweed import VideoWeedIE from .vimeo import ( VimeoIE, VimeoChannelIE, diff --git a/youtube_dl/extractor/divxstage.py b/youtube_dl/extractor/divxstage.py new file mode 100644 index 000000000..4ca3f37a2 --- /dev/null +++ b/youtube_dl/extractor/divxstage.py @@ -0,0 +1,27 @@ +from __future__ import unicode_literals + +from .novamov import NovaMovIE + + +class DivxStageIE(NovaMovIE): + IE_NAME = 'divxstage' + IE_DESC = 'DivxStage' + + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'divxstage\.(?:eu|net|ch|co|at|ag)'} + + _HOST = 'www.divxstage.eu' + + _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' + _TITLE_REGEX = r'
\s*([^<]+)' + _DESCRIPTION_REGEX = r'
\s*[^<]+\s*

([^<]+)

' + + _TEST = { + 'url': 'http://www.divxstage.eu/video/57f238e2e5e01', + 'md5': '63969f6eb26533a1968c4d325be63e72', + 'info_dict': { + 'id': '57f238e2e5e01', + 'ext': 'flv', + 'title': 'youtubedl test video', + 'description': 'This is a test video for youtubedl.', + } + } \ No newline at end of file diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 238cc7125..cf245d9cd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -511,17 +511,18 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group(1), 'Mpora') - # Look for embedded NovaMov player + # Look for embedded NovaMov-based player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) + r'''(?x)<iframe[^>]+?src=(["\']) + (?P<url>http://(?:(?:embed|www)\.)? + (?:novamov\.com| + nowvideo\.(?:ch|sx|eu|at|ag|co)| + videoweed\.(?:es|com)| + movshare\.(?:net|sx|ag)| + divxstage\.(?:eu|net|ch|co|at|ag)) + /embed\.php.+?)\1''', webpage) if mobj is not None: - return self.url_result(mobj.group('url'), 'NovaMov') - - # Look for embedded NowVideo player - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'NowVideo') + return self.url_result(mobj.group('url')) # Look for embedded Facebook player mobj = re.search( diff --git a/youtube_dl/extractor/movshare.py b/youtube_dl/extractor/movshare.py new file mode 100644 index 000000000..4191cf7a0 --- /dev/null +++ b/youtube_dl/extractor/movshare.py @@ -0,0 +1,27 @@ +from __future__ import unicode_literals + +from .novamov import NovaMovIE + + +class MovShareIE(NovaMovIE): + IE_NAME = 'movshare' + IE_DESC = 'MovShare' + + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'movshare\.(?:net|sx|ag)'} + + _HOST = 'www.movshare.net' + + _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' + 
_TITLE_REGEX = r'Title: ([^<]+)

' + _DESCRIPTION_REGEX = r'Description: ([^<]+)

' + + _TEST = { + 'url': 'http://www.movshare.net/video/559e28be54d96', + 'md5': 'abd31a2132947262c50429e1d16c1bfd', + 'info_dict': { + 'id': '559e28be54d96', + 'ext': 'flv', + 'title': 'dissapeared image', + 'description': 'optical illusion dissapeared image magic illusion', + } + } \ No newline at end of file diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py index fd310e219..2e7ab1e4f 100644 --- a/youtube_dl/extractor/novamov.py +++ b/youtube_dl/extractor/novamov.py @@ -13,7 +13,8 @@ class NovaMovIE(InfoExtractor): IE_NAME = 'novamov' IE_DESC = 'NovaMov' - _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'novamov\.com'} + _VALID_URL_TEMPLATE = r'http://(?:(?:www\.)?%(host)s/(?:file|video)/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<id>[a-z\d]{13})' + _VALID_URL = _VALID_URL_TEMPLATE % {'host': 'novamov\.com'} _HOST = 'www.novamov.com' @@ -36,18 +37,17 @@ class NovaMovIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') + video_id = mobj.group('id') page = self._download_webpage( 'http://%s/video/%s' % (self._HOST, video_id), video_id, 'Downloading video page') if re.search(self._FILE_DELETED_REGEX, page) is not None: - raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) + raise ExtractorError('Video %s does not exist' % video_id, expected=True) filekey = self._search_regex(self._FILEKEY_REGEX, page, 'filekey') title = self._html_search_regex(self._TITLE_REGEX, page, 'title', fatal=False) - description = self._html_search_regex(self._DESCRIPTION_REGEX, page, 'description', default='', fatal=False) api_response = self._download_webpage( diff --git a/youtube_dl/extractor/nowvideo.py b/youtube_dl/extractor/nowvideo.py index dd665874d..bfba18418 100644 --- a/youtube_dl/extractor/nowvideo.py +++ b/youtube_dl/extractor/nowvideo.py @@ -7,7 +7,7 @@ class 
NowVideoIE(NovaMovIE): IE_NAME = 'nowvideo' IE_DESC = 'NowVideo' - _VALID_URL = r'http://(?:(?:www\.)?%(host)s/video/|(?:(?:embed|www)\.)%(host)s/embed\.php\?(?:.*?&)?v=)(?P<videoid>[a-z\d]{13})' % {'host': 'nowvideo\.(?:ch|sx|eu)'} + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'nowvideo\.(?:ch|sx|eu|at|ag|co)'} _HOST = 'www.nowvideo.ch' diff --git a/youtube_dl/extractor/videoweed.py b/youtube_dl/extractor/videoweed.py new file mode 100644 index 000000000..4a08ddd43 --- /dev/null +++ b/youtube_dl/extractor/videoweed.py @@ -0,0 +1,26 @@ +from __future__ import unicode_literals + +from .novamov import NovaMovIE + + +class VideoWeedIE(NovaMovIE): + IE_NAME = 'videoweed' + IE_DESC = 'VideoWeed' + + _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': 'videoweed\.(?:es|com)'} + + _HOST = 'www.videoweed.es' + + _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<' + _TITLE_REGEX = r'

([^<]+)

' + + _TEST = { + 'url': 'http://www.videoweed.es/file/b42178afbea14', + 'md5': 'abd31a2132947262c50429e1d16c1bfd', + 'info_dict': { + 'id': 'b42178afbea14', + 'ext': 'flv', + 'title': 'optical illusion dissapeared image magic illusion', + 'description': '' + }, + } \ No newline at end of file