add direct screenwavemedia.com URL support

pull/4411/head
felix 2014-12-07 11:51:02 +01:00
parent 603c92080f
commit 684712076f
2 changed files with 61 additions and 17 deletions

View File

@ -50,7 +50,6 @@ from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
@ -335,6 +334,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screenwavemedia import ScreenwaveMediaIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE

View File

@ -9,15 +9,18 @@ from ..utils import (
int_or_none, int_or_none,
) )
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?' \
r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
r')'
class CinemassacreIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190', 'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': { 'info_dict': {
'id': '19911', 'id': 'Cinemasssacre-19911',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20121110', 'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer', 'title': '“Angry Video Game Nerd: The Movie” Trailer',
@ -28,7 +31,7 @@ class CinemassacreIE(InfoExtractor):
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511', 'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': { 'info_dict': {
'id': '521be8ef82b16', 'id': 'Cinemasssacre-521be8ef82b16',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20131002', 'upload_date': '20131002',
'title': 'The Mummys Hand (1940)', 'title': 'The Mummys Hand (1940)',
@ -36,18 +39,16 @@ class CinemassacreIE(InfoExtractor):
} }
] ]
def _real_extract(self, url): def _cinemassacre_get_info(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id') display_id = mobj.group('cm_display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj: if not mobj:
raise ExtractorError('Can\'t extract embed url and video id') raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url') playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id')
full_video_id = mobj.group('full_video_id')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title') r'<title>(?P<title>.+?)\|', webpage, 'title')
@ -56,10 +57,28 @@ class CinemassacreIE(InfoExtractor):
webpage, 'description', flags=re.DOTALL, fatal=False) webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage) video_thumbnail = self._og_search_thumbnail(webpage)
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _screenwavemedia_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
video_id = mobj.group('video_id')
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex( vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
pageurl = self._search_regex(
r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
videolist_url = None videolist_url = None
@ -67,7 +86,7 @@ class CinemassacreIE(InfoExtractor):
if mobj: if mobj:
videoserver = mobj.group('videoserver') videoserver = mobj.group('videoserver')
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
vidid = mobj.group('vidid') if mobj else full_video_id vidid = mobj.group('vidid') if mobj else video_id
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
else: else:
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@ -110,9 +129,34 @@ class CinemassacreIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': vidtitle,
'formats': formats, 'formats': formats,
'description': video_description, '_episode_page': pageurl,
'upload_date': video_date,
'thumbnail': video_thumbnail,
} }
def _real_extract(self, url):
    """Build the info dict for a direct player URL or a Cinemassacre page.

    Two kinds of URL are accepted by ``_VALID_URL``:

    * ``generic`` -- a player.screenwavemedia.com player URL. The player
      page is parsed first; if it names an episode page that is itself a
      supported site page, that page is parsed too for extra metadata.
    * ``cinemassacre`` -- a cinemassacre.com article. The article is
      parsed for site metadata and for the embedded player URL, which is
      then parsed for the formats.

    Returns the player info dict, updated with the site info dict when
    one is available (site values override player values with the same
    key, e.g. ``title``).

    Raises ExtractorError when no player information could be obtained.
    """
    mobj = re.match(self._VALID_URL, url)

    swm_info = None
    site_info = None

    if mobj.group('generic'):
        swm_info = self._screenwavemedia_get_info(url)
        # The episode page is optional metadata: when the player page does
        # not provide one, keep the player info instead of crashing in
        # re.match() on a missing/None URL.
        url = swm_info.get('_episode_page')
        mobj = re.match(self._VALID_URL, url) if url else None

    # mobj is None when the episode page is absent or not a supported URL.
    if mobj and mobj.group('cinemassacre'):
        site_info = self._cinemassacre_get_info(url)

    # Site page was the entry point: follow its embed to the player.
    if not swm_info and site_info:
        swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])

    if not swm_info:
        raise ExtractorError("Failed to extract metadata for this URL")

    if site_info:
        swm_info.update(site_info)

    return swm_info