youtube-dl/youtube_dl/extractor/websurg.py

60 lines
1.9 KiB
Python
Raw Permalink Normal View History

2013-09-17 20:13:40 +00:00
# coding: utf-8
import re

from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
)
from .common import InfoExtractor


class WeBSurgIE(InfoExtractor):
    """Information extractor for videos published on websurg.com.

    The bundled test is skipped because it requires login information;
    ``_real_initialize`` signs in with the ``username``/``password``
    options held by the downloader, and ``_real_extract`` then scrapes
    the media page for the stream location and Open Graph metadata.
    """

    IE_NAME = u'websurg.com'
    # Group 1 captures everything that follows "doi=" and is used as the
    # video id.
    _VALID_URL = r'http://.*?\.websurg\.com/MEDIA/\?noheader=1&doi=(.*)'

    _TEST = {
        u'url': u'http://www.websurg.com/MEDIA/?noheader=1&doi=vd01en4012',
        u'file': u'vd01en4012.mp4',
        u'params': {
            u'skip_download': True,
        },
        u'skip': u'Requires login information',
    }

    _LOGIN_URL = 'http://www.websurg.com/inc/login/login_div.ajax.php?login=1'

    def _real_initialize(self):
        """Log in to websurg.com using the downloader's credentials.

        Posts the login form to ``_LOGIN_URL`` and then fetches the same
        URL again; the login counts as successful only when that second
        response body is exactly ``'OK'``.  Failures are reported through
        ``self._downloader.report_error``.
        """
        params = self._downloader.params
        username = params.get('username')
        password = params.get('password')
        if username is None or password is None:
            # Without credentials the form would carry the literal text
            # "None"; report the real problem instead of posting that.
            self._downloader.report_error(
                u'Unable to log in: no username/password given')
            return

        login_form = {
            'username': username,
            'password': password,
            'Submit': 1,
        }
        # urlencode() returns text, but Python 3 only accepts bytes as
        # POST data.  The percent-encoded form is plain ASCII.
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')

        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        request.add_header(
            'Content-Type', 'application/x-www-form-urlencoded;charset=utf-8')
        # The response body is not used; the request is made for its side
        # effect only (presumably a session cookie kept by the installed
        # opener -- TODO confirm).  Close it so the connection is not leaked.
        compat_urllib_request.urlopen(request).close()

        # The login endpoint is queried a second time, without form data,
        # to check whether the session is now authenticated.
        webpage = self._download_webpage(self._LOGIN_URL, '', 'Logging in')
        if webpage != 'OK':
            self._downloader.report_error(
                u'Unable to log in: bad username/password')

    def _real_extract(self, url):
        """Return the info dictionary for the video at *url*.

        The video id is group 1 of ``_VALID_URL``.  The media URL is built
        by joining the ``streamer`` and ``src`` attribute values found in
        the page with a ``/``.

        Raises:
            ExtractorError: if the page holds no ``streamer``/``src`` pair.
        """
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)

        stream_match = re.search(r'streamer="(.*?)" src="(.*?)"', webpage)
        if stream_match is None:
            # Previously this surfaced as an AttributeError on None.
            raise ExtractorError(u'Unable to extract video URL')
        streamer, source = stream_match.groups()

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'ext': 'mp4',
            'url': streamer + '/' + source,
            'thumbnail': self._og_search_thumbnail(webpage),
        }