youtube-dl/youtube_dl/extractor/vice.py

from __future__ import unicode_literals
import re

from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import ExtractorError


class ViceIE(InfoExtractor):
    """Extractor for pages on vice.com and its subdomains (e.g. news.vice.com).

    The extractor does not resolve any media itself: it looks for an Ooyala
    embed code in the downloaded page and returns a URL result for the
    corresponding Ooyala player URL, tagged with ``ie='Ooyala'``.
    """

    # The optional subdomain part must not contain '/', '?' or '#'. With a
    # plain '.+?' the pattern would also match URLs of unrelated sites that
    # merely carry "<something>.vice.com/" inside their path or query string
    # (for example a redirect link), and this extractor would wrongly claim
    # them.
    _VALID_URL = r'https?://(?:[^/?#]+\.)?vice\.com/.*?/(?P<name>.+)'

    _TESTS = [
        {
            'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
            'info_dict': {
                'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
                'ext': 'mp4',
                'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
            },
            'params': {
                # Requires ffmpeg (m3u8 manifest)
                'skip_download': True,
            },
        }, {
            'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
            'info_dict': {
                'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
                'ext': 'mp4',
                'title': 'VICE News - Inside the Monkey Lab',
                'description': 'md5:1f660d467d3515f29d11e5ef742a4b82',
            },
            'params': {
                # Requires ffmpeg (m3u8 manifest)
                'skip_download': True,
            },
        }
    ]

    def _real_extract(self, url):
        """Resolve a vice.com page URL to its embedded Ooyala player URL.

        The page is downloaded and searched for an ``embedCode=...``
        parameter; the value found is turned into an Ooyala URL via
        ``OoyalaIE._url_for_embed_code`` and returned as a URL result.

        Raises ExtractorError (with ``expected=True``) when the embed code
        cannot be extracted, i.e. the page does not contain a video.
        """
        mobj = re.match(self._VALID_URL, url)
        # The trailing part of the URL is only used as the identifier passed
        # to the page download; the real video id comes from Ooyala.
        name = mobj.group('name')
        webpage = self._download_webpage(url, name)
        try:
            embed_code = self._search_regex(
                r'embedCode=([^&\'"]+)', webpage,
                'ooyala embed code')
            ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
        except ExtractorError:
            # Replace the low-level error with a message that states the
            # actual problem; expected=True marks it as a non-bug condition.
            raise ExtractorError('The page doesn\'t contain a video', expected=True)
        return self.url_result(ooyala_url, ie='Ooyala')
[vice] Re-add extractor (fixes #4120) The generic extraction no longer works. 2014-11-06 20:44:07 +00:00			`from __future__ import unicode_literals`
			`import re`

			`from .common import InfoExtractor`
			`from .ooyala import OoyalaIE`
			`from ..utils import ExtractorError`


			`class ViceIE(InfoExtractor):`
[vice] Do not capture unused groups in _VALID_URL 2015-07-19 18:39:55 +00:00			`_VALID_URL = r'https?://(?:.+?\.)?vice\.com/.*?/(?P<name>.+)'`
[vice] Re-add extractor (fixes #4120) The generic extraction no longer works. 2014-11-06 20:44:07 +00:00
fix/support news.vice.com 2015-07-19 16:31:38 +00:00			`_TESTS = [`
			`{`
			`'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1',`
			`'info_dict': {`
			`'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',`
			`'ext': 'mp4',`
			`'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',`
			`},`
			`'params': {`
			`# Requires ffmpeg (m3u8 manifest)`
			`'skip_download': True,`
			`},`
			`}, {`
			`'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',`
			`'info_dict': {`
			`'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',`
			`'ext': 'mp4',`
			`'title': 'VICE News - Inside the Monkey Lab',`
			`'description': 'md5:1f660d467d3515f29d11e5ef742a4b82',`
			`},`
			`'params': {`
			`# Requires ffmpeg (m3u8 manifest)`
			`'skip_download': True,`
			`},`
			`}`
			`]`
[vice] Re-add extractor (fixes #4120) The generic extraction no longer works. 2014-11-06 20:44:07 +00:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
			`name = mobj.group('name')`
			`webpage = self._download_webpage(url, name)`
			`try:`
			`embed_code = self._search_regex(`
			`r'embedCode=([^&\'"]+)', webpage,`
			`'ooyala embed code')`
			`ooyala_url = OoyalaIE._url_for_embed_code(embed_code)`
			`except ExtractorError:`
			`raise ExtractorError('The page doesn\'t contain a video', expected=True)`
			`return self.url_result(ooyala_url, ie='Ooyala')`