[EsriVideo] Add new extractor

Add extractor for [videos.esri.com](https://videos.esri.com), a collection
of videos relating to GIS.
pull/5459/merge
Shaun Walbridge 2015-04-18 00:37:04 -04:00 committed by Sergey M․
parent d0d6c097fc
commit 8b8c1093b6
2 changed files with 91 additions and 0 deletions

View File

@ -695,6 +695,7 @@ from .vice import ViceIE
from .viddler import ViddlerIE
from .videobam import VideoBamIE
from .videodetective import VideoDetectiveIE
from .videoesri import VideoEsriIE
from .videolecturesnet import VideoLecturesNetIE
from .videofyme import VideofyMeIE
from .videomega import VideoMegaIE

View File

@ -0,0 +1,90 @@
# coding: utf-8
from __future__ import unicode_literals
import os
import re
from .common import InfoExtractor
from ..utils import (
unified_strdate
)
class VideoEsriIE(InfoExtractor):
_VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://video.esri.com/watch/4228',
'md5': '170b4d513c2466ed483c150a48384133',
'info_dict': {
'id': '4228',
'ext': 'mp4',
'title': 'AppStudio for ArcGIS',
'thumbnail': 're:^https?://.*\.jpg$',
'upload_date': '20150310',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')
upload_date_raw = self._search_regex(
r'http-equiv="last-modified" content="(.*)"',
webpage, 'upload date')
upload_date = unified_strdate(upload_date_raw)
settings_info = self._search_regex(
r'evPlayerSettings = {(.*?);\s*$',
webpage, 'settings info', flags=re.MULTILINE | re.DOTALL)
# thumbnail includes '_x' for large, also has {_m,_t,_s} or
# without size suffix returns full image
thumbnail_path = re.findall(
r'image\': \'(\/thumbs.*)\'',
settings_info)[0]
if thumbnail_path:
thumbnail = '/'.join(['http://video.esri.com', thumbnail_path])
# note that this misses the (exceedly rare) webm files
video_paths = re.findall(r'mp4:(.*)\'', settings_info)
# find possible http servers of the mp4 files (also has rtsp)
base_url = re.findall(
r'netstreambasepath\':\s\'(h.*)\'', settings_info)[0]
# these are the numbers used internally, but really map
# to other resolutions, e.g. 960 is 720p.
heights = [480, 720, 960]
videos_by_res = {}
for video_path in video_paths:
url = "{base_url}{video_path}".format(
base_url=base_url,
video_path=video_path)
filename, ext = os.path.splitext(video_path)
height_label = int(filename.split('_')[1])
videos_by_res[height_label] = {
'url': url,
'ext': ext[1:],
'protocol': 'http', # http-only supported currently
}
formats = []
for height in heights:
if height in videos_by_res:
formats.append(videos_by_res[height])
result = {
'id': video_id,
'title': title,
'upload_date': upload_date,
'formats': formats,
}
if thumbnail:
result['thumbnail'] = thumbnail
return result