Added a basic shell for a kankids extractor.

This commit is contained in:
deepspy 2024-06-24 00:10:22 +03:00
parent 4d05f84325
commit 833fe8c9af
2 changed files with 45 additions and 0 deletions

View File

@@ -560,6 +560,7 @@ from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kankan import KankanIE
from .kankids import KanKidsIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE

View File

@@ -0,0 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class KanKidsIE(InfoExtractor):
    """Skeleton extractor for content pages on kankids.org.il.

    Work in progress: the page is downloaded and its Open Graph
    description is read, but no media lookup is implemented yet.  The
    returned result is a ``url``-type entry that names the generic
    extractor (``ie_key`` = ``Generic``) and points at a fixed sample
    episode URL.
    """

    # Named groups: ``category`` (letters before "-main"), ``id`` (digits
    # after "p-") and an optional trailing ``season`` path segment.
    _VALID_URL = r'https?://(?:www\.)?kankids\.org\.il/content/kids/(?P<category>[a-z]+)-main/p-(?P<id>[0-9]+)/(?P<season>\w+/)?$'

    # NOTE(review): apart from 'url', the values below look like the
    # unmodified extractor-template placeholders (the 'id' does not match
    # the "p-12050" in the URL) -- replace them once real extraction exists.
    _TEST = {
        'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/',
        'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
        'info_dict': {
            'id': '42',
            'ext': 'mp4',
            'title': 'Video title goes here',
            'thumbnail': r're:^https?://.*\.jpg$',
            # TODO more properties, either as:
            # * A value
            # * MD5 checksum; start the string with md5:
            # * A regular expression; start the string with re:
            # * Any Python type (for example int or float)
        }
    }

    def _real_extract(self, url):
        """Download the page at *url* and return a ``url``-type result.

        The id is taken from *url* via ``_match_id`` and the description
        from the page's Open Graph metadata.  The title and the target URL
        are still hard-coded placeholders.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # TODO: extract the real title from the page, for example
        # title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
        title = 'hi'

        return {
            'id': video_id,
            'title': title,
            'description': self._og_search_description(webpage),
            # FIXME: fixed sample episode; the target must be derived from
            # the requested page instead of being the same for every input.
            'url': 'https://www.kankids.org.il/content/kids/hinuchit-main/p-12050/s1/89707/',
            'ie_key': 'Generic',
            '_type': 'url',
            # TODO more properties (see youtube_dl/extractor/common.py)
        }