[extractor/Smotrim] Add extractor (#5015)

Authored by: nikita-moor, Lesmiscore
This commit is contained in:
Lesmiscore 2022-09-24 18:30:31 +09:00 committed by GitHub
parent d42763a443
commit faf7863bb0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 0 deletions

View file

@ -1619,6 +1619,7 @@
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE from .slutload import SlutloadIE
from .smotrim import SmotrimIE
from .snotr import SnotrIE from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .sonyliv import ( from .sonyliv import (

View file

@ -0,0 +1,65 @@
from .common import InfoExtractor
from ..utils import ExtractorError
class SmotrimIE(InfoExtractor):
_VALID_URL = r'https?://smotrim\.ru/(?P<type>brand|video|article|live)/(?P<id>[0-9]+)'
_TESTS = [{ # video
'url': 'https://smotrim.ru/video/1539617',
'md5': 'b1923a533c8cab09679789d720d0b1c5',
'info_dict': {
'id': '1539617',
'ext': 'mp4',
'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16',
'description': '',
},
'add_ie': ['RUTV'],
}, { # article (geo-restricted? plays fine from the US and JP)
'url': 'https://smotrim.ru/article/2813445',
'md5': 'e0ac453952afbc6a2742e850b4dc8e77',
'info_dict': {
'id': '2431846',
'ext': 'mp4',
'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"',
'description': 'md5:94a4a22472da4252bf5587a4ee441b99',
},
'add_ie': ['RUTV'],
}, { # brand, redirect
'url': 'https://smotrim.ru/brand/64356',
'md5': '740472999ccff81d7f6df79cecd91c18',
'info_dict': {
'id': '2354523',
'ext': 'mp4',
'title': 'Большие и маленькие. Лучшее. 4-й выпуск',
'description': 'md5:84089e834429008371ea41ea3507b989',
},
'add_ie': ['RUTV'],
}, { # live
'url': 'https://smotrim.ru/live/19201',
'info_dict': {
'id': '19201',
'ext': 'mp4',
# this looks like a TV channel name
'title': 'Россия Культура. Прямой эфир',
'description': '',
},
'add_ie': ['RUTV'],
}]
def _real_extract(self, url):
video_id, typ = self._match_valid_url(url).group('id', 'type')
rutv_type = 'video'
if typ not in ('video', 'live'):
webpage = self._download_webpage(url, video_id, f'Resolving {typ} link')
# there are two cases matching regex:
# 1. "embedUrl" in JSON LD (/brand/)
# 2. "src" attribute from iframe (/article/)
video_id = self._search_regex(
r'"https://player.smotrim.ru/iframe/video/id/(?P<video_id>\d+)/',
webpage, 'video_id', default=None)
if not video_id:
raise ExtractorError('There are no video in this page.', expected=True)
elif typ == 'live':
rutv_type = 'live'
return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}')