[extractor/udemy] Fix lectures that have no URL and detect DRM

Closes #5662
This commit is contained in:
pukkandan 2022-12-31 09:45:12 +05:30
parent 9bb856998b
commit 8d1ddb0805
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -11,8 +11,10 @@
int_or_none, int_or_none,
js_to_json, js_to_json,
sanitized_Request, sanitized_Request,
smuggle_url,
try_get, try_get,
unescapeHTML, unescapeHTML,
unsmuggle_url,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -106,7 +108,7 @@ def _download_lecture(self, course_id, lecture_id):
% (course_id, lecture_id), % (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={ lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset', 'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
}) })
def _handle_error(self, response): def _handle_error(self, response):
@ -199,16 +201,19 @@ def is_logged(webpage):
def _real_extract(self, url): def _real_extract(self, url):
lecture_id = self._match_id(url) lecture_id = self._match_id(url)
course_id = unsmuggle_url(url, {})[1].get('course_id')
webpage = self._download_webpage(url, lecture_id) webpage = None
if not course_id:
course_id, _ = self._extract_course_info(webpage, lecture_id) webpage = self._download_webpage(url, lecture_id)
course_id, _ = self._extract_course_info(webpage, lecture_id)
try: try:
lecture = self._download_lecture(course_id, lecture_id) lecture = self._download_lecture(course_id, lecture_id)
except ExtractorError as e: except ExtractorError as e:
# Error could possibly mean we are not enrolled in the course # Error could possibly mean we are not enrolled in the course
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
webpage = webpage or self._download_webpage(url, lecture_id)
self._enroll_course(url, webpage, course_id) self._enroll_course(url, webpage, course_id)
lecture = self._download_lecture(course_id, lecture_id) lecture = self._download_lecture(course_id, lecture_id)
else: else:
@ -391,6 +396,9 @@ def extract_subtitles(track_list):
if f.get('url'): if f.get('url'):
formats.append(f) formats.append(f)
if not formats and asset.get('course_is_drmed'):
self.report_drm(video_id)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -449,7 +457,9 @@ def _real_extract(self, url):
if lecture_id: if lecture_id:
entry = { entry = {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), 'url': smuggle_url(
f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
{'course_id': course_id}),
'title': entry.get('title'), 'title': entry.get('title'),
'ie_key': UdemyIE.ie_key(), 'ie_key': UdemyIE.ie_key(),
} }