[extractor/cda] Support premium and misc improvements (#5529)

* Fix cache for non-ASCII key
* Improve error messages
* Better UA for fingerprint bypass

Authored by: selfisekai
This commit is contained in:
lauren n. liberda 2022-12-27 20:57:26 +01:00 committed by GitHub
parent 15e9e578c0
commit da8d2de208
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 12 deletions

View file

@ -5,6 +5,7 @@
import re
import shutil
import traceback
import urllib.parse
from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__
@ -22,11 +23,9 @@ def _get_root_dir(self):
return expand_path(res)
def _get_cache_fn(self, section, key, dtype):
    """Return the path of the cache file for ``key`` inside ``section``.

    The result is ``<root dir>/<section>/<encoded key>.<dtype>``, where the
    root dir comes from ``self._get_root_dir()``.

    ``section`` must consist solely of word characters, dots and dashes;
    anything else fails the assertion (AssertionError).

    ``key`` may be any string: it is percent-encoded with no character
    treated as safe (so path separators and non-ASCII characters are
    escaped too) and every ``%`` is then written as ``,``.
    """
    # fullmatch, not match + '$': '$' would also accept a trailing newline
    assert re.fullmatch(r'[\w.-]+', section), f'invalid section {section!r}'
    # Percent-encode everything outside quote()'s always-safe set, then
    # swap the '%' escape marker for ','
    key = urllib.parse.quote(key, safe='').replace('%', ',')
    return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
@property
def enabled(self):

View file

@ -4,6 +4,7 @@
import hashlib
import hmac
import json
import random
import re
from .common import InfoExtractor
@ -27,11 +28,10 @@ class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_NETRC_MACHINE = 'cdapl'
_BASE_URL = 'http://www.cda.pl/'
_BASE_URL = 'https://www.cda.pl'
_BASE_API_URL = 'https://api.cda.pl'
_API_HEADERS = {
'Accept': 'application/vnd.cda.public+json',
'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
}
# hardcoded in the app
_LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
@ -101,6 +101,38 @@ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
}, **kwargs)
def _perform_login(self, username, password):
app_version = random.choice((
'1.2.88 build 15306',
'1.2.174 build 18469',
))
android_version = random.randrange(8, 14)
phone_model = random.choice((
# x-kom.pl top selling Android smartphones, as of 2022-12-26
# https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
'ASUS ZenFone 8',
'Motorola edge 20 5G',
'Motorola edge 30 neo 5G',
'Motorola moto g22',
'OnePlus Nord 2T 5G',
'Samsung Galaxy A32 SMA325F',
'Samsung Galaxy M13',
'Samsung Galaxy S20 FE 5G',
'Xiaomi 11T',
'Xiaomi POCO M4 Pro',
'Xiaomi Redmi 10',
'Xiaomi Redmi 10C',
'Xiaomi Redmi 9C NFC',
'Xiaomi Redmi Note 10 Pro',
'Xiaomi Redmi Note 11 Pro',
'Xiaomi Redmi Note 11',
'Xiaomi Redmi Note 11S 5G',
'Xiaomi Redmi Note 11S',
'realme 10',
'realme 9 Pro+',
'vivo Y33s',
))
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
@ -138,9 +170,6 @@ def _api_extract(self, video_id):
meta = self._download_json(
f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
if meta.get('premium') and not meta.get('premium_free'):
self.report_drm(video_id)
uploader = traverse_obj(meta, 'author', 'login')
formats = [{
@ -151,6 +180,10 @@ def _api_extract(self, video_id):
'filesize': quality.get('length'),
} for quality in meta['qualities'] if quality.get('file')]
if meta.get('premium') and not meta.get('premium_free') and not formats:
raise ExtractorError(
'Video requires CDA Premium - subscription needed', expected=True)
return {
'id': video_id,
'title': meta.get('title'),
@ -167,10 +200,10 @@ def _api_extract(self, video_id):
def _web_extract(self, video_id, url):
self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id)
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
self.raise_login_required('This video is only available for premium users')
if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
self.raise_geo_restricted()