mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-19 06:39:16 +00:00
Merge remote-tracking branch 'origin/master' into yt-live-from-start-range
This commit is contained in:
commit
444e02ef3b
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
|
@ -192,7 +192,7 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: |
|
||||
brew install coreutils
|
||||
/usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt
|
||||
/usr/bin/python3 -m pip install -U --user pip Pyinstaller==5.8 -r requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
|
28
README.md
28
README.md
|
@ -463,15 +463,11 @@ ## Geo-restriction:
|
|||
specified by --proxy (or none, if the option
|
||||
is not present) is used for the actual
|
||||
downloading
|
||||
--geo-bypass Bypass geographic restriction via faking
|
||||
X-Forwarded-For HTTP header (default)
|
||||
--no-geo-bypass Do not bypass geographic restriction via
|
||||
faking X-Forwarded-For HTTP header
|
||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
||||
explicitly provided two-letter ISO 3166-1 alpha-2
|
||||
country code
|
||||
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
||||
explicitly provided IP block in CIDR notation
|
||||
--xff VALUE How to fake X-Forwarded-For HTTP header to
|
||||
try bypassing geographic restriction. One of
|
||||
"default" (Only when known to be useful),
|
||||
"never", a two-letter ISO 3166-1 alpha-2 country
|
||||
code, or an IP block in CIDR notation
|
||||
|
||||
## Video Selection:
|
||||
-I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items
|
||||
|
@ -752,6 +748,7 @@ ## Internet Shortcut Options:
|
|||
## Verbosity and Simulation Options:
|
||||
-q, --quiet Activate quiet mode. If used with --verbose,
|
||||
print the log to stderr
|
||||
--no-quiet Deactivate quiet mode. (Default)
|
||||
--no-warnings Ignore warnings
|
||||
-s, --simulate Do not download the video and do not write
|
||||
anything to disk
|
||||
|
@ -1246,7 +1243,7 @@ # OUTPUT TEMPLATE
|
|||
|
||||
1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
|
||||
|
||||
1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
|
||||
1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s`
|
||||
|
||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
|
||||
|
||||
|
@ -1797,7 +1794,10 @@ #### youtubetab (YouTube playlists, channels, feeds, etc.)
|
|||
* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
|
||||
|
||||
#### generic
|
||||
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
|
||||
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg
|
||||
* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
|
||||
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
||||
|
||||
#### funimation
|
||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||
|
@ -1833,7 +1833,7 @@ #### rokfinchannel
|
|||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
||||
#### twitter
|
||||
* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
|
||||
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
|
@ -2164,6 +2164,10 @@ #### Not recommended
|
|||
--youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
|
||||
--youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
|
||||
--youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
|
||||
--geo-bypass --xff "default"
|
||||
--no-geo-bypass --xff "never"
|
||||
--geo-bypass-country CODE --xff CODE
|
||||
--geo-bypass-ip-block IP_BLOCK --xff IP_BLOCK
|
||||
|
||||
|
||||
#### Developer options
|
||||
|
|
|
@ -1406,6 +1406,7 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'none',
|
||||
'acodec': 'AACL',
|
||||
'protocol': 'ism',
|
||||
'audio_channels': 2,
|
||||
'_download_params': {
|
||||
'stream_type': 'audio',
|
||||
'duration': 8880746666,
|
||||
|
@ -1419,9 +1420,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'audio_ext': 'isma',
|
||||
'video_ext': 'none',
|
||||
'abr': 128,
|
||||
}, {
|
||||
'format_id': 'video-100',
|
||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||
|
@ -1445,9 +1443,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 100,
|
||||
}, {
|
||||
'format_id': 'video-326',
|
||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||
|
@ -1471,9 +1466,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 326,
|
||||
}, {
|
||||
'format_id': 'video-698',
|
||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||
|
@ -1497,9 +1489,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 698,
|
||||
}, {
|
||||
'format_id': 'video-1493',
|
||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||
|
@ -1523,9 +1512,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 1493,
|
||||
}, {
|
||||
'format_id': 'video-4482',
|
||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||
|
@ -1549,9 +1535,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 4482,
|
||||
}],
|
||||
{
|
||||
'eng': [
|
||||
|
@ -1575,34 +1558,6 @@ def test_parse_ism_formats(self):
|
|||
'ec-3_test',
|
||||
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
[{
|
||||
'format_id': 'audio_deu_1-224',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'isma',
|
||||
'tbr': 224,
|
||||
'asr': 48000,
|
||||
'vcodec': 'none',
|
||||
'acodec': 'EC-3',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'stream_type': 'audio',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 0,
|
||||
'height': 0,
|
||||
'fourcc': 'EC-3',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
|
||||
'sampling_rate': 48000,
|
||||
'channels': 6,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'audio_ext': 'isma',
|
||||
'video_ext': 'none',
|
||||
'abr': 224,
|
||||
}, {
|
||||
'format_id': 'audio_deu-127',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1612,8 +1567,9 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'none',
|
||||
'acodec': 'AACL',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'audio_channels': 2,
|
||||
'_download_params': {
|
||||
'stream_type': 'audio',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1627,9 +1583,32 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'audio_ext': 'isma',
|
||||
'video_ext': 'none',
|
||||
'abr': 127,
|
||||
}, {
|
||||
'format_id': 'audio_deu_1-224',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
'ext': 'isma',
|
||||
'tbr': 224,
|
||||
'asr': 48000,
|
||||
'vcodec': 'none',
|
||||
'acodec': 'EC-3',
|
||||
'protocol': 'ism',
|
||||
'language': 'deu',
|
||||
'audio_channels': 6,
|
||||
'_download_params': {
|
||||
'stream_type': 'audio',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
'width': 0,
|
||||
'height': 0,
|
||||
'fourcc': 'EC-3',
|
||||
'language': 'deu',
|
||||
'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
|
||||
'sampling_rate': 48000,
|
||||
'channels': 6,
|
||||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
}, {
|
||||
'format_id': 'video_deu-23',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1641,8 +1620,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1655,9 +1634,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 23,
|
||||
}, {
|
||||
'format_id': 'video_deu-403',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1669,8 +1645,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1683,9 +1659,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 403,
|
||||
}, {
|
||||
'format_id': 'video_deu-680',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1697,8 +1670,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1711,9 +1684,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 680,
|
||||
}, {
|
||||
'format_id': 'video_deu-1253',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1725,8 +1695,9 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'vbr': 1253,
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1739,9 +1710,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 1253,
|
||||
}, {
|
||||
'format_id': 'video_deu-2121',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1753,8 +1721,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1767,9 +1735,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 2121,
|
||||
}, {
|
||||
'format_id': 'video_deu-3275',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1781,8 +1746,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1795,9 +1760,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 3275,
|
||||
}, {
|
||||
'format_id': 'video_deu-5300',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1809,8 +1771,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1823,9 +1785,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 5300,
|
||||
}, {
|
||||
'format_id': 'video_deu-8079',
|
||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||
|
@ -1837,8 +1796,8 @@ def test_parse_ism_formats(self):
|
|||
'vcodec': 'AVC1',
|
||||
'acodec': 'none',
|
||||
'protocol': 'ism',
|
||||
'_download_params':
|
||||
{
|
||||
'language': 'deu',
|
||||
'_download_params': {
|
||||
'stream_type': 'video',
|
||||
'duration': 370000000,
|
||||
'timescale': 10000000,
|
||||
|
@ -1851,9 +1810,6 @@ def test_parse_ism_formats(self):
|
|||
'bits_per_sample': 16,
|
||||
'nal_unit_length_field': 4
|
||||
},
|
||||
'video_ext': 'ismv',
|
||||
'audio_ext': 'none',
|
||||
'vbr': 8079,
|
||||
}],
|
||||
{},
|
||||
),
|
||||
|
|
|
@ -822,6 +822,10 @@ def expect_same_infodict(out):
|
|||
test('%(title&foo|baz)s.bar', 'baz.bar')
|
||||
test('%(x,id&foo|baz)s.bar', 'foo.bar')
|
||||
test('%(x,title&foo|baz)s.bar', 'baz.bar')
|
||||
test('%(id&a\nb|)s', ('a\nb', 'a b'))
|
||||
test('%(id&hi {:>10} {}|)s', 'hi 1234 1234')
|
||||
test(R'%(id&{0} {}|)s', 'NA')
|
||||
test(R'%(id&{0.1}|)s', 'NA')
|
||||
|
||||
# Laziness
|
||||
def gen():
|
||||
|
|
|
@ -445,6 +445,22 @@ def test_bitwise_operators_overflow(self):
|
|||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||
|
||||
def test_negative(self):
    """Evaluate expressions with a negative operand and with chained unary signs."""
    # (JS source, expected return value of f()); checked in the listed order
    cases = (
        ("function f(){return 2 * -2.0;}", -4),
        ('function f(){return 2 - - -2;}', 0),
        ('function f(){return 2 - - - -2;}', 4),
        ('function f(){return 2 - + + - -2;}', 0),
        ('function f(){return 2 + - + - -2;}', 0),
    )
    for code, expected in cases:
        interpreter = JSInterpreter(code)
        self.assertEqual(interpreter.call_function('f'), expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -1195,6 +1195,13 @@ def test_js_to_json_malformed(self):
|
|||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
|
||||
def test_js_to_json_template_literal(self):
    """Backtick template literals become JSON strings with `${...}` placeholders filled in.

    Each case is (template, mapping passed as the second argument, expected output).
    The last case shows that a placeholder missing from the mapping is left as its bare name.
    """
    cases = (
        ('`Hello ${name}`', {'name': '"world"'}, '"Hello world"'),
        ('`${name}${name}`', {'name': '"X"'}, '"XX"'),
        ('`${name}${name}`', {'name': '5'}, '"55"'),
        ('`${name}"${name}"`', {'name': '5'}, '"5\\"5\\""'),
        ('`${name}`', {}, '"name"'),
    )
    for template, variables, expected in cases:
        self.assertEqual(js_to_json(template, variables), expected)
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@ -2014,6 +2021,8 @@ def test_traverse_obj(self):
|
|||
msg='nested `...` queries should work')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
|
||||
msg='`...` query result should be flattened')
|
||||
self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
|
||||
msg='`...` should accept iterables')
|
||||
|
||||
# Test function as key
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||
|
@ -2021,6 +2030,8 @@ def test_traverse_obj(self):
|
|||
msg='function as query key should perform a filter based on (key, value)')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
|
||||
msg='exceptions in the query function should be catched')
|
||||
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||
msg='function key should accept iterables')
|
||||
if __debug__:
|
||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, lambda a: ...)
|
||||
|
@ -2045,6 +2056,17 @@ def test_traverse_obj(self):
|
|||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, {str.upper, str})
|
||||
|
||||
# Test `slice` as a key
|
||||
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||
msg='slice on a dictionary should not throw')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||
msg='slice key should apply slice to sequence')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||
msg='slice key should apply slice to sequence')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||
msg='slice key should apply slice to sequence')
|
||||
|
||||
# Test alternative paths
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||
msg='multiple `paths` should be treated as alternative paths')
|
||||
|
@ -2228,6 +2250,12 @@ def test_traverse_obj(self):
|
|||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||
traverse_string=True), ['s', 'r'],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
|
||||
# Test is_user_input behavior
|
||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
||||
|
|
|
@ -142,6 +142,10 @@
|
|||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
|
||||
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
import traceback
|
||||
import unicodedata
|
||||
import urllib.request
|
||||
from string import ascii_letters
|
||||
from string import Formatter, ascii_letters
|
||||
|
||||
from .cache import Cache
|
||||
from .compat import compat_os_name, compat_shlex_quote
|
||||
|
@ -1161,7 +1161,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
|||
}
|
||||
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
|
||||
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
||||
INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
|
||||
INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
|
||||
(?P<negate>-)?
|
||||
(?P<fields>{FIELD_RE})
|
||||
(?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
|
||||
|
@ -1242,6 +1242,14 @@ def _dumpjson_default(obj):
|
|||
return list(obj)
|
||||
return repr(obj)
|
||||
|
||||
class _ReplacementFormatter(Formatter):
|
||||
def get_field(self, field_name, args, kwargs):
|
||||
if field_name.isdigit():
|
||||
return args[0], -1
|
||||
raise ValueError('Unsupported field')
|
||||
|
||||
replacement_formatter = _ReplacementFormatter()
|
||||
|
||||
def create_key(outer_mobj):
|
||||
if not outer_mobj.group('has_key'):
|
||||
return outer_mobj.group(0)
|
||||
|
@ -1263,7 +1271,13 @@ def create_key(outer_mobj):
|
|||
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
|
||||
fmt = f'0{field_size_compat_map[key]:d}d'
|
||||
|
||||
value = default if value is None else value if replacement is None else replacement
|
||||
if value is None:
|
||||
value = default
|
||||
elif replacement is not None:
|
||||
try:
|
||||
value = replacement_formatter.format(replacement, value)
|
||||
except ValueError:
|
||||
value = na
|
||||
|
||||
flags = outer_mobj.group('conversion') or ''
|
||||
str_fmt = f'{fmt[:-1]}s'
|
||||
|
@ -1668,7 +1682,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
|
|||
self.add_extra_info(info_copy, extra_info)
|
||||
info_copy, _ = self.pre_process(info_copy)
|
||||
self._fill_common_fields(info_copy, False)
|
||||
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
|
||||
self.__forced_printings(info_copy)
|
||||
self._raise_pending_errors(info_copy)
|
||||
if self.params.get('force_write_download_archive', False):
|
||||
self.record_download_archive(info_copy)
|
||||
|
@ -1937,7 +1951,7 @@ def _build_format_filter(self, filter_spec):
|
|||
'!=': operator.ne,
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
|
||||
(?P<key>[\w.-]+)\s*
|
||||
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
|
||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||
|
@ -2710,7 +2724,7 @@ def is_wellformed(f):
|
|||
self.list_formats(info_dict)
|
||||
if list_only:
|
||||
# Without this printing, -F --print-json will not work
|
||||
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
|
||||
self.__forced_printings(info_dict)
|
||||
return info_dict
|
||||
|
||||
format_selector = self.format_selector
|
||||
|
@ -2870,6 +2884,12 @@ def _forceprint(self, key, info_dict):
|
|||
if info_dict is None:
|
||||
return
|
||||
info_copy = info_dict.copy()
|
||||
info_copy.setdefault('filename', self.prepare_filename(info_dict))
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
# For RTMP URLs, also include the playpath
|
||||
info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
||||
elif info_dict.get('url'):
|
||||
info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||
info_copy['formats_table'] = self.render_formats_table(info_dict)
|
||||
info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
|
||||
info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
|
||||
|
@ -2895,46 +2915,36 @@ def format_tmpl(tmpl):
|
|||
tmpl = format_tmpl(tmpl)
|
||||
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
|
||||
if self._ensure_dir_exists(filename):
|
||||
with open(filename, 'a', encoding='utf-8') as f:
|
||||
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
|
||||
with open(filename, 'a', encoding='utf-8', newline='') as f:
|
||||
f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
|
||||
|
||||
def __forced_printings(self, info_dict, filename, incomplete):
|
||||
def print_mandatory(field, actual_field=None):
|
||||
if actual_field is None:
|
||||
actual_field = field
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and (not incomplete or info_dict.get(actual_field) is not None)):
|
||||
self.to_stdout(info_dict[actual_field])
|
||||
|
||||
def print_optional(field):
|
||||
if (self.params.get('force%s' % field, False)
|
||||
and info_dict.get(field) is not None):
|
||||
self.to_stdout(info_dict[field])
|
||||
|
||||
info_dict = info_dict.copy()
|
||||
if filename is not None:
|
||||
info_dict['filename'] = filename
|
||||
if info_dict.get('requested_formats') is not None:
|
||||
# For RTMP URLs, also include the playpath
|
||||
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
||||
elif info_dict.get('url'):
|
||||
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||
return info_copy
|
||||
|
||||
def __forced_printings(self, info_dict, filename=None, incomplete=True):
|
||||
if (self.params.get('forcejson')
|
||||
or self.params['forceprint'].get('video')
|
||||
or self.params['print_to_file'].get('video')):
|
||||
self.post_extract(info_dict)
|
||||
self._forceprint('video', info_dict)
|
||||
if filename:
|
||||
info_dict['filename'] = filename
|
||||
info_copy = self._forceprint('video', info_dict)
|
||||
|
||||
print_mandatory('title')
|
||||
print_mandatory('id')
|
||||
print_mandatory('url', 'urls')
|
||||
print_optional('thumbnail')
|
||||
print_optional('description')
|
||||
print_optional('filename')
|
||||
if self.params.get('forceduration') and info_dict.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
||||
print_mandatory('format')
|
||||
def print_field(field, actual_field=None, optional=False):
    """Write `info_copy[actual_field]` to stdout if the `force<field>` param is set.

    @param field         Name used to look up the `force<field>` parameter
    @param actual_field  Key of `info_copy` that is printed; defaults to `field`
    @param optional      If true, the value is printed only when it is present.
                         If false, it is also printed unconditionally whenever
                         the enclosing call is not `incomplete`
    """
    if actual_field is None:
        actual_field = field
    if not self.params.get(f'force{field}'):
        return
    # Guard on the key that is actually printed. Checking `field` instead skips
    # output when only `actual_field` exists (e.g. `urls` built from
    # `requested_formats`) and raises KeyError when `field` is set but
    # `actual_field` is not
    if info_copy.get(actual_field) is not None or (not optional and not incomplete):
        self.to_stdout(info_copy[actual_field])
|
||||
|
||||
print_field('title')
|
||||
print_field('id')
|
||||
print_field('url', 'urls')
|
||||
print_field('thumbnail', optional=True)
|
||||
print_field('description', optional=True)
|
||||
print_field('filename', optional=True)
|
||||
if self.params.get('forceduration') and info_copy.get('duration') is not None:
|
||||
self.to_stdout(formatSeconds(info_copy['duration']))
|
||||
print_field('format')
|
||||
|
||||
if self.params.get('forcejson'):
|
||||
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||
|
@ -3316,7 +3326,7 @@ def ffmpeg_fixup(cndn, msg, cls):
|
|||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||
FFmpegFixupM3u8PP)
|
||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
|
||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
|
||||
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
||||
|
||||
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
||||
|
@ -3482,7 +3492,7 @@ def run_pp(self, pp, infodict):
|
|||
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
|
||||
return infodict
|
||||
|
||||
def run_all_pps(self, key, info, *, additional_pps=None):
|
||||
def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
|
||||
if key != 'video':
|
||||
self._forceprint(key, info)
|
||||
for pp in (additional_pps or []) + self._pps[key]:
|
||||
|
|
|
@ -412,12 +412,17 @@ def metadataparser_actions(f):
|
|||
except Exception as err:
|
||||
raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
|
||||
|
||||
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
|
||||
if geo_bypass_code is not None:
|
||||
opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
|
||||
if opts.geo_bypass.lower() not in ('default', 'never'):
|
||||
try:
|
||||
GeoUtils.random_ipv4(geo_bypass_code)
|
||||
GeoUtils.random_ipv4(opts.geo_bypass)
|
||||
except Exception:
|
||||
raise ValueError('unsupported geo-bypass country or ip-block')
|
||||
raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
|
||||
if len(opts.geo_bypass) == 2:
|
||||
opts.geo_bypass_country = opts.geo_bypass
|
||||
else:
|
||||
opts.geo_bypass_ip_block = opts.geo_bypass
|
||||
opts.geo_bypass = opts.geo_bypass.lower() != 'never'
|
||||
|
||||
opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
|
||||
|
||||
|
@ -720,7 +725,8 @@ def parse_options(argv=None):
|
|||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||
))
|
||||
opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
|
||||
if opts.quiet is None:
|
||||
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||
|
||||
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
|
||||
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import types
|
||||
from ..compat.compat_utils import passthrough_module
|
||||
|
||||
try:
|
||||
import Cryptodome as _parent
|
||||
|
@ -6,9 +6,11 @@
|
|||
try:
|
||||
import Crypto as _parent
|
||||
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
|
||||
_parent = types.ModuleType('no_Cryptodome')
|
||||
_parent = passthrough_module(__name__, 'no_Cryptodome')
|
||||
__bool__ = lambda: False
|
||||
|
||||
del passthrough_module
|
||||
|
||||
__version__ = ''
|
||||
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
|
||||
try:
|
||||
|
|
|
@ -254,6 +254,14 @@
|
|||
BRMediathekIE,
|
||||
)
|
||||
from .bravotv import BravoTVIE
|
||||
from .brainpop import (
|
||||
BrainPOPIE,
|
||||
BrainPOPJrIE,
|
||||
BrainPOPELLIE,
|
||||
BrainPOPEspIE,
|
||||
BrainPOPFrIE,
|
||||
BrainPOPIlIE,
|
||||
)
|
||||
from .breakcom import BreakIE
|
||||
from .breitbart import BreitBartIE
|
||||
from .brightcove import (
|
||||
|
@ -298,7 +306,10 @@
|
|||
CBCGemPlaylistIE,
|
||||
CBCGemLiveIE,
|
||||
)
|
||||
from .cbs import CBSIE
|
||||
from .cbs import (
|
||||
CBSIE,
|
||||
ParamountPressExpressIE,
|
||||
)
|
||||
from .cbslocal import (
|
||||
CBSLocalIE,
|
||||
CBSLocalArticleIE,
|
||||
|
@ -345,6 +356,7 @@
|
|||
)
|
||||
from .ciscowebex import CiscoWebexIE
|
||||
from .cjsw import CJSWIE
|
||||
from .clipchamp import ClipchampIE
|
||||
from .cliphunter import CliphunterIE
|
||||
from .clippit import ClippitIE
|
||||
from .cliprs import ClipRsIE
|
||||
|
@ -441,6 +453,10 @@
|
|||
)
|
||||
from .democracynow import DemocracynowIE
|
||||
from .detik import DetikEmbedIE
|
||||
from .dlf import (
|
||||
DLFIE,
|
||||
DLFCorpusIE,
|
||||
)
|
||||
from .dfb import DFBIE
|
||||
from .dhm import DHMIE
|
||||
from .digg import DiggIE
|
||||
|
@ -674,10 +690,18 @@
|
|||
from .giantbomb import GiantBombIE
|
||||
from .giga import GigaIE
|
||||
from .glide import GlideIE
|
||||
from .globalplayer import (
|
||||
GlobalPlayerLiveIE,
|
||||
GlobalPlayerLivePlaylistIE,
|
||||
GlobalPlayerAudioIE,
|
||||
GlobalPlayerAudioEpisodeIE,
|
||||
GlobalPlayerVideoIE
|
||||
)
|
||||
from .globo import (
|
||||
GloboIE,
|
||||
GloboArticleIE,
|
||||
)
|
||||
from .gmanetwork import GMANetworkVideoIE
|
||||
from .go import GoIE
|
||||
from .godtube import GodTubeIE
|
||||
from .gofile import GofileIE
|
||||
|
@ -709,13 +733,16 @@
|
|||
from .heise import HeiseIE
|
||||
from .hellporno import HellPornoIE
|
||||
from .helsinki import HelsinkiIE
|
||||
from .hentaistigma import HentaiStigmaIE
|
||||
from .hgtv import HGTVComShowIE
|
||||
from .hketv import HKETVIE
|
||||
from .hidive import HiDiveIE
|
||||
from .historicfilms import HistoricFilmsIE
|
||||
from .hitbox import HitboxIE, HitboxLiveIE
|
||||
from .hitrecord import HitRecordIE
|
||||
from .hollywoodreporter import (
|
||||
HollywoodReporterIE,
|
||||
HollywoodReporterPlaylistIE,
|
||||
)
|
||||
from .holodex import HolodexIE
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import (
|
||||
|
@ -727,6 +754,7 @@
|
|||
)
|
||||
from .howcast import HowcastIE
|
||||
from .howstuffworks import HowStuffWorksIE
|
||||
from .hrefli import HrefLiRedirectIE
|
||||
from .hrfensehen import HRFernsehenIE
|
||||
from .hrti import (
|
||||
HRTiIE,
|
||||
|
@ -936,10 +964,6 @@
|
|||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import (
|
||||
LineLiveIE,
|
||||
LineLiveChannelIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInIE,
|
||||
LinkedInLearningIE,
|
||||
|
@ -1219,6 +1243,8 @@
|
|||
NhkForSchoolBangumiIE,
|
||||
NhkForSchoolSubjectIE,
|
||||
NhkForSchoolProgramListIE,
|
||||
NhkRadioNewsPageIE,
|
||||
NhkRadiruIE,
|
||||
)
|
||||
from .nhl import NHLIE
|
||||
from .nick import (
|
||||
|
@ -1390,6 +1416,7 @@
|
|||
PeriscopeIE,
|
||||
PeriscopeUserIE,
|
||||
)
|
||||
from .pgatour import PGATourIE
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
|
@ -1606,6 +1633,11 @@
|
|||
from .rtp import RTPIE
|
||||
from .rtrfm import RTRFMIE
|
||||
from .rts import RTSIE
|
||||
from .rtvcplay import (
|
||||
RTVCPlayIE,
|
||||
RTVCPlayEmbedIE,
|
||||
RTVCKalturaIE,
|
||||
)
|
||||
from .rtve import (
|
||||
RTVEALaCartaIE,
|
||||
RTVEAudioIE,
|
||||
|
@ -1675,6 +1707,7 @@
|
|||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
from .servus import ServusIE
|
||||
|
@ -1772,6 +1805,7 @@
|
|||
BellatorIE,
|
||||
ParamountNetworkIE,
|
||||
)
|
||||
from .stageplus import StagePlusVODConcertIE
|
||||
from .startrek import StarTrekIE
|
||||
from .stitcher import (
|
||||
StitcherIE,
|
||||
|
@ -1954,6 +1988,7 @@
|
|||
from .triller import (
|
||||
TrillerIE,
|
||||
TrillerUserIE,
|
||||
TrillerShortIE,
|
||||
)
|
||||
from .trilulilu import TriluliluIE
|
||||
from .trovo import (
|
||||
|
@ -2280,6 +2315,8 @@
|
|||
WeiboMobileIE
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .wevidi import WeVidiIE
|
||||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimtv import WimTVIE
|
||||
|
@ -2334,8 +2371,6 @@
|
|||
from .yahoo import (
|
||||
YahooIE,
|
||||
YahooSearchIE,
|
||||
YahooGyaOPlayerIE,
|
||||
YahooGyaOIE,
|
||||
YahooJapanNewsIE,
|
||||
)
|
||||
from .yandexdisk import YandexDiskIE
|
||||
|
|
|
@ -436,6 +436,16 @@ def _real_extract(self, url):
|
|||
if 3 not in ondemand_types:
|
||||
# cannot acquire decryption key for these streams
|
||||
self.report_warning('This is a premium-only stream')
|
||||
info.update(traverse_obj(api_response, {
|
||||
'series': ('series', 'title'),
|
||||
'season': ('season', 'title'),
|
||||
'season_number': ('season', 'sequence'),
|
||||
'episode_number': ('episode', 'number'),
|
||||
}))
|
||||
if not title:
|
||||
title = traverse_obj(api_response, ('episode', 'title'))
|
||||
if not description:
|
||||
description = traverse_obj(api_response, ('episode', 'content'))
|
||||
|
||||
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
|
||||
elif video_type == 'slots':
|
||||
|
|
|
@ -1573,7 +1573,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
})
|
||||
elif mso_id == 'Spectrum':
|
||||
elif mso_id in ('Spectrum', 'Charter_Direct'):
|
||||
# Spectrum's login form is dynamically loaded via JS so we need to hardcode the flow
|
||||
# as a one-off implementation.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import ExtractorError, traverse_obj, url_or_none
|
||||
|
||||
|
||||
class AeonCoIE(InfoExtractor):
|
||||
|
@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
|
|||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
'info_dict': {
|
||||
'id': '728595228',
|
||||
'id': '759576926',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wrought',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
|
||||
'uploader': 'Biofilm Productions',
|
||||
'uploader_id': 'user140352216',
|
||||
'uploader_url': 'https://vimeo.com/user140352216',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344
|
||||
}
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
'info_dict': {
|
||||
'id': 'emyi4z-O0ls',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
|
||||
'uploader': 'TED-Ed',
|
||||
'uploader_id': '@TEDEd',
|
||||
'uploader_url': 'https://www.youtube.com/@TEDEd',
|
||||
'duration': 344,
|
||||
'upload_date': '20200827',
|
||||
'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
|
||||
'playable_in_embed': True,
|
||||
'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
|
||||
'categories': ['Education'],
|
||||
'like_count': int,
|
||||
'channel': 'TED-Ed',
|
||||
'chapters': 'count:7',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
|
||||
'tags': 'count:26',
|
||||
'availability': 'public',
|
||||
'channel_follower_count': int,
|
||||
'view_count': int,
|
||||
'age_limit': 0,
|
||||
'live_status': 'not_live',
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
|
||||
vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
|
||||
return self.url_result(vimeo_url, VimeoIE)
|
||||
embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
|
||||
lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
|
||||
if not embed_url:
|
||||
raise ExtractorError('No embed URL found in webpage')
|
||||
if 'player.vimeo.com' in embed_url:
|
||||
embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
|
||||
return self.url_result(embed_url)
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
|
@ -133,7 +134,7 @@ def _get_all_children(self, reply):
|
|||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
|
@ -281,19 +282,60 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'video redirects to festival page',
|
||||
'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
|
||||
'info_dict': {
|
||||
'id': 'BV1wP4y1P72h',
|
||||
'ext': 'mp4',
|
||||
'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
|
||||
'timestamp': 1643947497,
|
||||
'upload_date': '20220204',
|
||||
'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
|
||||
'uploader': '叨叨冯聊音乐',
|
||||
'duration': 246.719,
|
||||
'uploader_id': '528182630',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
'note': 'newer festival video',
|
||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||
'info_dict': {
|
||||
'id': 'BV1ay4y1d77f',
|
||||
'ext': 'mp4',
|
||||
'title': '【崩坏3新春剧场】为特别的你送上祝福!',
|
||||
'timestamp': 1674273600,
|
||||
'upload_date': '20230121',
|
||||
'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
|
||||
'uploader': '果蝇轰',
|
||||
'duration': 1111.722,
|
||||
'uploader_id': '8469526',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
|
||||
video_data = initial_state['videoData']
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = traverse_obj(
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
|
@ -316,20 +358,39 @@ def _real_extract(self, url):
|
|||
|
||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note='Extracting festival video formats')['data']
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
'like_count': ('videoStatus', 'like', {int_or_none}),
|
||||
'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
|
||||
}, get_all=False)
|
||||
|
||||
return {
|
||||
**traverse_obj(initial_state, {
|
||||
'uploader': ('upData', 'name'),
|
||||
'uploader_id': ('upData', 'mid', {str_or_none}),
|
||||
'like_count': ('videoData', 'stat', 'like', {int_or_none}),
|
||||
'tags': ('tags', ..., 'tag_name'),
|
||||
'thumbnail': ('videoData', 'pic', {url_or_none}),
|
||||
}),
|
||||
**festival_info,
|
||||
**traverse_obj(video_data, {
|
||||
'description': 'desc',
|
||||
'timestamp': ('pubdate', {int_or_none}),
|
||||
'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
|
||||
'comment_count': ('stat', 'reply', {int_or_none}),
|
||||
}, get_all=False),
|
||||
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
|
||||
'title': title,
|
||||
'description': traverse_obj(initial_state, ('videoData', 'desc')),
|
||||
'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
|
||||
'uploader': traverse_obj(initial_state, ('upData', 'name')),
|
||||
'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
|
||||
'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
|
||||
'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
|
||||
'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
|
||||
'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
|
||||
'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, aid, cid),
|
||||
|
@ -996,6 +1057,53 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
'info_dict': {
|
||||
'id': '340317',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1604057820,
|
||||
'upload_date': '20201030',
|
||||
'episode_number': 5,
|
||||
'title': 'E5 - My Own Steel',
|
||||
'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
|
||||
'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||
'episode': 'Episode 5',
|
||||
'comment_count': int,
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 61.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
}, {
|
||||
'start_time': 61.0,
|
||||
'end_time': 134.0,
|
||||
'title': 'Intro'
|
||||
}, {
|
||||
'start_time': 1290.0,
|
||||
'end_time': 1379.0,
|
||||
'title': 'Outro'
|
||||
}],
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
}, {
|
||||
# user generated content comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/video/2045730385',
|
||||
'info_dict': {
|
||||
'id': '2045730385',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
}, {
|
||||
# episode id without intro and outro
|
||||
|
@ -1055,11 +1163,69 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
|||
|
||||
# XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
    """Yield every reply posted under the root comment ``root_id``.

    Replies are fetched page by page from the ``reply/web/detail`` endpoint,
    starting at cursor ``next_id``. ``display_id`` is only used for logging.

    Pagination is done with a loop rather than by recursing once per page, so
    long threads do not build an ever-deeper chain of nested generators.
    Iteration stops when the API reports ``is_end`` or when the response does
    not carry a usable cursor (instead of failing on the missing key).
    """
    while True:
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text'))
            }

        # A missing/malformed cursor is treated as the end of the thread
        cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor'), expected_type=dict) or {}
        next_id = cursor.get('next')
        if cursor.get('is_end') or next_id is None:
            return
|
||||
|
||||
def _get_comments(self, video_id, ep_id):
    """Yield all root comments of a video, each followed by its replies.

    ``ep_id`` only selects the comment type sent to the API: a truthy value
    requests series content (type 3), otherwise user generated content (type 1).

    Pages are requested until the API reports ``is_end``. A response without
    a cursor also ends the iteration; otherwise the page counter would keep
    requesting pages forever.
    """
    for i in itertools.count(0):
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/root', video_id,
            note=f'Downloading comment page {i + 1}',
            query={
                'platform': 'web',
                'pn': i,  # page number
                'ps': 20,  # comment per page (default: 20)
                'oid': video_id,
                'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                'sort_type': 1,  # 1: best, 2: recent
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'timestamp': unified_timestamp(replies.get('ctime_text')),
                'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
            }
            # Only query the reply endpoint when the comment reports replies
            if replies.get('count'):
                yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

        cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor'), expected_type=dict)
        if not cursor or cursor.get('is_end'):
            break
|
||||
|
||||
def _real_extract(self, url):
|
||||
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||
video_id = ep_id or aid
|
||||
|
@ -1087,7 +1253,8 @@ def _real_extract(self, url):
|
|||
**self._extract_video_metadata(url, video_id, season_id),
|
||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
|
|||
def _check_format(self, video_url, video_id):
|
||||
urls = orderedSet(
|
||||
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
|
||||
for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
|
||||
for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
|
||||
'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
|
||||
'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
|
||||
'seedp29xb', 'zb10-7gsop1v78'))
|
||||
for url in urls:
|
||||
try:
|
||||
response = self._request_webpage(
|
||||
|
|
318
yt_dlp/extractor/brainpop.py
Normal file
318
yt_dlp/extractor/brainpop.py
Normal file
|
@ -0,0 +1,318 @@
|
|||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
)
|
||||
|
||||
|
||||
class BrainPOPBaseIE(InfoExtractor):
    """Shared URL matching, format assembly and login for the BrainPOP sites.

    Subclasses set ``_ORIGIN`` (site root) plus ``_VIDEO_URL`` and ``_HLS_URL``,
    which the format helpers below read.
    """
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    # Maps the numeric status code of the login API to a readable message
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        """Build the URL pattern from ``_ORIGIN``.

        The scheme is relaxed to http/https and a ``www.`` prefix is made
        optional. ``slug`` captures three path segments; ``id`` is the last one.
        """
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields=None):
        """Return the HLS formats plus one direct format for a single media slug.

        ``token`` is appended as the query string of both URLs and
        ``extra_fields`` (optional dict) is merged into every returned format.
        HLS extraction is non-fatal, so the direct format is always present.
        """
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        for f in formats:
            f.update(extra_fields or {})
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields=None):
        """Collect formats for the high/low (and audio-described) entries of ``data``.

        Keys are looked up as ``key_format % 'high'`` / ``key_format % 'low'``,
        once as-is and once with an ``ad_`` prefix. Entries that are missing
        or empty are skipped. ``extra_fields`` (optional dict) overrides the
        per-key fields in every produced format.
        """
        formats = []
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2
            }
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if data.get(full_key_index):
                    formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                        'quality': -1 - key_quality,
                        **additional_key_fields,
                        **(extra_fields or {})
                    }))
        return formats

    def _perform_login(self, username, password):
        """Log in through the BrainPOP API and warn (without raising) on failure.

        Any status code other than 1505 is reported as a failed login. HTTP 400
        is accepted so that the JSON error payload can be inspected.
        """
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res.get('status_code'))
        if status_code != 1505:
            # The fallback chain must be resolved before formatting: a formatted
            # string is always truthy, so an `or` placed after it never applies
            error = (
                self._LOGIN_ERRORS.get(status_code)
                or login_res.get('message')
                or f'Got status code {status_code}')
            self.report_warning(f'Unable to login: {error}')
|
||||
|
||||
|
||||
class BrainPOPIE(BrainPOPBaseIE):
    """Extractor for movies on www.brainpop.com, backed by the JSON content API."""
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        """Fetch movie and topic data for the URL and build the info dict.

        The topic request is non-fatal; when it fails, the topic embedded in
        the movie data is used instead. If the API denies access, a login or
        no-formats error is raised using the reason supplied by the API.
        """
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            # The reason may be absent; fall back to a generic message rather
            # than failing on the substring test below
            reason = traverse_obj(
                movie_data, ('access', 'reason'), expected_type=str) or 'Access to this video is not allowed'
            if 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(reason, video_id=display_id)
        movie_feature = movie_data['feature']
        movie_feature_data = movie_feature['data']

        formats, subtitles = [], {}
        formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
            'language': movie_feature.get('language') or 'en',
            'language_preference': 10
        }))
        for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                'language': lang,
                'language_preference': -10
            }))

        # TODO: Do localization fields also have subtitles?
        # Subtitle tracks are the `subtitles_<lang>` entries of the feature data
        for name, subtitle_path in movie_feature_data.items():
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and subtitle_path:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, subtitle_path)
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    """Base for BrainPOP sites whose topic data is embedded in the page's JavaScript."""

    def _parse_js_topic_data(self, topic_data, display_id, token):
        """Turn an embedded topic object into an info dict."""
        # TODO: Are there non-burned subtitles?
        formats = self._extract_adaptive_formats(topic_data['movies'], token, display_id)
        optional_fields = (
            ('title', 'name'),
            ('alt_title', 'title'),
            ('description', 'synopsis'),
        )
        return {
            'id': topic_data['EntryID'],
            'display_id': display_id,
            **{info_key: topic_data.get(source_key) for info_key, source_key in optional_fields},
            'formats': formats,
        }

    def _real_extract(self, url):
        """Read the `content` variable and `ec_token` from the page and parse the topic."""
        _slug, display_id = self._match_valid_url(url).group('slug', 'id')
        webpage = self._download_webpage(url, display_id)
        content = self._search_json(
            r'var\s+content\s*=\s*', webpage, 'content data',
            display_id, end_pattern=';')
        topic_data = content['category']['unit']['topic']
        token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
        return self._parse_js_topic_data(topic_data, display_id, token)
|
||||
|
||||
|
||||
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    # jr.brainpop.com; defines only site endpoints and tests, no methods.
    _ORIGIN = 'https://jr.brainpop.com'
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'
    _HLS_URL = 'https://hls-jr.brainpop.com'
    _CDN_URL = 'https://cdn-jr.brainpop.com'

    _TESTS = [
        {
            'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
            'md5': '04e0561bb21770f305a0ce6cf0d869ab',
            'info_dict': {
                'id': '347',
                'ext': 'mp4',
                'title': 'Emotions',
                'display_id': 'emotions',
            },
        },
        {
            'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
            'md5': 'b0ed063bbd1910df00220ee29340f5d6',
            'info_dict': {
                'id': '29',
                'ext': 'mp4',
                'title': 'Arctic Habitats',
                'display_id': 'arctichabitats',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    # ell.brainpop.com; note the media hosts use the "esl" prefix.
    _ORIGIN = 'https://ell.brainpop.com'
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'
    _HLS_URL = 'https://hls-esl.brainpop.com'
    _CDN_URL = 'https://cdn-esl.brainpop.com'

    _TESTS = [
        {
            'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
            'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
            'info_dict': {
                'id': '1',
                'ext': 'mp4',
                'title': 'Lesson 1',
                'display_id': 'lesson1',
                'alt_title': 'Personal Pronouns',
            },
        },
        {
            'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
            'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
            'info_dict': {
                'id': '101',
                'ext': 'mp4',
                'title': 'Lesson 5',
                'display_id': 'lesson5',
                'alt_title': 'Review: Unit 6',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    # esp.brainpop.com; uses the main media hosts with an /mx CDN path.
    IE_DESC = 'BrainPOP Español'
    _ORIGIN = 'https://esp.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/mx'

    _TESTS = [
        {
            'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
            'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
            'info_dict': {
                'id': '3893',
                'ext': 'mp4',
                'title': 'Ecosistemas',
                'display_id': 'ecosistemas',
                'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
            },
        },
        {
            'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
            'md5': '98c1b9559e0e33777209c425cda7dac4',
            'info_dict': {
                'id': '7146',
                'ext': 'mp4',
                'title': 'Emily Dickinson',
                'display_id': 'emily_dickinson',
                'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    # fr.brainpop.com; uses the main media hosts with an /fr CDN path.
    IE_DESC = 'BrainPOP Français'
    _ORIGIN = 'https://fr.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/fr'

    _TESTS = [
        {
            'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
            'md5': '97e7f48af8af93f8a2be11709f239371',
            'info_dict': {
                'id': '1651',
                'ext': 'mp4',
                'title': 'Sources d\'énergie',
                'display_id': 'sourcesdenergie',
                'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
            },
        },
        {
            'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
            'md5': '0cf2b4f89804d0dd4a360a51310d445a',
            'info_dict': {
                'id': '5803',
                'ext': 'mp4',
                'title': 'Plagiat',
                'display_id': 'plagiat',
                'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||
|
||||
|
||||
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    # il.brainpop.com; uses the main media hosts with an /he CDN path.
    IE_DESC = 'BrainPOP Hebrew'
    _ORIGIN = 'https://il.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com/he'

    _TESTS = [
        {
            'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
            'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
            'info_dict': {
                'id': '3782',
                'ext': 'mp4',
                'title': 'md5:e993632fcda0545d9205602ec314ad67',
                'display_id': 'subjects_3782',
                'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
            },
        },
    ]
|
|
@ -1,117 +1,185 @@
|
|||
import re
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
int_or_none,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
try_get,
|
||||
dict_get,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||
'info_dict': {
|
||||
'id': 'epL0pmK1kQlT',
|
||||
'id': '3923059',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.0,
|
||||
}
|
||||
'duration': 190.357,
|
||||
'season': 'Season 16',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||
'info_dict': {
|
||||
'id': '9000234570',
|
||||
'ext': 'mp4',
|
||||
'title': 'London Calling',
|
||||
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||
'upload_date': '20230310',
|
||||
'timestamp': 1678410000,
|
||||
'season_number': 20,
|
||||
'episode_number': 1,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'London Calling',
|
||||
'duration': 3266.03,
|
||||
'season': 'Season 20',
|
||||
'chapters': 'count:7',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||
'info_dict': {
|
||||
'id': '3692045',
|
||||
'ext': 'mp4',
|
||||
'title': 'Closing Night',
|
||||
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
|
||||
'upload_date': '20180401',
|
||||
'timestamp': 1522623600,
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Closing Night',
|
||||
'duration': 2629.051,
|
||||
'season': 'Season 1',
|
||||
'chapters': 'count:6',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'info_dict': {
|
||||
'id': '3974019',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||
'upload_date': '20190617',
|
||||
'timestamp': 1560790800,
|
||||
'season_number': 2,
|
||||
'episode_number': 16,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'duration': 68.235,
|
||||
'season': 'Season 2',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._parse_json(self._search_regex(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||
display_id)
|
||||
info = {}
|
||||
settings = self._search_json(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
|
||||
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
'formats': 'm3u,mpeg4',
|
||||
}
|
||||
account_pid, release_pid = [None] * 2
|
||||
tve = settings.get('ls_tve')
|
||||
|
||||
if tve:
|
||||
query['manifest'] = 'm3u'
|
||||
mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
|
||||
if mobj:
|
||||
account_pid, tp_path = mobj.groups()
|
||||
release_pid = tp_path.strip('/').split('/')[-1]
|
||||
else:
|
||||
account_pid = 'HNK2IC'
|
||||
tp_path = release_pid = tve['release_pid']
|
||||
if tve.get('entitlement') == 'auth':
|
||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||
if site == 'bravotv':
|
||||
site = 'bravo'
|
||||
account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
|
||||
account_id = tve['data-mpx-media-account-id']
|
||||
metadata = self._parse_json(
|
||||
tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
|
||||
video_id = tve.get('data-guid') or metadata['guid']
|
||||
if tve.get('data-entitlement') == 'auth':
|
||||
auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
|
||||
site = remove_end(site, 'tv')
|
||||
release_pid = tve['data-release-pid']
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId') or site,
|
||||
tve['title'], release_pid, tve.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid,
|
||||
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||
tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
|
||||
tve['data-title'], release_pid, tve.get('data-rating'))
|
||||
query.update({
|
||||
'switch': 'HLSServiceSecure',
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, release_pid, auth.get('adobePassRequestorId') or site, resource),
|
||||
})
|
||||
|
||||
else:
|
||||
shared_playlist = settings['ls_playlist']
|
||||
account_pid = shared_playlist['account_pid']
|
||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
||||
tp_path = release_pid = metadata.get('release_pid')
|
||||
if not release_pid:
|
||||
release_pid = metadata['guid']
|
||||
tp_path = 'media/guid/2140479951/' + release_pid
|
||||
info.update({
|
||||
'title': metadata['title'],
|
||||
'description': metadata.get('description'),
|
||||
'season_number': int_or_none(metadata.get('season_num')),
|
||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
||||
})
|
||||
query['switch'] = 'progressive'
|
||||
|
||||
tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
|
||||
ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
|
||||
account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
|
||||
account_id = ls_playlist['mpxMediaAccountId']
|
||||
video_id = ls_playlist['defaultGuid']
|
||||
metadata = traverse_obj(
|
||||
ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
|
||||
|
||||
tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}),
|
||||
display_id, fatal=False)
|
||||
if tp_metadata:
|
||||
info.update({
|
||||
'title': tp_metadata.get('title'),
|
||||
'description': tp_metadata.get('description'),
|
||||
'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
||||
'season_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
||||
'episode_number': int_or_none(
|
||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
||||
# For some reason the series is sometimes wrapped into a single element array.
|
||||
'series': try_get(
|
||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
||||
lambda x: x[0] if isinstance(x, list) else x,
|
||||
expected_type=str),
|
||||
'episode': dict_get(
|
||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
||||
})
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'id': release_pid,
|
||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {seconds_or_none}),
|
||||
'end_time': ('endTime', {seconds_or_none}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
chapters = None
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query), video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {seconds_or_none}),
|
||||
'timestamp': ('pubDate', {seconds_or_none}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
|
||||
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
|
||||
}, get_all=False), traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('durationInSeconds', {int_or_none}),
|
||||
'timestamp': ('airDate', {unified_timestamp}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': 'episodeTitle',
|
||||
'series': 'show',
|
||||
}))
|
||||
}
|
||||
|
|
|
@ -575,6 +575,7 @@ def build_format_id(kind):
|
|||
self.raise_no_formats(
|
||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||
|
||||
headers.pop('Authorization', None) # or else http formats will give error 400
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
|
@ -895,8 +896,9 @@ def extract_policy_key():
|
|||
store_pk(policy_key)
|
||||
return policy_key
|
||||
|
||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
||||
headers = {}
|
||||
token = smuggled_data.get('token')
|
||||
api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
|
||||
headers = {'Authorization': f'Bearer {token}'} if token else {}
|
||||
referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
|
||||
if referrer:
|
||||
headers.update({
|
||||
|
|
|
@ -8,14 +8,16 @@
|
|||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
@ -404,7 +406,7 @@ def _real_extract(self, url):
|
|||
|
||||
class CBCGemPlaylistIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca:playlist'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
# TV show playlist, all public videos
|
||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
|
||||
|
@ -414,6 +416,9 @@ class CBCGemPlaylistIE(InfoExtractor):
|
|||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
||||
|
||||
|
@ -473,49 +478,90 @@ def _real_extract(self, url):
|
|||
|
||||
class CBCGemLiveIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca:live'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://gem.cbc.ca/live/920604739687',
|
||||
'info_dict': {
|
||||
'title': 'Ottawa',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||
'is_live': True,
|
||||
'id': 'AyqZwxRqh8EH',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492106160,
|
||||
'upload_date': '20170413',
|
||||
'uploader': 'CBCC-NEW',
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live/920604739687',
|
||||
'info_dict': {
|
||||
'title': 'Ottawa',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||
'is_live': True,
|
||||
'id': 'AyqZwxRqh8EH',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1492106160,
|
||||
'upload_date': '20170413',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
'skip': 'Live might have ended',
|
||||
}
|
||||
|
||||
# It's unclear where the chars at the end come from, but they appear to be
|
||||
# constant. Might need updating in the future.
|
||||
# There are two URLs, some livestreams are in one, and some
|
||||
# in the other. The JSON schema is the same for both.
|
||||
_API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live/44',
|
||||
'info_dict': {
|
||||
'id': '44',
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
'title': r're:^Ottawa [0-9\-: ]+',
|
||||
'description': 'The live TV channel and local programming from Ottawa',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
},
|
||||
{
|
||||
'url': 'https://gem.cbc.ca/live-event/10835',
|
||||
'info_dict': {
|
||||
'id': '10835',
|
||||
'ext': 'mp4',
|
||||
'is_live': True,
|
||||
'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
|
||||
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||
'timestamp': 1679706000,
|
||||
'upload_date': '20230325',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Live might have ended',
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||
|
||||
for api_url in self._API_URLS:
|
||||
video_info = next((
|
||||
stream for stream in self._download_json(api_url, video_id)['entries']
|
||||
if stream.get('guid') == video_id), None)
|
||||
if video_info:
|
||||
break
|
||||
else:
|
||||
# Two types of metadata JSON
|
||||
if not video_info.get('formattedIdMedia'):
|
||||
video_info = traverse_obj(
|
||||
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||
get_all=False, default={})
|
||||
|
||||
video_stream_id = video_info.get('formattedIdMedia')
|
||||
if not video_stream_id:
|
||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||
|
||||
stream_data = self._download_json(
|
||||
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||
'appCode': 'mpx',
|
||||
'connectionType': 'hd',
|
||||
'deviceType': 'ipad',
|
||||
'idMedia': video_stream_id,
|
||||
'multibitrate': 'true',
|
||||
'output': 'json',
|
||||
'tech': 'hls',
|
||||
'manifestType': 'desktop',
|
||||
})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': video_info['content'][0]['url'],
|
||||
'id': video_id,
|
||||
'title': video_info.get('title'),
|
||||
'description': video_info.get('description'),
|
||||
'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
|
||||
'thumbnail': video_info.get('cbc$staticImage'),
|
||||
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||
'is_live': True,
|
||||
**traverse_obj(video_info, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'thumbnail': ('images', 'card', 'url'),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
})
|
||||
}
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformFeedIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
|
@ -162,3 +168,110 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
|||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
||||
})
|
||||
|
||||
|
||||
class ParamountPressExpressIE(InfoExtractor):
    # Handles paramountpressexpress.com watch pages. ".../yt-video/?watch=ID"
    # URLs are handed to the YouTube extractor; ".../video/?watch=ID" URLs are
    # resolved to a Brightcove player URL.
    _VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
            'md5': '56631dbcadaab980d1fc47cb7b76cba4',
            'info_dict': {
                'id': '6322981580112',
                'ext': 'mp4',
                'title': 'I’m Felicia',
                'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
                'uploader_id': '6055873637001',
                'upload_date': '20230320',
                'timestamp': 1679334960,
                'duration': 49.557,
                'thumbnail': r're:^https://.+\.jpg',
                'tags': [],
            },
        },
        {
            'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
            'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
            'info_dict': {
                'id': '6323036027112',
                'ext': 'mp4',
                'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
                'description': 'md5:b929867a357aac5544b783d834c78383',
                'uploader_id': '6055873637001',
                'upload_date': '20230321',
                'timestamp': 1679430180,
                'duration': 132.032,
                'thumbnail': r're:^https://.+\.jpg',
                'tags': [],
            },
        },
        {
            'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
            'info_dict': {
                'id': 'OX9wJWOcqck',
                'ext': 'mp4',
                'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
                'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
                'uploader': 'Paramount Plus',
                'uploader_id': '@paramountplus',
                'uploader_url': 'http://www.youtube.com/@paramountplus',
                'channel': 'Paramount Plus',
                'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
                'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
                'upload_date': '20230316',
                'duration': 88,
                'age_limit': 0,
                'availability': 'public',
                'live_status': 'not_live',
                'playable_in_embed': True,
                'view_count': int,
                'like_count': int,
                'channel_follower_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
                'categories': ['Entertainment'],
                'tags': ['Rugrats'],
            },
        },
        {
            'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
            'info_dict': {
                'id': '_ljssSoDLkw',
                'ext': 'mp4',
                'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
                'description': 'md5:39581bcc3fd810209b642609f448af70',
                'uploader': 'SHOWTIME',
                'uploader_id': '@Showtime',
                'uploader_url': 'http://www.youtube.com/@Showtime',
                'channel': 'SHOWTIME',
                'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
                'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
                'upload_date': '20230209',
                'duration': 49,
                'age_limit': 0,
                'availability': 'public',
                'live_status': 'not_live',
                'playable_in_embed': True,
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'channel_follower_count': int,
                'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
                'categories': ['People & Blogs'],
                'tags': 'count:27',
            },
        },
    ]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        # The optional "yt-" path prefix marks the watch id as a YouTube id.
        if mobj.group('yt'):
            return self.url_result(display_id, YoutubeIE)

        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
        token = self._search_regex(
            r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')

        # Player attributes are optional; each falls back to a fixed default.
        player_html = get_element_html_by_id('vcbrightcoveplayer', webpage)
        player = extract_attributes(player_html or '')
        account_id = player.get('data-account') or '6055873637001'
        player_id = player.get('data-player') or 'OtLKgXlO9F'
        embed = player.get('data-embed') or 'default'

        brightcove_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
        # The page token is smuggled along for the Brightcove extractor.
        return self.url_result(
            smuggle_url(brightcove_url, {'token': token}), BrightcoveNewIE)
|
||||
|
|
61
yt_dlp/extractor/clipchamp.py
Normal file
61
yt_dlp/extractor/clipchamp.py
Normal file
|
@ -0,0 +1,61 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ClipchampIE(InfoExtractor):
    # Extracts clipchamp.com/watch/<id> pages whose video is stored on
    # Cloudflare Stream ('cf_stream'); other storage locations are rejected.
    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [
        {
            'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
            'info_dict': {
                'id': 'gRXZ4ZhdDaU',
                'ext': 'mp4',
                'title': 'Untitled video',
                'uploader': 'Alexander Schwartz',
                'timestamp': 1680805580,
                'upload_date': '20230406',
                'thumbnail': r're:^https?://.+\.jpg',
            },
            'params': {'skip_download': 'm3u8'},
        },
    ]

    # Placeholders: customer subdomain, video path, manifest extension.
    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)
        data = nextjs_data['props']['pageProps']['video']

        storage_location = data.get('storage_location')
        if storage_location != 'cf_stream':
            raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')

        path = data['download_url']
        iframe = self._download_webpage(
            f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
        # Fall back to a fixed customer subdomain if the iframe lacks one.
        subdomain = self._search_regex(
            r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
            'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'

        # DASH is requested first, then HLS; neither failure is fatal.
        dash_formats = self._extract_mpd_formats(
            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
        hls_formats = self._extract_m3u8_formats(
            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls')
        dash_formats.extend(hls_formats)

        creator_names = traverse_obj(
            data, ('creator', ('first_name', 'last_name'), {str}))
        metadata = traverse_obj(data, {
            'title': ('project', 'project_name', {str}),
            'timestamp': ('created_at', {unified_timestamp}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': dash_formats,
            'uploader': ' '.join(creator_names) or None,
            **metadata,
        }
|
|
@ -2998,6 +2998,8 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
|||
'protocol': 'ism',
|
||||
'fragments': fragments,
|
||||
'has_drm': ism_doc.find('Protection') is not None,
|
||||
'language': stream_language,
|
||||
'audio_channels': int_or_none(track.get('Channels')),
|
||||
'_download_params': {
|
||||
'stream_type': stream_type,
|
||||
'duration': duration,
|
||||
|
@ -3528,8 +3530,8 @@ def _RETURN_TYPE(cls):
|
|||
@classmethod
|
||||
def is_single_video(cls, url):
|
||||
"""Returns whether the URL is of a single video, None if unknown"""
|
||||
assert cls.suitable(url), 'The URL must be suitable for the extractor'
|
||||
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
|
||||
if cls.suitable(url):
|
||||
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
|
||||
|
||||
@classmethod
|
||||
def is_suitable(cls, age_limit):
|
||||
|
@ -3671,18 +3673,22 @@ def _extract_chapters_helper(self, chapter_list, start_function, title_function,
|
|||
'start_time': start_function(chapter),
|
||||
'title': title_function(chapter),
|
||||
} for chapter in chapter_list or []]
|
||||
if not strict:
|
||||
if strict:
|
||||
warn = self.report_warning
|
||||
else:
|
||||
warn = self.write_debug
|
||||
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
||||
|
||||
chapters = [{'start_time': 0}]
|
||||
for idx, chapter in enumerate(chapter_list):
|
||||
if chapter['start_time'] is None:
|
||||
self.report_warning(f'Incomplete chapter {idx}')
|
||||
warn(f'Incomplete chapter {idx}')
|
||||
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
||||
chapters.append(chapter)
|
||||
elif chapter not in chapters:
|
||||
self.report_warning(
|
||||
f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
|
||||
issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
|
||||
else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
|
||||
warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
|
||||
return chapters[1:]
|
||||
|
||||
def _extract_chapters_from_description(self, description, duration):
|
||||
|
|
192
yt_dlp/extractor/dlf.py
Normal file
192
yt_dlp/extractor/dlf.py
Normal file
|
@ -0,0 +1,192 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DLFBaseIE(InfoExtractor):
    """Shared helpers for the deutschlandfunk.de extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
    # Matches the opening tag of a play ("Anhören") button carrying the audio metadata
    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'

    def _parse_button_attrs(self, button, audio_id=None):
        """Build an info dict from the HTML of a single play-button tag.

        @param button      HTML of the opening <button> tag
        @param audio_id    ID to use; falls back to the tag's
                           "data-audio-diraid" attribute when falsy
        @returns           Info dict attributed to DLFIE
        """
        button_attrs = extract_attributes(button)
        if not audio_id:
            audio_id = button_attrs['data-audio-diraid']

        # First attribute (in this order) that holds a valid URL wins
        media_url = traverse_obj(
            button_attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
            'data-audio-src', expected_type=url_or_none)
        media_ext = determine_ext(media_url)

        info = {
            'id': audio_id,
            'extractor_key': DLFIE.ie_key(),
            'extractor': DLFIE.IE_NAME,
        }
        info.update(traverse_obj(button_attrs, {
            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
            'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
            'thumbnail': ('data-audioimage', {url_or_none}),
            'uploader': 'data-audio-producer',
            'series': 'data-audio-series',
            'channel': 'data-audio-origin-site-name',
            'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
        }, get_all=False))

        # HLS manifests are expanded into formats; anything else is a direct audio file
        if media_ext == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(media_url, audio_id, fatal=False)
        else:
            info['formats'] = [{'url': media_url, 'ext': media_ext, 'vcodec': 'none'}]
        return info
|
||||
|
||||
|
||||
class DLFIE(DLFBaseIE):
    """Extractor for a single deutschlandfunk.de audio page ("...-dlf-<8 hex digits>-100.html")."""

    IE_NAME = 'dlf'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
    _TESTS = [
        # Audio as an HLS stream
        {
            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
            'info_dict': {
                'id': '03a3eb19',
                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
                'ext': 'm4a',
                'duration': 3298,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'On Stage',
                'channel': 'deutschlandfunk',
            },
            'params': {
                'skip_download': 'm3u8',
            },
            'skip': 'This webpage no longer exists',
        },
        # Audio as a direct mp3 file
        {
            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
            'info_dict': {
                'id': 'd9cc1856',
                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
                'ext': 'mp3',
                'duration': 291,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'Kommentare und Themen der Woche',
                'channel': 'deutschlandfunk',
            },
        },
    ]

    def _real_extract(self, url):
        """Download the page, locate its play button and parse it into an info dict."""
        audio_id = self._match_id(url)
        webpage = self._download_webpage(url, audio_id)

        # The URL's ID takes precedence over the button's own "data-audio-diraid"
        button_html = self._search_regex(self._BUTTON_REGEX, webpage, 'button')
        return self._parse_button_attrs(button_html, audio_id)
|
||||
|
||||
|
||||
class DLFCorpusIE(DLFBaseIE):
    """Playlist extractor for deutschlandfunk.de pages that list several audio buttons."""

    IE_NAME = 'dlf:corpus'
    IE_DESC = 'DLF Multi-feed Archives'
    # The negative lookahead leaves single-audio ("-dlf-<hex id>") pages to DLFIE
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
    _TESTS = [
        # Recorded news broadcast with referrals to related broadcasts
        {
            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
            'info_dict': {
                'id': 'fechten-russland-belarus-ukraine-protest-100',
                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad',
            },
            'playlist_mincount': 5,
            'playlist': [{
                'info_dict': {
                    'id': '1fc5d64a',
                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                    'ext': 'mp3',
                    'duration': 252,
                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
                    'uploader': 'Deutschlandfunk',
                    'series': 'Sport',
                    'channel': 'deutschlandfunk',
                },
            }, {
                'info_dict': {
                    'id': '2ada145f',
                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
                    'ext': 'mp3',
                    'duration': 336,
                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
                    'uploader': 'Deutschlandfunk',
                    'series': 'Deutschlandfunk Nova',
                    'channel': 'deutschlandfunk-nova',
                },
            }, {
                'info_dict': {
                    'id': '5e55e8c9',
                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                    'ext': 'mp3',
                    'duration': 187,
                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                    'uploader': 'Deutschlandfunk',
                    'series': 'Sport am Samstag',
                    'channel': 'deutschlandfunk',
                },
            }, {
                'info_dict': {
                    'id': '47e1a096',
                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
                    'ext': 'mp3',
                    'duration': 602,
                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
                    'uploader': 'Deutschlandfunk',
                    'series': 'Sport am Samstag',
                    'channel': 'deutschlandfunk',
                },
            }, {
                'info_dict': {
                    'id': '5e55e8c9',
                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                    'ext': 'mp3',
                    'duration': 187,
                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                    'uploader': 'Deutschlandfunk',
                    'series': 'Sport am Samstag',
                    'channel': 'deutschlandfunk',
                },
            }],
        },
        # Podcast feed with tag buttons, playlist count fluctuates
        {
            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
            'info_dict': {
                'id': 'kommentare-und-themen-der-woche-100',
                'title': 'Meinung - Kommentare und Themen der Woche',
                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
            },
            'playlist_mincount': 10,
        },
        # Podcast feed with no description
        {
            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
            'info_dict': {
                'id': 'podcast-tolle-idee-100',
                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
            },
            'playlist_mincount': 11,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per play button found on the page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage, default=None)
        title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, default=None)

        # Buttons are collected up front, but each one is only parsed when the
        # entries are consumed; every entry takes its ID from its own button
        buttons = re.findall(self._BUTTON_REGEX, webpage)
        entries = (self._parse_button_attrs(button) for button in buttons)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'description': description,
            'title': title,
            'entries': entries,
        }
|
|
@ -12,7 +12,6 @@
|
|||
mimetype2ext,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
|
@ -25,7 +24,7 @@ class DRTVIE(InfoExtractor):
|
|||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||
)
|
||||
(?P<id>[\da-z_-]+)
|
||||
|
@ -80,7 +79,7 @@ class DRTVIE(InfoExtractor):
|
|||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||
'timestamp': 1546628400,
|
||||
'upload_date': '20190104',
|
||||
'duration': 3504.618,
|
||||
'duration': 3504.619,
|
||||
'formats': 'mincount:20',
|
||||
'release_year': 2017,
|
||||
'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
|
||||
|
@ -101,14 +100,16 @@ class DRTVIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Bonderøven 2019 (1:8)',
|
||||
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
|
||||
'timestamp': 1603188600,
|
||||
'upload_date': '20201020',
|
||||
'timestamp': 1654856100,
|
||||
'upload_date': '20220610',
|
||||
'duration': 2576.6,
|
||||
'season': 'Bonderøven 2019',
|
||||
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
|
||||
'release_year': 2019,
|
||||
'season_number': 2019,
|
||||
'series': 'Frank & Kastaniegaarden'
|
||||
'series': 'Frank & Kastaniegaarden',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -140,10 +141,26 @@ class DRTVIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'this video has been removed',
|
||||
}, {
|
||||
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '14802310112',
|
||||
'timestamp': 1678786200,
|
||||
'duration': 120.043,
|
||||
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
|
||||
'series': 'P4 København regionale nyheder',
|
||||
'upload_date': '20230314',
|
||||
'release_year': 0,
|
||||
'description': 'Hør seneste regionale nyheder fra P4 København.',
|
||||
'season': 'Regionale nyheder',
|
||||
'title': 'Regionale nyheder',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
raw_video_id = self._match_id(url)
|
||||
raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')
|
||||
|
||||
webpage = self._download_webpage(url, raw_video_id)
|
||||
|
||||
|
@ -170,15 +187,17 @@ def _real_extract(self, url):
|
|||
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||
else:
|
||||
programcard_url = _PROGRAMCARD_BASE
|
||||
page = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
|
||||
'data'), '1')['cache']['page']
|
||||
page = page[list(page.keys())[0]]
|
||||
item = try_get(
|
||||
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
|
||||
dict)
|
||||
video_id = item['customId'].split(':')[-1]
|
||||
if is_radio_url:
|
||||
video_id = self._search_nextjs_data(
|
||||
webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
|
||||
else:
|
||||
json_data = self._search_json(
|
||||
r'window\.__data\s*=', webpage, 'data', raw_video_id)
|
||||
video_id = traverse_obj(json_data, (
|
||||
'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
|
||||
{lambda x: x.split(':')[-1]}), get_all=False)
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract video id')
|
||||
query['productionnumber'] = video_id
|
||||
|
||||
data = self._download_json(
|
||||
|
@ -269,10 +288,11 @@ def decrypt_uri(e):
|
|||
f['vcodec'] = 'none'
|
||||
formats.extend(f4m_formats)
|
||||
elif target == 'HLS':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
quality=preference, m3u8_id=format_id,
|
||||
fatal=False))
|
||||
quality=preference, m3u8_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
bitrate = link.get('Bitrate')
|
||||
if bitrate:
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
ExtractorError,
|
||||
UnsupportedError,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
extract_basic_auth,
|
||||
format_field,
|
||||
|
@ -32,6 +33,7 @@
|
|||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
|
@ -866,7 +868,7 @@ class GenericIE(InfoExtractor):
|
|||
},
|
||||
},
|
||||
{
|
||||
# Video.js embed, multiple formats
|
||||
# Youtube embed, formerly: Video.js embed, multiple formats
|
||||
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
|
||||
'info_dict': {
|
||||
'id': 'yygqldloqIk',
|
||||
|
@ -893,6 +895,7 @@ class GenericIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
},
|
||||
# rtl.nl embed
|
||||
{
|
||||
|
@ -2168,6 +2171,33 @@ class GenericIE(InfoExtractor):
|
|||
'age_limit': 18,
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Live HLS direct link',
|
||||
'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8',
|
||||
'info_dict': {
|
||||
'id': 'index',
|
||||
'title': r're:index',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Video.js VOD HLS',
|
||||
'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html',
|
||||
'info_dict': {
|
||||
'id': 'videojs_hls_test',
|
||||
'title': 'video',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'duration': 1800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def report_following_redirect(self, new_url):
|
||||
|
@ -2184,12 +2214,41 @@ def report_detected(self, name, num=1, note=None):
|
|||
|
||||
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
|
||||
|
||||
def _fragment_query(self, url):
|
||||
if self._configuration_arg('fragment_query'):
|
||||
query_string = urllib.parse.urlparse(url).query
|
||||
if query_string:
|
||||
return {'extra_param_to_segment_url': query_string}
|
||||
return {}
|
||||
def _extra_manifest_info(self, info, manifest_url):
    """Apply the manifest-related extractor arguments to an info dict, in place.

    Handles the "fragment_query", "hls_key", "variant_query" and "is_live"
    extractor arguments, then checks the first native-HLS format to set
    "live_status" and/or "duration".

    @param info            Info dict to update; its formats may be modified too
    @param manifest_url    URL of the manifest the formats were extracted from;
                           its query string is the fallback for the *_query args
    @returns               None
    """
    def hex_or_none(value):
        # Accept only hexadecimal strings, with an optional "0x" prefix
        return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None

    segment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
    if segment_query is not None:
        # Preference: query part of the argument, then the raw argument,
        # then the manifest URL's own query string
        info['extra_param_to_segment_url'] = (
            urllib.parse.urlparse(segment_query).query or segment_query
            or urllib.parse.urlparse(manifest_url).query or None)

    # The first "hls_key" value is tried both as a key URI and as a hex key;
    # the second value, if any, is the IV
    hls_key_args = self._configuration_arg('hls_key', casesense=True)
    info['hls_aes'] = traverse_obj(hls_key_args, {
        'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
    }) or None

    playlist_query = self._configuration_arg('variant_query', [None], casesense=True)[0]
    if playlist_query is not None:
        query = urllib.parse.parse_qs(
            urllib.parse.urlparse(playlist_query).query or playlist_query
            or urllib.parse.urlparse(manifest_url).query)
        for variant in self._downloader._get_formats(info):
            variant['url'] = update_url_query(variant['url'], query)

    # Attempt to detect live HLS or set VOD duration
    hls_format = next((
        candidate for candidate in self._downloader._get_formats(info)
        if determine_protocol(candidate) == 'm3u8_native'), None)
    if not hls_format:
        return

    # An explicit "is_live" argument overrides detection and skips the download
    forced_live = self._configuration_arg('is_live', [None])[0]
    if forced_live is not None:
        info['live_status'] = 'not_live' if forced_live == 'false' else 'is_live'
        return

    request_headers = hls_format.get('http_headers') or info.get('http_headers')
    vod_duration = self._extract_m3u8_vod_duration(
        hls_format['url'], info.get('id'), note='Checking m3u8 live status',
        errnote='Failed to download m3u8 media playlist', headers=request_headers)
    # A falsy duration is taken to mean that the stream is live
    if not vod_duration:
        info['live_status'] = 'is_live'
    info['duration'] = info.get('duration') or vod_duration
|
||||
|
||||
def _extract_rss(self, url, video_id, doc):
|
||||
NS_MAP = {
|
||||
|
@ -2397,10 +2456,8 @@ def _real_extract(self, url):
|
|||
subtitles = {}
|
||||
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
||||
info_dict.update(self._fragment_query(url))
|
||||
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
|
||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
||||
info_dict.update(self._fragment_query(url))
|
||||
elif format_id == 'f4m' or ext == 'f4m':
|
||||
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
||||
else:
|
||||
|
@ -2415,6 +2472,7 @@ def _real_extract(self, url):
|
|||
'subtitles': subtitles,
|
||||
'http_headers': headers or None,
|
||||
})
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
return info_dict
|
||||
|
||||
if not self.get_param('test', False) and not is_intentional:
|
||||
|
@ -2427,7 +2485,7 @@ def _real_extract(self, url):
|
|||
if first_bytes.startswith(b'#EXTM3U'):
|
||||
self.report_detected('M3U playlist')
|
||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
||||
info_dict.update(self._fragment_query(url))
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
return info_dict
|
||||
|
||||
# Maybe it's a direct link to a video?
|
||||
|
@ -2478,7 +2536,7 @@ def _real_extract(self, url):
|
|||
doc,
|
||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
info_dict.update(self._fragment_query(url))
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
self.report_detected('DASH manifest')
|
||||
return info_dict
|
||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||
|
@ -2567,8 +2625,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
|
|||
varname = mobj.group(1)
|
||||
sources = variadic(self._parse_json(
|
||||
mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
|
||||
formats = []
|
||||
subtitles = {}
|
||||
formats, subtitles, src = [], {}, None
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, str):
|
||||
|
@ -2591,8 +2648,6 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
|
|||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
for fmt in formats:
|
||||
fmt.update(self._fragment_query(src))
|
||||
|
||||
if not formats:
|
||||
formats.append({
|
||||
|
@ -2608,11 +2663,11 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
|
|||
for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
|
||||
sub = self._parse_json(
|
||||
sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
|
||||
src = str_or_none(sub.get('src'))
|
||||
if not src:
|
||||
sub_src = str_or_none(sub.get('src'))
|
||||
if not sub_src:
|
||||
continue
|
||||
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
|
||||
'url': urllib.parse.urljoin(url, src),
|
||||
'url': urllib.parse.urljoin(url, sub_src),
|
||||
'name': sub.get('label'),
|
||||
'http_headers': {
|
||||
'Referer': actual_url,
|
||||
|
@ -2620,7 +2675,10 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
|
|||
})
|
||||
if formats or subtitles:
|
||||
self.report_detected('video.js embed')
|
||||
return [{'formats': formats, 'subtitles': subtitles}]
|
||||
info_dict = {'formats': formats, 'subtitles': subtitles}
|
||||
if formats:
|
||||
self._extra_manifest_info(info_dict, src)
|
||||
return [info_dict]
|
||||
|
||||
# Look for generic KVS player (before json-ld bc of some urls that break otherwise)
|
||||
found = self._search_regex((
|
||||
|
@ -2795,10 +2853,10 @@ def filter_video(urls):
|
|||
return [self._extract_xspf_playlist(video_url, video_id)]
|
||||
elif ext == 'm3u8':
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
|
||||
entry_info_dict.update(self._fragment_query(video_url))
|
||||
self._extra_manifest_info(entry_info_dict, video_url)
|
||||
elif ext == 'mpd':
|
||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
|
||||
entry_info_dict.update(self._fragment_query(video_url))
|
||||
self._extra_manifest_info(entry_info_dict, video_url)
|
||||
elif ext == 'f4m':
|
||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
|
||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
|
||||
class GeniusIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
|
||||
'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
|
||||
|
@ -41,19 +41,37 @@ class GeniusIE(InfoExtractor):
|
|||
'timestamp': 1631209167,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
|
||||
'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
|
||||
'info_dict': {
|
||||
'id': '6321509903112',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
|
||||
'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
|
||||
'tags': ['song id: 5457554'],
|
||||
'uploader_id': '4863540648001',
|
||||
'duration': 361.813,
|
||||
'upload_date': '20230301',
|
||||
'timestamp': 1677703908,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
display_id, is_article = self._match_valid_url(url).group('id', 'article')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
metadata = self._search_json(
|
||||
r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML)
|
||||
video_id = traverse_obj(
|
||||
metadata, ('video', 'provider_id'),
|
||||
('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False)
|
||||
r'<meta content="', webpage, 'metadata', display_id,
|
||||
end_pattern=r'"\s+itemprop="page_data"', transform_source=unescapeHTML)
|
||||
video_id = traverse_obj(metadata, (
|
||||
(('article', 'media', ...), ('video', None)),
|
||||
('provider_id', ('dfp_kv', lambda _, v: v['name'] == 'brightcove_video_id', 'values', ...))),
|
||||
get_all=False)
|
||||
if not video_id:
|
||||
raise ExtractorError('Brightcove video id not found in webpage')
|
||||
# Not all article pages have videos, expect the error
|
||||
raise ExtractorError('Brightcove video ID not found in webpage', expected=bool(is_article))
|
||||
|
||||
config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
|
||||
account_id = config.get('brightcove_account_id', '4863540648001')
|
||||
|
@ -68,7 +86,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class GeniusLyricsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
|
||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://genius.com/Lil-baby-heyy-lyrics',
|
||||
'playlist_mincount': 2,
|
||||
|
|
254
yt_dlp/extractor/globalplayer.py
Executable file
254
yt_dlp/extractor/globalplayer.py
Executable file
|
@ -0,0 +1,254 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
|
||||
|
||||
class GlobalPlayerBaseIE(InfoExtractor):
    """Shared helpers for the globalplayer.com extractors."""

    def _get_page_props(self, url, video_id):
        """Download the page and return the "pageProps" object of its Next.js data."""
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request the stream URL and detect the file extension from the response."""
        # Server rejects HEAD requests
        urlh = self._request_webpage(url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(urlh)

    def _extract_audio(self, episode, series):
        """Build the info dict of an audio episode.

        @param episode    Episode metadata object
        @param series     Metadata object of the series the episode belongs to
        @returns          Info dict; episode fields override series fields
                          on conflict (e.g. "thumbnail")
        """
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', {clean_html}),
            'duration': ('duration', {parse_duration}),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
            'title': 'title',
        }, get_all=False)

        return {
            'vcodec': 'none',
            **series_fields,
            **episode_fields,
        }
|
||||
|
||||
|
||||
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    """Extractor for live radio stations ("/live/<brand>/<region>")."""

    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            'live_status': 'is_live',
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'live_status': 'is_live',
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            'live_status': 'is_live',
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return the live-stream info dict of the station described in the page props."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        station = page_props['station']
        stream_url = station['streamUrl']

        station_id = station['id']
        # "<brandSlug>-<slug>" when available, otherwise the legacy prefix
        display_id = join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix')
        stream_ext = self._request_ext(stream_url, video_id)
        metadata = traverse_obj(station, {
            'title': (('name', 'brandName'), {str_or_none}),
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        }, get_all=False)

        return {
            'id': station_id,
            'display_id': display_id,
            'url': stream_url,
            'ext': stream_ext,
            'vcodec': 'none',
            'is_live': True,
            **metadata,
        }
|
||||
|
||||
|
||||
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    """Extractor for "live playlists" ("/playlists/<id>"), which are returned as live streams."""

    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            'live_status': 'is_live',
            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:^Classic FM Hall of Fame.+$',
        },
    }]

    def _real_extract(self, url):
        """Return the live-stream info dict built from the page's "playlistData"."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        playlist_data = page_props['playlistData']
        stream_url = playlist_data['streamUrl']

        stream_ext = self._request_ext(stream_url, video_id)
        metadata = traverse_obj(playlist_data, {
            'title': 'title',
            'description': 'description',
            'thumbnail': 'image',
        })

        return {
            'id': video_id,
            'url': stream_url,
            'ext': stream_ext,
            'vcodec': 'none',
            'is_live': True,
            **metadata,
        }
|
||||
|
||||
|
||||
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    """Playlist extractor for podcast series and radio catchup shows."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of all episodes listed for the series."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The series object sits under a different key for podcasts and catchup shows
        series_key = 'podcastInfo' if podcast else 'catchupInfo'
        series = props[series_key]

        # Only episodes that have both an ID and a stream URL are usable
        episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
        entries = [self._extract_audio(episode, series) for episode in episodes]
        categories = traverse_obj(series, ('categories', ..., 'name')) or None
        metadata = traverse_obj(series, {
            'description': 'description',
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        })

        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
            'categories': categories,
            **metadata,
        }
|
||||
|
||||
|
||||
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    """Extractor for a single podcast or radio catchup episode."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of the episode described in the page props."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The episode object sits under a different key for podcasts and catchup shows
        episode_key = 'podcastEpisode' if podcast else 'catchupEpisode'
        episode = props[episode_key]

        # The parent series is embedded under "podcast" or "show"; may be absent
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
        return self._extract_audio(episode, series)
|
||||
|
||||
|
||||
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    # Video pages under /videos/<id>
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a video page from its `videoData` page props."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        meta = page_props['videoData']

        # Map the site's metadata fields onto info-dict fields
        fields = traverse_obj(meta, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', {unified_strdate}),
            'description': 'description',
        })
        return {'id': video_id, **fields}
|
83
yt_dlp/extractor/gmanetwork.py
Normal file
83
yt_dlp/extractor/gmanetwork.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
from .common import InfoExtractor
|
||||
from .dailymotion import DailymotionIE
|
||||
from .youtube import YoutubeIE
|
||||
|
||||
|
||||
class GMANetworkVideoIE(InfoExtractor):
    # GMA Network video pages; the media itself is hosted on YouTube or Dailymotion,
    # so this extractor only resolves the page to the hosting extractor.
    _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
    _TESTS = [{
        'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
        'info_dict': {
            'id': '28BqW0AXPe0',
            'ext': 'mp4',
            'upload_date': '20220919',
            'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'like_count': int,
            'view_count': int,
            'uploader': 'YoüLOL',
            'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'duration': 5313,
            'comment_count': int,
            'tags': 'count:22',
            'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
            'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
            'release_timestamp': 1663594212,
            'age_limit': 0,
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'description': 'md5:811bdcea74f9c48051824e494756e926',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'YoüLOL',
            'availability': 'public',
            'release_date': '20220919',
        }
    }, {
        'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
        'info_dict': {
            'id': 'yiDOExw2aSA',
            'ext': 'mp4',
            'live_status': 'not_live',
            'channel': 'GMANetwork',
            'like_count': int,
            'channel_follower_count': int,
            'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
            'duration': 1419,
            'age_limit': 0,
            'comment_count': int,
            'upload_date': '20181003',
            'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
            'title': 'More Than Words: Full Episode 80 (Finale)',
            'uploader_id': 'GMANETWORK',
            'categories': ['Entertainment'],
            'uploader': 'GMANetwork',
            'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
            'tags': 'count:29',
            'view_count': int,
            'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
        }
    }]

    def _real_extract(self, url):
        """Resolve a GMA Network page to a YouTube or Dailymotion result.

        First looks for a `YOUTUBE_VIDEO` variable in the page; if absent,
        queries the content API found via the page's `NETWORK_URL` variable
        and delegates on `video_file` (YouTube) or `dailymotion_file`.
        """
        mobj = self._match_valid_url(url)
        content_id = mobj.group('id')
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)

        # webpage route: the YouTube id may be embedded directly in the page
        embedded_id = self._search_regex(
            r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<yt_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
        if youtube_id := embedded_id:
            return self.url_result(youtube_id, YoutubeIE, youtube_id)

        # api call route
        # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
        network_url = self._search_regex(
            r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
        api_url = f'{network_url}api/data/content/video/{content_id}'
        json_data = self._download_json(api_url, display_id)

        video_file = json_data.get('video_file')
        if video_file:
            return self.url_result(video_file, YoutubeIE, video_file)

        # No YouTube file: the entry is expected to carry a Dailymotion id instead
        dailymotion_file = json_data['dailymotion_file']
        return self.url_result(dailymotion_file, DailymotionIE, dailymotion_file)
|
|
@ -3,6 +3,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
)
|
||||
|
@ -19,7 +20,9 @@ class GronkhIE(InfoExtractor):
|
|||
'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
|
||||
'upload_date': '20221111'
|
||||
'upload_date': '20221111',
|
||||
'chapters': 'count:3',
|
||||
'duration': 31463,
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
|
@ -30,7 +33,8 @@ class GronkhIE(InfoExtractor):
|
|||
'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
|
||||
'upload_date': '20211001'
|
||||
'upload_date': '20211001',
|
||||
'duration': 32058,
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
|
@ -56,6 +60,12 @@ def _real_extract(self, url):
|
|||
'upload_date': unified_strdate(data_json.get('created_at')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(data_json.get('source_length')),
|
||||
'chapters': traverse_obj(data_json, (
|
||||
'chapters', lambda _, v: float_or_none(v['offset']) is not None, {
|
||||
'title': 'title',
|
||||
'start_time': ('offset', {float_or_none}),
|
||||
})) or None,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,37 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HentaiStigmaIE(InfoExtractor):
    # Post pages embed an iframe "wrapper" page which in turn names the media file.
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
        'info_dict': {
            'id': 'inyouchuu-etsu-bonus',
            'ext': 'mp4',
            'title': 'Inyouchuu Etsu Bonus',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Follow the post's iframe to the wrapper page and return the direct media URL."""
        video_id = self._match_id(url)
        post_html = self._download_webpage(url, video_id)

        post_title = self._html_search_regex(
            r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
            post_html, 'title')

        # The iframe points at an intermediate page whose player config holds the file URL
        wrapper_html = self._download_webpage(
            self._html_search_regex(
                r'<iframe[^>]+src="([^"]+mp4)"', post_html, 'wrapper url'),
            video_id)
        media_url = self._html_search_regex(
            r'file\s*:\s*"([^"]+)"', wrapper_html, 'video url')

        info = {'id': video_id, 'url': media_url}
        info['title'] = post_title
        info['age_limit'] = 18
        return info
|
72
yt_dlp/extractor/hollywoodreporter.py
Normal file
72
yt_dlp/extractor/hollywoodreporter.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
)
|
||||
|
||||
|
||||
class HollywoodReporterIE(InfoExtractor):
    # Video landing pages; the actual media is served by JW Player or YouTube.
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
        'info_dict': {
            'id': 'zH4jZaR5',
            'ext': 'mp4',
            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
            'upload_date': '20230312',
            'timestamp': 1678586423,
            'duration': 242.0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Delegate to the hosting extractor named by the page's showcase link.

        Raises ExtractorError when the showcase type is neither 'jwplayer'
        nor 'youtube'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The showcase link element carries the hosted video id and provider as data-* attributes
        link_html = get_element_html_by_class('vlanding-video-card__link', webpage) or ''
        attrs = extract_attributes(link_html)
        video_id = attrs['data-video-showcase-trigger']
        showcase_type = attrs['data-video-showcase-type']

        if showcase_type == 'youtube':
            return self.url_result(video_id, 'Youtube')
        if showcase_type == 'jwplayer':
            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
        raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
|
||||
|
||||
|
||||
class HollywoodReporterPlaylistIE(InfoExtractor):
    # Video category listings, paginated under .../page/<n>/
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
        'playlist_mincount': 109,
        'info_dict': {
            'id': '57822',
            'title': 'heat-vision-breakdown',
        }
    }]

    def _fetch_page(self, slug, pl_id, page):
        """Yield url results for the videos linked on one listing page.

        `page` is incremented by one before it is used in the page URL.
        """
        page_number = page + 1
        listing_html = self._download_webpage(
            f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page_number}/',
            pl_id, note=f'Downloading playlist page {page_number}')

        # Only links inside the playlist "river" section are entries
        river = get_element_by_class('video-playlist-river', listing_html) or ''
        video_urls = re.findall(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', river)
        yield from (self.url_result(video_url, HollywoodReporterIE) for video_url in video_urls)

    def _real_extract(self, url):
        """Return a lazily paged playlist (page size 15) titled with the URL slug."""
        mobj = self._match_valid_url(url)
        slug = mobj.group('slug')
        pl_id = mobj.group('id')
        page_fetcher = functools.partial(self._fetch_page, slug, pl_id)
        return self.playlist_result(OnDemandPagedList(page_fetcher, 15), pl_id, slug)
|
15
yt_dlp/extractor/hrefli.py
Normal file
15
yt_dlp/extractor/hrefli.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class HrefLiRedirectIE(InfoExtractor):
    # href.li links carry the destination URL verbatim after the '?'
    IE_NAME = 'href.li'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://href\.li/\?(?P<url>.+)'

    _TESTS = [{
        'url': 'https://href.li/?https://www.reddit.com/r/cats/comments/12bluel/my_cat_helps_me_with_water/?utm_source=share&utm_medium=android_app&utm_name=androidcss&utm_term=1&utm_content=share_button',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the URL captured after '?' back for regular extraction."""
        target_url = self._match_valid_url(url).group('url')
        return self.url_result(target_url)
|
|
@ -1,239 +1,199 @@
|
|||
import itertools
|
||||
import re
|
||||
import functools
|
||||
import urllib.parse
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
remove_end,
|
||||
strip_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
qualities,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class IwaraBaseIE(InfoExtractor):
    # Shared URL prefix; captures the scheme+host as `base_url`
    _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'

    def _extract_playlist(self, base_url, webpage):
        """Yield a url result for every title link found in `webpage`, resolved against `base_url`."""
        hrefs = re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage)
        yield from (self.url_result(urljoin(base_url, href)) for href in hrefs)
|
||||
|
||||
|
||||
class IwaraIE(IwaraBaseIE):
|
||||
_VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
|
||||
class IwaraIE(InfoExtractor):
|
||||
IE_NAME = 'iwara'
|
||||
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
||||
# md5 is unstable
|
||||
# this video cannot be played because of migration
|
||||
'only_matching': True,
|
||||
'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
|
||||
'info_dict': {
|
||||
'id': 'amVwUl1EHpAD9RD',
|
||||
'id': 'k2ayoueezfkx6gvq',
|
||||
'ext': 'mp4',
|
||||
'title': '【MMD R-18】ガールフレンド carry_me_off',
|
||||
'age_limit': 18,
|
||||
'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png',
|
||||
'uploader': 'Reimu丨Action',
|
||||
'upload_date': '20150828',
|
||||
'description': 'md5:1d4905ce48c66c9299c617f08e106e0f',
|
||||
'title': 'Defeat of Irybelda - アイリベルダの敗北',
|
||||
'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
|
||||
'uploader': 'Inwerwm',
|
||||
'uploader_id': 'inwerwm',
|
||||
'tags': 'count:1',
|
||||
'like_count': 6133,
|
||||
'view_count': 1050343,
|
||||
'comment_count': 1,
|
||||
'timestamp': 1677843869,
|
||||
'modified_timestamp': 1679056362,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
|
||||
'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
|
||||
'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
|
||||
'md5': '20691ce1473ec2766c0788e14c60ce66',
|
||||
'info_dict': {
|
||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
||||
'ext': 'mp4',
|
||||
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'add_ie': ['GoogleDrive'],
|
||||
}, {
|
||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
||||
# md5 is unstable
|
||||
'info_dict': {
|
||||
'id': '6liAP9s2Ojc',
|
||||
'id': '1ywe1sbkqwumpdxz5',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 18,
|
||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
||||
'upload_date': '20160910',
|
||||
'uploader': 'aMMDsork',
|
||||
'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
|
||||
'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿',
|
||||
'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
|
||||
'uploader': '龙也zZZ',
|
||||
'uploader_id': 'user792540',
|
||||
'tags': [
|
||||
'uncategorized'
|
||||
],
|
||||
'like_count': 1809,
|
||||
'view_count': 25156,
|
||||
'comment_count': 1,
|
||||
'timestamp': 1678732213,
|
||||
'modified_timestamp': 1679110271,
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}]
|
||||
|
||||
def _extract_formats(self, video_id, fileurl):
    """Yield one format dict per entry of the JSON file list served at `fileurl`.

    The request must carry an `X-Version` header: the SHA-1 hex digest of the
    last path component, the `expires` query value and a fixed key, joined
    with underscores.
    """
    parsed = urllib.parse.urlparse(fileurl)
    query = urllib.parse.parse_qs(parsed.query)
    file_name = parsed.path.rstrip('/').split('/')[-1]
    # https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
    signature_input = '_'.join((file_name, query['expires'][0], '5nFp9kmbNnHdAFhaqMvt'))
    x_version = hashlib.sha1(signature_input.encode()).hexdigest()

    # Ranks the known format names, listed from lowest to highest
    preference = qualities(['preview', '360', '540', 'Source'])

    request_headers = {'X-Version': x_version}
    for entry in self._download_json(fileurl, video_id, headers=request_headers):
        yield traverse_obj(entry, {
            'format_id': 'name',
            'url': ('src', ('view', 'download'), {self._proto_relative_url}),
            'ext': ('type', {mimetype2ext}),
            'quality': ('name', {preference}),
            'height': ('name', {int_or_none}),
        }, get_all=False)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
|
||||
errmsg = video_data.get('message')
|
||||
# at this point we can actually get uploaded user info, but do we need it?
|
||||
if errmsg == 'errors.privateVideo':
|
||||
self.raise_login_required('Private video. Login if you have permissions to watch')
|
||||
elif errmsg:
|
||||
raise ExtractorError(f'Iwara says: {errmsg}')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
|
||||
hostname = urllib.parse.urlparse(urlh.geturl()).hostname
|
||||
# ecchi is 'sexy' in Japanese
|
||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
||||
|
||||
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
||||
|
||||
if not video_data:
|
||||
iframe_url = self._html_search_regex(
|
||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
||||
webpage, 'iframe URL', group='url')
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': iframe_url,
|
||||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
title = remove_end(self._html_extract_title(webpage), ' | Iwara')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
|
||||
|
||||
uploader = self._html_search_regex(
|
||||
r'class="username">([^<]+)', webpage, 'uploader', fatal=False)
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False))
|
||||
|
||||
description = strip_or_none(self._search_regex(
|
||||
r'<p>(.+?(?=</div))', webpage, 'description', fatal=False,
|
||||
flags=re.DOTALL))
|
||||
|
||||
formats = []
|
||||
for a_format in video_data:
|
||||
format_uri = url_or_none(a_format.get('uri'))
|
||||
if not format_uri:
|
||||
continue
|
||||
format_id = a_format.get('resolution')
|
||||
height = int_or_none(self._search_regex(
|
||||
r'(\d+)p', format_id, 'height', default=None))
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(format_uri, 'https:'),
|
||||
'format_id': format_id,
|
||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
||||
'height': height,
|
||||
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
||||
'quality': 1 if format_id == 'Source' else 0,
|
||||
})
|
||||
if not video_data.get('fileUrl'):
|
||||
if video_data.get('embedUrl'):
|
||||
return self.url_result(video_data.get('embedUrl'))
|
||||
raise ExtractorError('This video is unplayable', expected=True)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
|
||||
'uploader': uploader,
|
||||
'upload_date': upload_date,
|
||||
'description': description,
|
||||
'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0, # ecchi is 'sexy' in Japanese
|
||||
**traverse_obj(video_data, {
|
||||
'title': 'title',
|
||||
'description': 'body',
|
||||
'uploader': ('user', 'name'),
|
||||
'uploader_id': ('user', 'username'),
|
||||
'tags': ('tags', ..., 'id'),
|
||||
'like_count': 'numLikes',
|
||||
'view_count': 'numViews',
|
||||
'comment_count': 'numComments',
|
||||
'timestamp': ('createdAt', {unified_timestamp}),
|
||||
'modified_timestamp': ('updatedAt', {unified_timestamp}),
|
||||
'thumbnail': ('file', 'id', {str}, {
|
||||
lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
|
||||
}),
|
||||
'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
|
||||
}
|
||||
|
||||
|
||||
class IwaraPlaylistIE(IwaraBaseIE):
    # HTML-scraping playlist extractor; the id is the (possibly URL-encoded) playlist slug
    _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'
    IE_NAME = 'iwara:playlist'

    _TESTS = [{
        'url': 'https://ecchi.iwara.tv/playlist/best-enf',
        'info_dict': {
            'title': 'Best enf',
            'uploader': 'Jared98112',
            'id': 'best-enf',
        },
        'playlist_mincount': 1097,
    }, {
        # urlencoded
        'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
        'info_dict': {
            'id': 'プレイリスト-2',
            'title': 'プレイリスト',
            'uploader': 'mainyu',
        },
        'playlist_mincount': 91,
    }]

    def _real_extract(self, url):
        """Return a playlist dict whose title, uploader and entries are scraped from the page HTML."""
        mobj = self._match_valid_url(url)
        base_url = mobj.group('base_url')
        # The slug may be percent-encoded in the URL; use the decoded form as the id
        playlist_id = urllib.parse.unquote(mobj.group('id'))
        webpage = self._download_webpage(url, playlist_id)

        title = self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False)
        uploader = self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False)
        entries = self._extract_playlist(base_url, webpage)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': title,
            'uploader': uploader,
            'entries': entries,
        }
|
||||
|
||||
|
||||
class IwaraUserIE(IwaraBaseIE):
|
||||
_VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'
|
||||
class IwaraUserIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
|
||||
IE_NAME = 'iwara:user'
|
||||
_PER_PAGE = 32
|
||||
|
||||
_TESTS = [{
|
||||
'note': 'number of all videos page is just 1 page. less than 40 videos',
|
||||
'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
|
||||
'url': 'https://iwara.tv/profile/user792540/videos',
|
||||
'info_dict': {
|
||||
'title': 'Uploaded videos from Infinity_YukariP',
|
||||
'id': 'infinityyukarip',
|
||||
'uploader': 'Infinity_YukariP',
|
||||
'uploader_id': 'infinityyukarip',
|
||||
'id': 'user792540',
|
||||
},
|
||||
'playlist_mincount': 39,
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'note': 'no even all videos page. probably less than 10 videos',
|
||||
'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
|
||||
'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
|
||||
'info_dict': {
|
||||
'title': 'Uploaded videos from mmd quintet',
|
||||
'id': 'mmd-quintet',
|
||||
'uploader': 'mmd quintet',
|
||||
'uploader_id': 'mmd-quintet',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'note': 'has paging. more than 40 videos',
|
||||
'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
|
||||
'info_dict': {
|
||||
'title': 'Uploaded videos from TheBlackbirdCalls',
|
||||
'id': 'theblackbirdcalls',
|
||||
'uploader': 'TheBlackbirdCalls',
|
||||
'uploader_id': 'theblackbirdcalls',
|
||||
},
|
||||
'playlist_mincount': 420,
|
||||
'playlist_mincount': 723,
|
||||
}, {
|
||||
'note': 'foreign chars in URL. there must be foreign characters in URL',
|
||||
'url': 'https://ecchi.iwara.tv/users/ぶた丼',
|
||||
'info_dict': {
|
||||
'title': 'Uploaded videos from ぶた丼',
|
||||
'id': 'ぶた丼',
|
||||
'uploader': 'ぶた丼',
|
||||
'uploader_id': 'ぶた丼',
|
||||
},
|
||||
'playlist_mincount': 170,
|
||||
'url': 'https://iwara.tv/profile/user792540',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://iwara.tv/profile/theblackbirdcalls',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, playlist_id, base_url):
    """Yield the user's videos, scraped from the profile HTML.

    If the profile links to an "all videos" listing, that listing is walked
    page by page; otherwise the entries on the profile page itself are used.
    """
    profile_html = self._download_webpage(
        f'{base_url}/users/{playlist_id}', playlist_id)
    listing_path = self._search_regex(
        r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', profile_html, 'all videos url', default=None)
    if not listing_path:
        yield from self._extract_playlist(base_url, profile_html)
        return

    listing_url = urljoin(base_url, listing_path)

    page_index = 0
    while True:
        page_number = page_index + 1
        # The first page is requested without a query; later pages use a zero-based index
        page_query = {'page': str(page_index)} if page_index else {}
        page_html = self._download_webpage(
            listing_url, playlist_id, note=f'Downloading playlist page {page_number}',
            query=page_query)
        yield from self._extract_playlist(base_url, page_html)

        # Stop when the page no longer references the next page index
        if f'page={page_number}' not in page_html:
            return
        page_index = page_number
|
||||
def _entries(self, playlist_id, user_id, page):
    """Yield url results for one page of the user's videos from the JSON API, sorted by date."""
    api_query = {
        'page': page,
        'sort': 'date',
        'user': user_id,
        'limit': self._PER_PAGE,
    }
    listing = self._download_json(
        'https://api.iwara.tv/videos', playlist_id,
        note=f'Downloading page {page}', query=api_query)
    video_ids = traverse_obj(listing, ('results', ..., 'id'))
    yield from (self.url_result(f'https://iwara.tv/video/{video_id}') for video_id in video_ids)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
|
||||
playlist_id = urllib.parse.unquote(playlist_id)
|
||||
playlist_id = self._match_id(url)
|
||||
user_info = self._download_json(
|
||||
f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
|
||||
note='Requesting user info')
|
||||
user_id = traverse_obj(user_info, ('user', 'id'))
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(playlist_id, base_url), playlist_id)
|
||||
OnDemandPagedList(
|
||||
functools.partial(self._entries, playlist_id, user_id),
|
||||
self._PER_PAGE),
|
||||
playlist_id, traverse_obj(user_info, ('user', 'name')))
|
||||
|
||||
|
||||
class IwaraPlaylistIE(InfoExtractor):
    # the ID is an UUID but I don't think it's necessary to write concrete regex
    _VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
    IE_NAME = 'iwara:playlist'
    # Number of entries requested per API page; also the page size given to OnDemandPagedList
    _PER_PAGE = 32

    _TESTS = [{
        'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
        'info_dict': {
            'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
        },
        'playlist_mincount': 3,
    }]

    def _entries(self, playlist_id, first_page, page):
        """Yield url results for one page of the playlist.

        `first_page` is the already-downloaded JSON for page 0 and is reused
        as-is when `page` is 0; every other page is downloaded on demand.
        """
        if page:
            # Later pages must come from the same playlist endpoint as page 0.
            # The generic /videos listing carries no playlist filter, so it
            # would yield videos unrelated to this playlist.
            # NOTE(review): assumes the playlist endpoint honours `page` for
            # page > 0 the same way it does for page 0 - confirm against the API.
            videos = self._download_json(
                f'https://api.iwara.tv/playlist/{playlist_id}', playlist_id, f'Downloading page {page}',
                query={'page': page, 'limit': self._PER_PAGE})
        else:
            videos = first_page
        for video_id in traverse_obj(videos, ('results', ..., 'id')):
            yield self.url_result(f'https://iwara.tv/video/{video_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist for the playlist URL.

        Page 0 is fetched eagerly because it also supplies the playlist title.
        """
        playlist_id = self._match_id(url)
        page_0 = self._download_json(
            f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
            note='Requesting playlist info')

        return self.playlist_result(
            OnDemandPagedList(
                functools.partial(self._entries, playlist_id, page_0),
                self._PER_PAGE),
            playlist_id, traverse_obj(page_0, ('title', 'name')))
|
||||
|
|
|
@ -8,14 +8,16 @@ class JWPlatformIE(InfoExtractor):
|
|||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||
_TESTS = [{
|
||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
||||
'md5': '3aa16e4f6860e6e78b7df5829519aed3',
|
||||
'info_dict': {
|
||||
'id': 'nPripu9l',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Big Buck Bunny Trailer',
|
||||
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
|
||||
'upload_date': '20081127',
|
||||
'timestamp': 1227796140,
|
||||
'duration': 32.0,
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
||||
|
@ -37,18 +39,31 @@ class JWPlatformIE(InfoExtractor):
|
|||
},
|
||||
}, {
|
||||
# Player url not surrounded by quotes
|
||||
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
|
||||
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
|
||||
'info_dict': {
|
||||
'id': 'R10NQdhY',
|
||||
'title': 'Playgirl',
|
||||
'id': 'jUxh5uin',
|
||||
'title': 'Klassenfahrt',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20220624',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
|
||||
'timestamp': 1656064800,
|
||||
'description': 'BRD 1966, Will Tremper',
|
||||
'duration': 5146.0,
|
||||
'upload_date': '20230109',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720',
|
||||
'timestamp': 1673270298,
|
||||
'description': '',
|
||||
'duration': 5193.0,
|
||||
},
|
||||
'params': {'allowed_extractors': ['generic', 'jwplatform']},
|
||||
}, {
|
||||
# iframe src attribute includes backslash before URL string
|
||||
'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
|
||||
'info_dict': {
|
||||
'id': 'QD3gsexj',
|
||||
'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230127',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720',
|
||||
'timestamp': 1674862986,
|
||||
'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
|
||||
'duration': 263.0,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -57,7 +72,7 @@ def _extract_embed_urls(cls, url, webpage):
|
|||
# <input value=URL> is used by hyland.com
|
||||
# if we find <iframe>, dont look for <input>
|
||||
ret = re.findall(
|
||||
r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
|
||||
r'<%s[^>]+?%s=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
|
||||
webpage)
|
||||
if ret:
|
||||
return ret
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
class KickBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session')
|
||||
self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
|
||||
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
|
||||
if not xsrf_token:
|
||||
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
|
||||
|
|
|
@ -1,33 +1,24 @@
|
|||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, format_field
|
||||
from ..utils import int_or_none, parse_qs, traverse_obj
|
||||
|
||||
|
||||
class LastFMPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, url, playlist_id):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
start_page_number = int_or_none(self._search_regex(
|
||||
r'\bpage=(\d+)', url, 'page', default=None)) or 1
|
||||
last_page_number = int_or_none(self._search_regex(
|
||||
r'>(\d+)</a>[^<]*</li>[^<]*<li[^>]+class="pagination-next', webpage, 'last_page', default=None))
|
||||
|
||||
for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
|
||||
single_page = traverse_obj(parse_qs(url), ('page', -1, {int_or_none}))
|
||||
for page in itertools.count(single_page or 1):
|
||||
webpage = self._download_webpage(
|
||||
url, playlist_id,
|
||||
note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
|
||||
query={'page': page_number})
|
||||
page_entries = [
|
||||
self.url_result(player_url, 'Youtube')
|
||||
for player_url in set(re.findall(r'data-youtube-url="([^"]+)"', webpage))
|
||||
]
|
||||
|
||||
for e in page_entries:
|
||||
yield e
|
||||
url, playlist_id, f'Downloading page {page}', query={'page': page})
|
||||
videos = re.findall(r'data-youtube-url="([^"]+)"', webpage)
|
||||
yield from videos
|
||||
if single_page or not videos:
|
||||
return
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(url, playlist_id), playlist_id)
|
||||
return self.playlist_from_matches(self._entries(url, playlist_id), playlist_id, ie='Youtube')
|
||||
|
||||
|
||||
class LastFMPlaylistIE(LastFMPlaylistBaseIE):
|
||||
|
@ -37,7 +28,7 @@ class LastFMPlaylistIE(LastFMPlaylistBaseIE):
|
|||
'info_dict': {
|
||||
'id': 'Oasis',
|
||||
},
|
||||
'playlist_count': 11,
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'https://www.last.fm/music/Oasis',
|
||||
'only_matching': True,
|
||||
|
@ -73,6 +64,18 @@ class LastFMUserIE(LastFMPlaylistBaseIE):
|
|||
'id': '12319471',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
}, {
|
||||
'url': 'https://www.last.fm/user/naamloos1/playlists/12543760',
|
||||
'info_dict': {
|
||||
'id': '12543760',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'https://www.last.fm/user/naamloos1/playlists/12543760?page=3',
|
||||
'info_dict': {
|
||||
'id': '12543760',
|
||||
},
|
||||
'playlist_count': 32,
|
||||
}]
|
||||
|
||||
|
||||
|
|
|
@ -1,143 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class LineLiveBaseIE(InfoExtractor):
    """Common base of the LINE LIVE extractors: API root and broadcast-item parsing."""

    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Turn one broadcast item of an API response into an info dict.

        ``item['id']`` and ``item['title']`` are mandatory (a missing key raises
        ``KeyError``); every other field is read leniently and may end up ``None``.
        """
        # Mandatory fields are read first so that a malformed item fails early
        video_id = compat_str(item['id'])
        video_title = item['title']

        # Keep only the thumbnail variants that actually carry a URL
        thumbnails = [
            {'id': thumb_name, 'url': thumb_url}
            for thumb_name, thumb_url in (item.get('thumbnailURLs') or {}).items()
            if thumb_url
        ]

        channel_info = item.get('channel') or {}
        channel_id = str_or_none(channel_info.get('id'))
        # format_field yields an empty value when channel_id is None
        channel_url = format_field(channel_id, None, 'https://live.line.me/channels/%s')

        return {
            'id': video_id,
            'title': video_title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel_info.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': item.get('isBroadcastingNow'),
        }
|
||||
|
||||
|
||||
class LineLiveIE(LineLiveBaseIE):
    """Extractor for a single LINE LIVE broadcast (live or archived)."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/5833718/broadcast/18373277',
        'md5': '2c15843b8cb3acd55009ddcb2db91f7c',
        'info_dict': {
            'id': '18373277',
            'title': '2021/12/05 (15分犬)定例譲渡会🐶',
            'ext': 'mp4',
            'timestamp': 1638674925,
            'upload_date': '20211205',
            'thumbnail': 'md5:e1f5817e60f4a72b7e43377cf308d7ef',
            'channel_url': 'https://live.line.me/channels/5833718',
            'channel': 'Yahooニュース掲載🗞プロフ見てね🐕🐕',
            'channel_id': '5833718',
            'duration': 937,
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the broadcast metadata and build its HLS formats.

        Returns the info dict produced by ``_parse_broadcast_item`` with
        ``formats`` added.  When no format is available and the API reports a
        non-``ARCHIVED`` archive status, an expected "no formats" error naming
        that status is raised.
        """
        channel_id, broadcast_id = self._match_valid_url(url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        is_live = info['is_live']
        protocol = 'm3u8' if is_live else 'm3u8_native'

        formats = []
        # The API exposes either `liveHLSURLs` or `archivedHLSURLs`, keyed by variant name
        hls_urls = broadcast.get(('live' if is_live else 'archived') + 'HLSURLs') or {}
        for k, v in hls_urls.items():
            if not v:
                continue
            if k == 'abr':
                # presumably the adaptive master playlist - expand it into its variants
                formats.extend(self._extract_m3u8_formats(
                    v, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + k,
                'protocol': protocol,
                'url': v,
            }
            # Variants whose key is not purely numeric are flagged as having no video stream
            if not k.isdigit():
                f['vcodec'] = 'none'
            formats.append(f)

        if not formats:
            archive_status = item.get('archiveStatus')
            # `archiveStatus` may be absent; only build the status message when it
            # is a real string, otherwise `.lower()` would raise AttributeError and
            # mask the actual "no formats" condition.
            if isinstance(archive_status, str) and archive_status != 'ARCHIVED':
                self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)

        info['formats'] = formats
        return info
|
||||
|
||||
|
||||
class LineLiveChannelIE(LineLiveBaseIE):
    """Extractor for a LINE LIVE channel, yielding its archived broadcasts as a playlist."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃんだよぉ🦒',
            'description': 'md5:4d418087973ad081ceb1b3481f0b1816',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield one ``url``-type entry per archived broadcast, following pagination.

        ``archived_broadcasts`` is the first page (a dict with ``rows`` and
        ``hasNextPage``); subsequent pages are requested with ``lastId`` set to
        the id of the last entry yielded from the current page.
        """
        while True:
            # Id of the last entry produced from *this* page; it drives pagination.
            last_id = None
            for row in (archived_broadcasts.get('rows') or []):
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                last_id = info['id']
                yield info
            if not archived_broadcasts.get('hasNextPage'):
                return
            if last_id is None:
                # Nothing usable on this page: there is no id to continue from.
                # Stop instead of failing on an unbound name (first page) or
                # re-requesting the same page forever (later pages).
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        """Fetch the channel metadata and return a playlist of its archived broadcasts."""
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))
|
|
@ -8,12 +8,12 @@
|
|||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
traverse_obj
|
||||
)
|
||||
|
||||
|
||||
class MedalTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||
|
@ -80,25 +80,14 @@ class MedalTVIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
path = self._match_valid_url(url).group('path')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
next_data = self._search_json(
|
||||
'<script[^>]*__NEXT_DATA__[^>]*>', webpage,
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||
|
||||
build_id = next_data.get('buildId')
|
||||
if not build_id:
|
||||
raise ExtractorError(
|
||||
'Could not find build ID.', video_id=video_id)
|
||||
|
||||
locale = next_data.get('locale', 'en')
|
||||
|
||||
api_response = self._download_json(
|
||||
f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
|
||||
|
||||
clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
|
||||
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
|
||||
if not clip:
|
||||
raise ExtractorError(
|
||||
'Could not find video information.', video_id=video_id)
|
||||
|
@ -152,7 +141,7 @@ def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
|||
|
||||
# Necessary because the id of the author is not known in advance.
|
||||
# Won't raise an issue if no profile can be found as this is optional.
|
||||
author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
|
||||
author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
|
||||
author_id = str_or_none(author.get('userId'))
|
||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||
|
||||
|
|
|
@ -2,16 +2,44 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class MediaStreamIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)'
|
||||
class MediaStreamBaseIE(InfoExtractor):
    """Base class providing MediaStream (mdstrm.com) embed discovery in webpages."""

    _EMBED_BASE_URL = 'https://mdstrm.com/embed'
    _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'

    def _extract_mediastream_urls(self, webpage):
        """Yield every mdstrm.com player URL that can be found in ``webpage``.

        Sources are tried in a fixed order: JSON-LD ``VideoObject`` entries,
        inline ``playerMdStream.mdstreamVideo(...)`` calls, ``<iframe>`` tags and
        finally ``MediaStreamVideoPlayer`` elements.  Duplicates are not removed.
        """
        # 1) JSON-LD: embedUrl/contentUrl values that point at mdstrm.com
        json_ld_entries = list(self._yield_json_ld(webpage, None))
        yield from traverse_obj(json_ld_entries, (
            lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
            {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))

        # 2) Inline player initialisation scripts only carry the video id
        script_matches = re.finditer(
            r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage)
        for script_match in script_matches:
            yield f'{self._EMBED_BASE_URL}/{script_match.group("video_id")}'

        # 3) Plain iframes already contain the complete URL
        for iframe_url in re.findall(
                rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage):
            yield iframe_url

        # 4) Player container elements; the type is taken either from
        #    data-video-type or from a nested data-mediastream attribute
        for player_match in re.finditer(
                r'''(?x)
                    <(?:div|ps-mediastream)[^>]+
                    (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
                    data-video-id="(?P<video_id>\w+)"
                    (?:\s*data-video-type="(?P<video_type>[^"]+))?
                    (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
                        https://mdstrm\.com/(?P<live>live-stream))?
                ''', webpage):

            if player_match.group('video_type') == 'live' or player_match.group('live'):
                video_type = 'live-stream'
            else:
                video_type = 'embed'
            yield f'https://mdstrm.com/{video_type}/{player_match.group("video_id")}'
|
||||
|
||||
|
||||
class MediaStreamIE(MediaStreamBaseIE):
|
||||
_VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
|
||||
|
@ -23,6 +51,7 @@ class MediaStreamIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
|
@ -35,9 +64,7 @@ class MediaStreamIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream'
|
||||
},
|
||||
'params': {'skip_download': 'Livestream'},
|
||||
}, {
|
||||
'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
|
||||
'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
|
||||
|
@ -48,6 +75,7 @@ class MediaStreamIE(InfoExtractor):
|
|||
'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
|
||||
'info_dict': {
|
||||
|
@ -57,6 +85,7 @@ class MediaStreamIE(InfoExtractor):
|
|||
'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
|
||||
'info_dict': {
|
||||
|
@ -66,26 +95,12 @@ class MediaStreamIE(InfoExtractor):
|
|||
'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
|
||||
yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
|
||||
|
||||
yield from re.findall(
|
||||
r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage)
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'''(?x)
|
||||
<(?:div|ps-mediastream)[^>]+
|
||||
class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
|
||||
data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
|
||||
(?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
|
||||
''', webpage):
|
||||
|
||||
video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
|
||||
yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
|
||||
def _extract_from_webpage(self, url, webpage):
|
||||
for embed_url in self._extract_mediastream_urls(webpage):
|
||||
yield self.url_result(embed_url, MediaStreamIE, None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -94,7 +109,7 @@ def _real_extract(self, url):
|
|||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
||||
self.raise_geo_restricted()
|
||||
|
||||
player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for video_format in player_config['src']:
|
||||
|
@ -122,7 +137,7 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
|
||||
class WinSportsVideoIE(InfoExtractor):
|
||||
class WinSportsVideoIE(MediaStreamBaseIE):
|
||||
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
|
@ -158,21 +173,36 @@ class WinSportsVideoIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||
'info_dict': {
|
||||
'id': '6402adb62bbf3b18d454e1b0',
|
||||
'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||
'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
|
||||
'description': 'Gol anulado Bucaramanga',
|
||||
'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
|
||||
media_setting_json = self._search_json(
|
||||
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
|
||||
data = self._search_json(
|
||||
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
|
||||
|
||||
mediastream_id = traverse_obj(
|
||||
media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
|
||||
get_all=False) or json_ld.get('url')
|
||||
if not mediastream_id:
|
||||
mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
|
||||
traverse_obj(data, (
|
||||
(('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
|
||||
or next(self._extract_mediastream_urls(webpage), None)))
|
||||
|
||||
if not mediastream_url:
|
||||
self.raise_no_formats('No MediaStream embed found in webpage')
|
||||
|
||||
title = clean_html(remove_end(
|
||||
self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
|
||||
or self._og_search_title(webpage), '| Win Sports'))
|
||||
|
||||
return self.url_result(
|
||||
urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
|
||||
display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))
|
||||
mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)
|
||||
|
|
|
@ -12,9 +12,13 @@
|
|||
RegexNotFoundError,
|
||||
UserNotLive,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
|
@ -22,7 +26,6 @@
|
|||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
xpath_attr,
|
||||
)
|
||||
|
||||
|
||||
|
@ -660,6 +663,7 @@ class NBCStationsIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
|
||||
'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
|
||||
'duration': 112.513,
|
||||
'timestamp': 1661135892,
|
||||
'upload_date': '20220822',
|
||||
'uploader': 'NBC 4',
|
||||
|
@ -676,6 +680,7 @@ class NBCStationsIE(InfoExtractor):
|
|||
'ext': 'mp4',
|
||||
'title': 'Huracán complica que televidente de Tucson reciba reembolso',
|
||||
'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
|
||||
'duration': 172.406,
|
||||
'timestamp': 1660886507,
|
||||
'upload_date': '20220819',
|
||||
'uploader': 'Telemundo Arizona',
|
||||
|
@ -685,6 +690,22 @@ class NBCStationsIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
# direct mp4 link
|
||||
'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
|
||||
'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
|
||||
'info_dict': {
|
||||
'id': '2961135',
|
||||
'ext': 'mp4',
|
||||
'title': 'Highs Near Freezing in Boston on Wednesday',
|
||||
'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
|
||||
'duration': 235.669,
|
||||
'timestamp': 1675268656,
|
||||
'upload_date': '20230201',
|
||||
'uploader': '',
|
||||
'channel_id': 'WBTS',
|
||||
'channel': 'nbcboston',
|
||||
},
|
||||
}]
|
||||
|
||||
_RESOLUTIONS = {
|
||||
|
@ -711,7 +732,7 @@ def _real_extract(self, url):
|
|||
if not video_data:
|
||||
raise ExtractorError('No video metadata found in webpage', expected=True)
|
||||
|
||||
info, formats, subtitles = {}, [], {}
|
||||
info, formats = {}, []
|
||||
is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
|
||||
query = {
|
||||
'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
|
||||
|
@ -747,13 +768,14 @@ def _real_extract(self, url):
|
|||
|
||||
video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
|
||||
if video_url:
|
||||
ext = determine_ext(video_url)
|
||||
height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'ext': ext,
|
||||
'width': int_or_none(self._RESOLUTIONS.get(height)),
|
||||
'height': int_or_none(height),
|
||||
'format_id': 'http-mp4',
|
||||
'format_id': f'http-{ext}',
|
||||
})
|
||||
|
||||
info.update({
|
||||
|
@ -770,14 +792,25 @@ def _real_extract(self, url):
|
|||
smil = self._download_xml(
|
||||
f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
|
||||
note='Downloading SMIL data', query=query, fatal=is_live)
|
||||
if smil:
|
||||
manifest_url = xpath_attr(smil, f'.//{{{default_ns}}}video', 'src', fatal=is_live)
|
||||
subtitles = self._parse_smil_subtitles(smil, default_ns)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
|
||||
live=is_live, errnote='No HLS formats found')
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
subtitles = self._parse_smil_subtitles(smil, default_ns) if smil else {}
|
||||
for video in smil.findall(self._xpath_ns('.//video', default_ns)) if smil else []:
|
||||
info['duration'] = float_or_none(remove_end(video.get('dur'), 'ms'), 1000)
|
||||
video_src_url = video.get('src')
|
||||
ext = mimetype2ext(video.get('type'), default=determine_ext(video_src_url))
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
video_src_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
|
||||
live=is_live, errnote='No HLS formats found')
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif video_src_url:
|
||||
formats.append({
|
||||
'url': video_src_url,
|
||||
'format_id': f'https-{ext}',
|
||||
'ext': ext,
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
})
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats('No video content found in webpage', expected=True)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, parse_iso8601
|
||||
|
||||
_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
||||
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
||||
|
||||
|
||||
class NebulaBaseIE(InfoExtractor):
|
||||
|
@ -183,6 +183,10 @@ class NebulaIE(NebulaBaseIE):
|
|||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _fetch_video_metadata(self, slug):
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urljoin
|
||||
urljoin,
|
||||
url_or_none
|
||||
)
|
||||
|
||||
|
||||
|
@ -334,3 +335,140 @@ def _real_extract(self, url):
|
|||
for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]
|
||||
|
||||
return self.playlist_result(bangumis, program_id, title, description)
|
||||
|
||||
|
||||
class NhkRadiruIE(InfoExtractor):
    """Extractor for NHK Radiru on-demand radio episodes and programme pages."""

    _GEO_COUNTRIES = ['JP']
    IE_DESC = 'NHK らじる (Radiru/Rajiru)'
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
    _TESTS = [{
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
        'skip': 'Episode expired on 2023-04-16',
        'info_dict': {
            'channel': 'NHK-FM',
            'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
            'ext': 'm4a',
            'id': '0449_01_3853544',
            'series': 'ジャズ・トゥナイト',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
            'timestamp': 1680969600,
            'title': 'ジャズ・トゥナイト NEWジャズ特集',
            'upload_date': '20230408',
            'release_timestamp': 1680962400,
            'release_date': '20230408',
            'was_live': True,
        },
    }, {
        # playlist, airs every weekday so it should _hopefully_ be okay forever
        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
        'info_dict': {
            'id': '0458_01',
            'title': 'ベストオブクラシック',
            'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
            'channel': 'NHK-FM',
            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
        },
        'playlist_mincount': 3,
    }, {
        # one with letters in the id
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
        'note': 'Expires on 2024-03-31',
        'info_dict': {
            'id': 'F300_06_3738470',
            'ext': 'm4a',
            'title': '有島武郎「一房のぶどう」',
            'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
            'channel': 'NHKラジオ第1、NHK-FM',
            'timestamp': 1635757200,
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
            'release_date': '20161207',
            'series': 'らじる文庫 by ラジオ深夜便 ',
            'release_timestamp': 1481126700,
            'upload_date': '20211101',
        }
    }, {
        # news
        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
        'skip': 'Expires on 2023-04-17',
        'info_dict': {
            'id': 'F261_01_3855109',
            'ext': 'm4a',
            'channel': 'NHKラジオ第1',
            'timestamp': 1681635900,
            'release_date': '20230416',
            'series': 'NHKラジオニュース',
            'title': '午後6時のNHKニュース',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'upload_date': '20230416',
            'release_timestamp': 1681635600,
        },
    }]

    def _extract_episode_info(self, headline, programme_id, series_meta):
        """Build the info dict of one episode.

        ``headline`` is one element of the programme's ``detail_list`` and must
        carry ``headline_id``.  ``series_meta`` supplies the programme-level
        title/channel/thumbnail, which episode-level values override.
        """
        episode_id = f'{programme_id}_{headline["headline_id"]}'
        # `file_list[0]` is optional data: fall back to an empty dict so that one
        # incomplete headline cannot abort the extraction with AttributeError
        episode = traverse_obj(headline, ('file_list', 0, {dict})) or {}
        m3u8_url = episode.get('file_name')

        return {
            **series_meta,
            'id': episode_id,
            # Only hit the network when there is actually a manifest URL
            'formats': self._extract_m3u8_formats(m3u8_url, episode_id, fatal=False) if m3u8_url else [],
            'container': 'm4a_dash',  # force fixup, AAC-only HLS
            'was_live': True,
            'series': series_meta.get('title'),
            'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
            **traverse_obj(episode, {
                'title': 'file_title',
                'description': 'file_title_sub',
                'timestamp': ('open_time', {unified_timestamp}),
                # only the part of `aa_vinfo4` before the first underscore is parsed as a date
                'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
            }),
        }

    def _real_extract(self, url):
        """Extract a single episode (URL with a headline id) or the whole programme as a playlist."""
        site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
        programme_id = f'{site_id}_{corner_id}'

        # The F261 site id (used by the radio news URLs) has its own metadata endpoint
        if site_id == 'F261':
            json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
        else:
            json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'

        meta = self._download_json(json_url, programme_id)['main']

        series_meta = traverse_obj(meta, {
            'title': 'program_name',
            'channel': 'media_name',
            'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
        }, get_all=False)

        if headline_id:
            headline = traverse_obj(meta, (
                'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False)
            if not headline:
                # Imported locally because the file-level import list is not known
                # to provide ExtractorError
                from ..utils import ExtractorError

                # Without this check the lookup below would fail with an opaque
                # TypeError when the requested episode is not listed
                raise ExtractorError(
                    f'Episode {headline_id} was not found in programme {programme_id}; it may have expired',
                    expected=True)
            return self._extract_episode_info(headline, programme_id, series_meta)

        def entries():
            for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
                yield self._extract_episode_info(headline, programme_id, series_meta)

        return self.playlist_result(
            entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
|
||||
|
||||
|
||||
class NhkRadioNewsPageIE(InfoExtractor):
    """Maps the NHK radio news landing page onto the matching Radiru programme."""

    _VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
    _TESTS = [{
        # airs daily, on-the-hour most hours
        'url': 'https://www.nhk.or.jp/radionews/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'F261_01',
            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
            'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
            'channel': 'NHKラジオ第1',
            'title': 'NHKラジオニュース',
        }
    }]

    def _real_extract(self, url):
        # Nothing is downloaded here: extraction is delegated to NhkRadiruIE
        # through the fixed on-demand URL of programme F261_01
        programme_url = 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01'
        return self.url_result(programme_url, NhkRadiruIE)
|
||||
|
|
|
@ -477,23 +477,32 @@ def _get_subtitles(self, video_id, api_data, session_api_data):
|
|||
user_id_str = session_api_data.get('serviceUserId')
|
||||
|
||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||
raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
|
||||
if not raw_danmaku:
|
||||
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
|
||||
|
||||
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
|
||||
new_danmaku = self._extract_new_comments(
|
||||
new_comments.get('server'), video_id,
|
||||
new_comments.get('params'), new_comments.get('threadKey'))
|
||||
|
||||
if not legacy_danmaku and not new_danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(raw_danmaku),
|
||||
'data': json.dumps(legacy_danmaku + new_danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
def _extract_all_comments(self, video_id, threads, user_id, user_key):
|
||||
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
|
||||
auth_data = {
|
||||
'user_id': user_id,
|
||||
'userkey': user_key,
|
||||
} if user_id and user_key else {'user_id': ''}
|
||||
|
||||
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
|
||||
|
||||
# Request Start
|
||||
post_data = [{'ping': {'content': 'rs:0'}}]
|
||||
for i, thread in enumerate(threads):
|
||||
|
@ -532,17 +541,32 @@ def _extract_all_comments(self, video_id, threads, user_id, user_key):
|
|||
# Request Final
|
||||
post_data.append({'ping': {'content': 'rf:0'}})
|
||||
|
||||
for api_url in self._COMMENT_API_ENDPOINTS:
|
||||
comments = self._download_json(
|
||||
api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
},
|
||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||
if comments:
|
||||
return comments
|
||||
return self._download_json(
|
||||
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
},
|
||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||
|
||||
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
    """Fetch comments from the ``{endpoint}/v1/threads`` API.

    ``params`` and ``thread_key`` are forwarded unchanged in the JSON request
    body.  The download is non-fatal; the result is whatever ``traverse_obj``
    finds under ``data -> threads -> * -> comments -> *`` in the response.
    """
    request_body = json.dumps({
        'additionals': {},
        'params': params,
        'threadKey': thread_key,
    }).encode()
    request_headers = {
        'Referer': 'https://www.nicovideo.jp/',
        'Origin': 'https://www.nicovideo.jp',
        'Content-Type': 'text/plain;charset=UTF-8',
        'x-client-os-type': 'others',
        'x-frontend-id': '6',
        'x-frontend-version': '0',
    }
    response = self._download_json(
        f'{endpoint}/v1/threads', video_id, data=request_body, fatal=False,
        headers=request_headers,
        note='Downloading comments (new)', errnote='Failed to download comments (new)')
    # Flatten the comments of all returned threads
    return traverse_obj(response, ('data', 'threads', ..., 'comments', ...))
|
||||
|
||||
|
||||
class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||
|
@ -636,10 +660,10 @@ def _real_extract(self, url):
|
|||
|
||||
class NiconicoSeriesIE(InfoExtractor):
|
||||
IE_NAME = 'niconico:series'
|
||||
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nicovideo.jp/series/110226',
|
||||
'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
|
||||
'info_dict': {
|
||||
'id': '110226',
|
||||
'title': 'ご立派ァ!のシリーズ',
|
||||
|
@ -659,7 +683,7 @@ class NiconicoSeriesIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
|
||||
title = self._search_regex(
|
||||
(r'<title>「(.+)(全',
|
||||
|
@ -667,10 +691,9 @@ def _real_extract(self, url):
|
|||
webpage, 'title', fatal=False)
|
||||
if title:
|
||||
title = unescapeHTML(title)
|
||||
playlist = [
|
||||
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
|
||||
for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
|
||||
return self.playlist_result(playlist, list_id, title)
|
||||
json_data = next(self._yield_json_ld(webpage, None, fatal=False))
|
||||
return self.playlist_from_matches(
|
||||
traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
|
||||
|
||||
|
||||
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
format_field,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
@ -24,7 +25,7 @@ class ParlerIE(InfoExtractor):
|
|||
'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
|
||||
'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
|
||||
'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
|
||||
'timestamp': 1659744000,
|
||||
'timestamp': 1659785481,
|
||||
'upload_date': '20220806',
|
||||
'uploader': 'Tulsi Gabbard',
|
||||
'uploader_id': 'TulsiGabbard',
|
||||
|
@ -34,78 +35,57 @@ class ParlerIE(InfoExtractor):
|
|||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'md5': '11687e2f5bb353682cee338d181422ed',
|
||||
'info_dict': {
|
||||
'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
|
||||
'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
||||
'description': 'This man should run for office',
|
||||
'timestamp': 1659657600,
|
||||
'upload_date': '20220805',
|
||||
'uploader': 'Benny Johnson',
|
||||
'uploader_id': 'BennyJohnson',
|
||||
'uploader_url': 'https://parler.com/BennyJohnson',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
|
||||
'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
|
||||
'info_dict': {
|
||||
'id': 'r5vkSaz8PxQ',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
|
||||
'title': 'Tom MacDonald Names Reaction',
|
||||
'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
|
||||
'upload_date': '20220716',
|
||||
'duration': 1267,
|
||||
'uploader': 'Mahesh Chookolingo',
|
||||
'uploader_id': 'maheshchookolingo',
|
||||
'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
|
||||
'channel': 'Mahesh Chookolingo',
|
||||
'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'categories': ['Entertainment'],
|
||||
'tags': list,
|
||||
'availability': 'public',
|
||||
'live_status': 'not_live',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 1267,
|
||||
'like_count': int,
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'upload_date': '20220716',
|
||||
'thumbnail': 'https://i.ytimg.com/vi/r5vkSaz8PxQ/maxresdefault.jpg',
|
||||
'tags': 'count:17',
|
||||
'availability': 'public',
|
||||
'categories': ['Entertainment'],
|
||||
'playable_in_embed': True,
|
||||
'channel': 'Who Knows What! With Mahesh & Friends',
|
||||
'title': 'Tom MacDonald Names Reaction',
|
||||
'uploader': 'Who Knows What! With Mahesh & Friends',
|
||||
'uploader_id': '@maheshchookolingo',
|
||||
'age_limit': 0,
|
||||
'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
|
||||
'view_count': int,
|
||||
'uploader_url': 'http://www.youtube.com/@maheshchookolingo',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
|
||||
data=urlencode_postdata({'uuid': video_id}))['data'][0]
|
||||
primary = data['primary']
|
||||
|
||||
embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False)
|
||||
if embed:
|
||||
return self.url_result(embed[0], YoutubeIE)
|
||||
data = self._download_json(f'https://api.parler.com/v0/public/parleys/{video_id}',
|
||||
video_id)['data']
|
||||
if data.get('link'):
|
||||
return self.url_result(data['link'], YoutubeIE)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': traverse_obj(primary, ('video_data', 'videoSrc')),
|
||||
'thumbnail': traverse_obj(primary, ('video_data', 'thumbnailUrl')),
|
||||
'title': '',
|
||||
'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
|
||||
'timestamp': unified_timestamp(primary.get('date_created')),
|
||||
'uploader': strip_or_none(primary.get('name')),
|
||||
'uploader_id': strip_or_none(primary.get('username')),
|
||||
'uploader_url': format_field(strip_or_none(primary.get('username')), None, 'https://parler.com/%s'),
|
||||
'view_count': int_or_none(primary.get('view_count')),
|
||||
'comment_count': int_or_none(traverse_obj(data, ('engagement', 'commentCount'))),
|
||||
'repost_count': int_or_none(traverse_obj(data, ('engagement', 'echoCount'))),
|
||||
'title': strip_or_none(data.get('title')) or '',
|
||||
**traverse_obj(data, {
|
||||
'url': ('video', 'videoSrc'),
|
||||
'thumbnail': ('video', 'thumbnailUrl'),
|
||||
'description': ('body', {clean_html}),
|
||||
'timestamp': ('date_created', {unified_timestamp}),
|
||||
'uploader': ('user', 'name', {strip_or_none}),
|
||||
'uploader_id': ('user', 'username', {str}),
|
||||
'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
|
||||
'view_count': ('views', {int_or_none}),
|
||||
'comment_count': ('total_comments', {int_or_none}),
|
||||
'repost_count': ('echos', {int_or_none}),
|
||||
})
|
||||
}
|
||||
|
|
47
yt_dlp/extractor/pgatour.py
Normal file
47
yt_dlp/extractor/pgatour.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class PGATourIE(InfoExtractor):
    """Extractor for pgatour.com video pages; hands playback off to Brightcove."""

    # An optional "T" directly before the numeric id is captured as the `tc` group
    _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
        'info_dict': {
            'id': '6322447785112',
            'ext': 'mp4',
            'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
            'uploader_id': '6116716431001',
            'upload_date': '20230312',
            'timestamp': 1678653136,
            'duration': 20.011,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:7',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
        'info_dict': {
            'id': '6322506425112',
            'ext': 'mp4',
            'title': 'Follow THE PLAYERS trophy on Championship Sunday',
            'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
            'uploader_id': '6082840763001',
            'upload_date': '20230313',
            'timestamp': 1678739835,
            'duration': 123.435,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:8',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Build the Brightcove player URL for the video and delegate to BrightcoveNewIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
        # The account/player pair differs depending on whether the "T" prefix matched
        if mobj.group('tc'):
            account_id, player_id = '6116716431001', 'Vsd5Umu8r'
        else:
            account_id, player_id = '6082840763001', 'FWIBYMBPj'

        player_url = f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}'
        return self.url_result(player_url, BrightcoveNewIE)
|
|
@ -1,42 +1,60 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, urljoin
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
get_element_by_class,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PornezIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
|
||||
_TESTS = [{
|
||||
'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
|
||||
'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc',
|
||||
'info_dict': {
|
||||
'id': '344819',
|
||||
'ext': 'mp4',
|
||||
'title': r'mistresst funny_penis_names wmv',
|
||||
'title': 'mistresst funny_penis_names wmv',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
|
||||
'info_dict': {
|
||||
'id': '156161',
|
||||
'ext': 'mp4',
|
||||
'title': 'Watch leana lovings stiff for stepdaughter porn video.',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
iframe_src = self._html_search_regex(
|
||||
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe', fatal=True)
|
||||
iframe_src = urljoin('https://pornez.net', iframe_src)
|
||||
title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None)
|
||||
if title is None:
|
||||
title = self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title', fatal=True)
|
||||
thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None)
|
||||
webpage = self._download_webpage(iframe_src, video_id)
|
||||
entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0]
|
||||
for format in entries['formats']:
|
||||
height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height')
|
||||
format['format_id'] = '%sp' % height
|
||||
format['height'] = int_or_none(height)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
r'<link[^>]+\bhref=["\']https?://pornez.net/\?p=(\w+)["\']', webpage, 'id')
|
||||
|
||||
iframe_src = self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe')
|
||||
iframe = self._download_webpage(urljoin('https://pornez.net', iframe_src), video_id)
|
||||
|
||||
entries = self._parse_html5_media_entries(iframe_src, iframe, video_id)[0]
|
||||
for fmt in entries['formats']:
|
||||
height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height')
|
||||
fmt['format_id'] = '%sp' % height
|
||||
fmt['height'] = int_or_none(height)
|
||||
|
||||
entries.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': 18
|
||||
'title': (clean_html(get_element_by_class('video-title', webpage))
|
||||
or self._html_search_meta(
|
||||
['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
|
||||
'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
|
||||
'age_limit': 18,
|
||||
})
|
||||
return entries
|
||||
|
|
|
@ -58,6 +58,11 @@ def dl(*args, **kwargs):
|
|||
def _real_initialize(self):
|
||||
self._logged_in = False
|
||||
|
||||
def _set_age_cookies(self, host):
    """Set the age-related cookies to '1' for *host*.

    The cookies are written in this order: 'age_verified',
    'accessAgeDisclaimerPH', 'accessPH'.
    """
    for cookie_name in ('age_verified', 'accessAgeDisclaimerPH', 'accessPH'):
        self._set_cookie(host, cookie_name, '1')
|
||||
|
||||
def _login(self, host):
|
||||
if self._logged_in:
|
||||
return
|
||||
|
@ -267,8 +272,7 @@ def _real_extract(self, url):
|
|||
video_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
|
||||
self._set_cookie(host, 'age_verified', '1')
|
||||
self._set_age_cookies(host)
|
||||
|
||||
def dl_webpage(platform):
|
||||
self._set_cookie(host, 'platform', platform)
|
||||
|
@ -569,6 +573,7 @@ def _real_extract(self, url):
|
|||
mobj = self._match_valid_url(url)
|
||||
user_id = mobj.group('id')
|
||||
videos_url = '%s/videos' % mobj.group('url')
|
||||
self._set_age_cookies(mobj.group('host'))
|
||||
page = self._extract_page(url)
|
||||
if page:
|
||||
videos_url = update_url_query(videos_url, {'page': page})
|
||||
|
@ -633,6 +638,7 @@ def _real_extract(self, url):
|
|||
item_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
self._set_age_cookies(host)
|
||||
|
||||
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||
|
||||
|
@ -812,5 +818,6 @@ def _real_extract(self, url):
|
|||
item_id = mobj.group('id')
|
||||
|
||||
self._login(host)
|
||||
self._set_age_cookies(host)
|
||||
|
||||
return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import random
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -9,12 +8,14 @@
|
|||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
urlencode_postdata,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RedditIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/(?P<slug>(?:r|user)/[^/]+/comments/(?P<id>[^/?#&]+))'
|
||||
_NETRC_MACHINE = 'reddit'
|
||||
_VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||
'info_dict': {
|
||||
|
@ -109,6 +110,46 @@ class RedditIE(InfoExtractor):
|
|||
'age_limit': 0,
|
||||
'channel_id': 'dumbfuckers_club',
|
||||
},
|
||||
}, {
|
||||
# post link without subreddit
|
||||
'url': 'https://www.reddit.com/comments/124pp33',
|
||||
'md5': '15eec9d828adcef4468b741a7e45a395',
|
||||
'info_dict': {
|
||||
'id': 'antsenjc2jqa1',
|
||||
'ext': 'mp4',
|
||||
'display_id': '124pp33',
|
||||
'title': 'Harmless prank of some old friends',
|
||||
'uploader': 'Dudezila',
|
||||
'channel_id': 'ContagiousLaughter',
|
||||
'duration': 17,
|
||||
'upload_date': '20230328',
|
||||
'timestamp': 1680012043,
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'age_limit': 0,
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
# quarantined subreddit post
|
||||
'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/',
|
||||
'md5': '3156ea69e3c1f1b6259683c5abd36e71',
|
||||
'info_dict': {
|
||||
'id': '8bwtclfggpsa1',
|
||||
'ext': 'mp4',
|
||||
'display_id': '12fujy3',
|
||||
'title': 'Based Hasan?',
|
||||
'uploader': 'KingNigelXLII',
|
||||
'channel_id': 'GenZedong',
|
||||
'duration': 16,
|
||||
'upload_date': '20230408',
|
||||
'timestamp': 1680979138,
|
||||
'age_limit': 0,
|
||||
'comment_count': int,
|
||||
'dislike_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
|
||||
}, {
|
||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
|
||||
'only_matching': True,
|
||||
|
@ -137,21 +178,45 @@ class RedditIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _gen_session_id():
|
||||
id_length = 16
|
||||
rand_max = 1 << (id_length * 4)
|
||||
return '%0.*x' % (id_length, random.randrange(rand_max))
|
||||
def _perform_login(self, username, password):
    """Log in to Reddit with the given credentials.

    Raises ExtractorError when Reddit reports that a captcha is required,
    when the login response lists errors, or when the response carries no
    cookie string.
    """
    captcha_info = self._download_json(
        'https://www.reddit.com/api/requires_captcha/login.json', None,
        'Checking login requirement')
    if captcha_info['required']:
        raise ExtractorError('Reddit is requiring captcha before login', expected=True)

    login_form = {
        'op': 'login-main',
        'user': username,
        'passwd': password,
        'api_type': 'json',
    }
    response = self._download_json(
        f'https://www.reddit.com/api/login/{username}', None,
        data=urlencode_postdata(login_form),
        note='Logging in', errnote='Login request failed')

    # Index 1 of every entry under json.errors is joined into one message
    error_text = '; '.join(traverse_obj(response, ('json', 'errors', ..., 1)))
    if error_text:
        raise ExtractorError(f'Unable to login, Reddit API says {error_text}', expected=True)

    session_cookie = traverse_obj(response, ('json', 'data', 'cookie', {str}))
    if not session_cookie:
        raise ExtractorError('Unable to login, no cookie was returned')
|
||||
|
||||
def _real_extract(self, url):
|
||||
subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
|
||||
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
|
||||
|
||||
self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
|
||||
self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
|
||||
data = self._download_json(f'https://{subdomain}reddit.com/{slug}/.json', video_id, fatal=False)
|
||||
data = self._download_json(
|
||||
f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
|
||||
if not data:
|
||||
# Fall back to old.reddit.com in case the requested subdomain fails
|
||||
data = self._download_json(f'https://old.reddit.com/{slug}/.json', video_id)
|
||||
fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
|
||||
self.to_screen(f'{host} request failed, retrying with {fallback_host}')
|
||||
data = self._download_json(
|
||||
f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
|
||||
|
||||
if traverse_obj(data, 'error') == 403:
|
||||
reason = data.get('reason')
|
||||
if reason == 'quarantined':
|
||||
self.raise_login_required('Quarantined subreddit; an account that has opted in is required')
|
||||
elif reason == 'private':
|
||||
self.raise_login_required('Private subreddit; an account that has been approved is required')
|
||||
else:
|
||||
raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}')
|
||||
|
||||
data = data[0]['data']['children'][0]['data']
|
||||
video_url = data['url']
|
||||
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RozhlasIE(InfoExtractor):
|
||||
|
@ -50,7 +57,7 @@ class RozhlasVltavaIE(InfoExtractor):
|
|||
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
|
||||
'md5': 'ba2fdbc1242fc16771c7695d271ec355',
|
||||
'info_dict': {
|
||||
'id': 8891337,
|
||||
'id': '8891337',
|
||||
'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
|
@ -69,7 +76,7 @@ class RozhlasVltavaIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
|
||||
'info_dict': {
|
||||
'id': 8554744,
|
||||
'id': '8554744',
|
||||
'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
|
||||
},
|
||||
'playlist_count': 5,
|
||||
|
@ -139,27 +146,62 @@ class RozhlasVltavaIE(InfoExtractor):
|
|||
'chapter_number': 5,
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
|
||||
'info_dict': {
|
||||
'id': '8946969',
|
||||
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '10631121',
|
||||
'ext': 'm4a',
|
||||
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
|
||||
'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
|
||||
'duration': 2656,
|
||||
'artist': 'Tvůrčí skupina Drama a literatura',
|
||||
'channel_id': 'dvojka',
|
||||
},
|
||||
}],
|
||||
'params': {'skip_download': 'dash'},
|
||||
}]
|
||||
|
||||
def _extract_video(self, entry):
|
||||
chapter_number = int_or_none(traverse_obj(entry, ('meta', 'ga', 'contentSerialPart')))
|
||||
formats = []
|
||||
audio_id = entry['meta']['ga']['contentId']
|
||||
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
|
||||
ext = audio.get('variant')
|
||||
if ext == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
audio['url'], audio_id, mpd_id=ext, fatal=False))
|
||||
elif ext == 'hls':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': audio['url'],
|
||||
'ext': ext,
|
||||
'format_id': ext,
|
||||
'abr': int_or_none(audio.get('bitrate')),
|
||||
'acodec': ext,
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
|
||||
|
||||
return {
|
||||
'id': entry['meta']['ga']['contentId'],
|
||||
'title': traverse_obj(entry, ('meta', 'ga', 'contentName')),
|
||||
'description': entry.get('title'),
|
||||
'duration': entry.get('duration'),
|
||||
'artist': traverse_obj(entry, ('meta', 'ga', 'contentAuthor')),
|
||||
'channel_id': traverse_obj(entry, ('meta', 'ga', 'contentCreator')),
|
||||
'id': audio_id,
|
||||
'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
|
||||
'chapter_number': chapter_number,
|
||||
'formats': [{
|
||||
'url': audio_link['url'],
|
||||
'ext': audio_link.get('variant'),
|
||||
'format_id': audio_link.get('variant'),
|
||||
'abr': audio_link.get('bitrate'),
|
||||
'acodec': audio_link.get('variant'),
|
||||
'vcodec': 'none',
|
||||
} for audio_link in entry['audioLinks']],
|
||||
'formats': formats,
|
||||
**traverse_obj(entry, {
|
||||
'title': ('meta', 'ga', 'contentName'),
|
||||
'description': 'title',
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'artist': ('meta', 'ga', 'contentAuthor'),
|
||||
'channel_id': ('meta', 'ga', 'contentCreator'),
|
||||
})
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -173,7 +215,7 @@ def _real_extract(self, url):
|
|||
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'id': data.get('embedId'),
|
||||
'id': str_or_none(data.get('embedId')) or video_id,
|
||||
'title': traverse_obj(data, ('series', 'title')),
|
||||
'entries': map(self._extract_video, data['playlist']),
|
||||
}
|
||||
|
|
285
yt_dlp/extractor/rtvcplay.py
Normal file
285
yt_dlp/extractor/rtvcplay.py
Normal file
|
@ -0,0 +1,285 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor, ExtractorError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RTVCPlayBaseIE(InfoExtractor):
    """Shared helpers for the RTVCPlay extractors."""

    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        """Return the `config` object declared in an inline <script> of *webpage*."""
        # Drop every `" + "` sequence so concatenated string literals become a single literal
        merged_webpage = re.sub(r'"\s*\+\s*"', '', webpage)
        return self._search_json(
            r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', merged_webpage,
            'player_config', video_id, transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        """Collect formats and subtitles from the `sources` of *player_config*.

        Returns a `(formats, subtitles)` tuple. Only sources whose `url`
        passes `url_or_none` are considered.
        """
        formats = []
        subtitles = {}
        valid_sources = traverse_obj(
            player_config, ('sources', ..., lambda _, v: url_or_none(v['url'])))
        for source in valid_sources:
            source_url = source['url']
            # Prefer the declared mimetype; fall back to the URL's extension
            ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source_url))
            if ext != 'm3u8':
                formats.append({
                    'url': source_url,
                    'ext': ext,
                })
                continue

            # Manifest extraction is non-fatal (fatal=False)
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                source_url, video_id, 'mp4', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        return formats, subtitles
|
||||
|
||||
|
||||
class RTVCPlayIE(RTVCPlayBaseIE):
    """Extractor for rtvcplay.co pages: single videos, live channels, programs and podcasts."""

    # Any first path segment except "embed" is captured as `category`
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
            'description': 'md5:eff9e548394175928059320c006031ea',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
        'info_dict': {
            'id': 'senal-colombia',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
        'info_dict': {
            'id': 'radio-nacional',
            'title': r're:^Radio Nacional',
            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
        'info_dict': {
            'id': 'senoritas',
            'title': 'Señoritas',
            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
        'md5': 'f040a7380a269ad633cf837384d5e9fc',
        'info_dict': {
            'id': 'james-regresa-clases-28022022',
            'title': 'James regresa a clases - 28/02/2022',
            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
        'info_dict': {
            'id': 'llinas-el-cerebro-y-el-universo',
            'title': 'Llinás, el cerebro y el universo',
            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
        'info_dict': {
            'id': 'profe-en-tu-casa',
            'title': 'Profe en tu casa',
            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 537,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
        'info_dict': {
            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
        'info_dict': {
            'id': 'diez-versiones',
            'title': 'Diez versiones',
            'description': 'md5:997471ed971cb3fd8e41969457675306',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Extract a video/live stream, or a playlist of season episodes or podcast audios.

        Raises ExtractorError when the page state yields no HLS URL, no
        season list and no podcast episodes.
        """
        mobj = self._match_valid_url(url)
        video_id, category = mobj.group('id'), mobj.group('category')
        webpage = self._download_webpage(url, video_id)

        page_state = self._search_json(
            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
            video_id, transform_source=js_to_json)
        hydration = page_state['content']['currentContent']

        # With an asset id the HLS URL comes from a template; otherwise use the channel's URL
        asset_id = traverse_obj(hydration, ('video', 'assetid'))
        hls_url = (
            hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
            if asset_id else traverse_obj(hydration, ('channel', 'hls')))

        metadata = traverse_obj(hydration, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
        }, get_all=False)

        if hls_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': category == 'en-vivo',
                **metadata,
            }

        # No stream URL: probably it's a program's page
        seasons = traverse_obj(
            hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
            get_all=False)
        if seasons:
            entries = []
            for season in seasons:
                season_info = traverse_obj(season, {
                    'season': 'title',
                    'season_number': ('season', {int_or_none}),
                })
                for episode in traverse_obj(season, ('contents', ...)):
                    episode_url = urljoin(url, episode['slug'])
                    episode_info = traverse_obj(episode, {
                        'title': 'title',
                        'thumbnail': ('image', 'cover', 'path'),
                        'episode_number': ('chapter_number', {int_or_none}),
                    })
                    entries.append(self.url_result(
                        episode_url, url_transparent=True, **season_info, **episode_info))

            return self.playlist_result(entries, video_id, **metadata)

        # No seasons either: fall back to the podcast audio list
        podcast_episodes = hydration.get('audios')
        if not podcast_episodes:
            raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')

        podcast_entries = []
        for episode in podcast_episodes:
            audio_url = episode['file']
            audio_info = traverse_obj(episode, {
                'title': 'title',
                'description': ('description', {clean_html}),
                'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
                'season_number': ('season', {int_or_none}),
            })
            podcast_entries.append(self.url_result(audio_url, url_transparent=True, **audio_info))

        return self.playlist_result(podcast_entries, video_id, **metadata)
|
||||
|
||||
|
||||
class RTVCPlayEmbedIE(RTVCPlayBaseIE):
    """Extractor for rtvcplay.co /embed/ player pages."""

    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
        'info_dict': {
            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
            'title': 'Tráiler: Señoritas',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        }
    }]

    def _real_extract(self, url):
        """Extract formats from the embedded player config and add CMS metadata when available."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Metadata is only requested when the config carries an asset id; the request is non-fatal
        metadata = {}
        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
        if asset_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('image', ..., 'thumbnail', 'path'),
        }, get_all=False))
        return info
|
||||
|
||||
|
||||
class RTVCKalturaIE(RTVCPlayBaseIE):
    """Extractor for media.rtvc.gov.co/kalturartvc/ player pages (always reported as live)."""

    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
        'info_dict': {
            'id': 'indexSC',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        """Combine formats from the player config with the channel's HLS stream from the CMS."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # Channel metadata is only requested when the config carries a channel id; the request is non-fatal
        metadata = {}
        channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
        if channel_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)

        # The channel HLS manifest is extracted non-fatally and merged into the config formats
        channel_hls_url = traverse_obj(metadata, ('channel', 'hls'))
        hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
            channel_hls_url, video_id, 'mp4', fatal=False)
        formats.extend(hls_formats)
        self._merge_subtitles(hls_subtitles, target=subtitles)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('channel', 'image', 'logo', 'path'),
        }))
        return info
|
|
@ -7,8 +7,11 @@
|
|||
ExtractorError,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
format_field,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
|
@ -164,7 +167,13 @@ def _real_extract(self, url):
|
|||
|
||||
formats = []
|
||||
for ext, ext_info in (video.get('ua') or {}).items():
|
||||
for height, video_info in (ext_info or {}).items():
|
||||
if isinstance(ext_info, dict):
|
||||
for height, video_info in ext_info.items():
|
||||
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
|
||||
video_info.setdefault('meta', {})['h'] = height
|
||||
ext_info = ext_info.values()
|
||||
|
||||
for video_info in ext_info:
|
||||
meta = video_info.get('meta') or {}
|
||||
if not video_info.get('url'):
|
||||
continue
|
||||
|
@ -175,12 +184,16 @@ def _real_extract(self, url):
|
|||
video_info['url'], video_id,
|
||||
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
|
||||
continue
|
||||
timeline = ext == 'timeline'
|
||||
if timeline:
|
||||
ext = determine_ext(video_info['url'])
|
||||
formats.append({
|
||||
'ext': ext,
|
||||
'acodec': 'none' if timeline else None,
|
||||
'url': video_info['url'],
|
||||
'format_id': '%s-%sp' % (ext, height),
|
||||
'height': int_or_none(height),
|
||||
'fps': video.get('fps'),
|
||||
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
|
||||
'format_note': 'Timeline' if timeline else None,
|
||||
'fps': None if timeline else video.get('fps'),
|
||||
**traverse_obj(meta, {
|
||||
'tbr': 'bitrate',
|
||||
'filesize': 'size',
|
||||
|
@ -247,6 +260,43 @@ class RumbleIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'http://www.rumble.com/vDMUM1?key=value',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'timeline format',
|
||||
'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
|
||||
'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
|
||||
'params': {'format': 'wv'},
|
||||
'info_dict': {
|
||||
'id': 'v2bou5f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Redacted News',
|
||||
'upload_date': '20230322',
|
||||
'timestamp': 1679445010,
|
||||
'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris',
|
||||
'duration': 892,
|
||||
'channel': 'Redacted News',
|
||||
'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
|
||||
'channel_url': 'https://rumble.com/c/Redacted',
|
||||
'live_status': 'not_live',
|
||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
|
||||
'info_dict': {
|
||||
'id': 'v2blzyy',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'was_live',
|
||||
'release_timestamp': 1679446804,
|
||||
'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
|
||||
'release_date': '20230322',
|
||||
'timestamp': 1679445692,
|
||||
'duration': 4435,
|
||||
'upload_date': '20230322',
|
||||
'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
|
||||
'uploader': 'Kim Iversen',
|
||||
'channel_url': 'https://rumble.com/c/KimIversen',
|
||||
'channel': 'Kim Iversen',
|
||||
'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
|
|
|
@ -1,7 +1,13 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
@ -11,7 +17,7 @@ class SBSIE(InfoExtractor):
|
|||
https?://(?:www\.)?sbs\.com\.au/(?:
|
||||
ondemand(?:
|
||||
/video/(?:single/)?|
|
||||
/movie/[^/]+/|
|
||||
/(?:movie|tv-program)/[^/]+/|
|
||||
/(?:tv|news)-series/(?:[^/]+/){3}|
|
||||
.*?\bplay=|/watch/
|
||||
)|news/(?:embeds/)?video/
|
||||
|
@ -27,18 +33,21 @@ class SBSIE(InfoExtractor):
|
|||
# Original URL is handled by the generic IE which finds the iframe:
|
||||
# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
||||
'md5': '31f84a7a19b53635db63c73f8ab0c4a7',
|
||||
'info_dict': {
|
||||
'id': '_rFBPRPO4pMR',
|
||||
'id': '320403011771', # '_rFBPRPO4pMR',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dingo Conservation (The Feed)',
|
||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||
'thumbnail': r're:http://.*\.jpg',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 308,
|
||||
'timestamp': 1408613220,
|
||||
'upload_date': '20140821',
|
||||
'uploader': 'SBSC',
|
||||
'tags': None,
|
||||
'categories': None,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
}, {
|
||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||
'only_matching': True,
|
||||
|
@ -70,34 +79,80 @@ class SBSIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
_AUS_TV_PARENTAL_GUIDELINES = {
|
||||
'P': 0,
|
||||
'C': 7,
|
||||
'G': 0,
|
||||
'PG': 0,
|
||||
'M': 14,
|
||||
'MA15+': 15,
|
||||
'MAV15+': 15,
|
||||
'R18+': 18,
|
||||
}
|
||||
_PLAYER_API = 'https://www.sbs.com.au/api/v3'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
player_params = self._download_json(
|
||||
'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
|
||||
formats, subtitles = self._extract_smil_formats_and_subtitles(
|
||||
update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id)
|
||||
|
||||
error = player_params.get('error')
|
||||
if error:
|
||||
error_message = 'Sorry, The video you are looking for does not exist.'
|
||||
video_data = error.get('results') or {}
|
||||
error_code = error.get('errorCode')
|
||||
if error_code == 'ComingSoon':
|
||||
error_message = '%s is not yet available.' % video_data.get('title', '')
|
||||
elif error_code in ('Forbidden', 'intranetAccessOnly'):
|
||||
error_message = 'Sorry, This video cannot be accessed via this website'
|
||||
elif error_code == 'Expired':
|
||||
error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
||||
if not formats:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id,
|
||||
note='Checking geo-restriction', fatal=False, expected_status=403)
|
||||
if urlh:
|
||||
error_reasons = urlh.headers.get_all('x-error-reason') or []
|
||||
if 'geo-blocked' in error_reasons:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
self.raise_no_formats('No formats are available', video_id=video_id)
|
||||
|
||||
urls = player_params['releaseUrls']
|
||||
theplatform_url = (urls.get('progressive') or urls.get('html')
|
||||
or urls.get('standard') or player_params['relatedItemsURL'])
|
||||
media = traverse_obj(self._download_json(
|
||||
f'{self._PLAYER_API}/video_stream', video_id, fatal=False,
|
||||
query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {}
|
||||
|
||||
media.update(self._download_json(
|
||||
f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}',
|
||||
video_id, fatal=not media) or {})
|
||||
|
||||
# For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'.
|
||||
if traverse_obj(media, ('partOfSeries', {dict})):
|
||||
media['epName'] = traverse_obj(media, ('title', {str}))
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'id': video_id,
|
||||
'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
|
||||
'is_live': player_params.get('streamType') == 'live',
|
||||
**traverse_obj(media, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'channel': ('taxonomy', 'channel', 'name', {str}),
|
||||
'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}),
|
||||
'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode': ('epName', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}),
|
||||
'release_year': ('releaseYear', {int_or_none}),
|
||||
'duration': ('duration', ({float_or_none}, {parse_duration})),
|
||||
'is_live': ('liveStream', {bool}),
|
||||
'age_limit': (
|
||||
('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}),
|
||||
}, get_all=False),
|
||||
**traverse_obj(media, {
|
||||
'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}),
|
||||
'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}),
|
||||
'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), {
|
||||
'id': ('name', {str}),
|
||||
'url': 'contentUrl',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'uploader': 'SBSC',
|
||||
}
|
||||
|
|
31
yt_dlp/extractor/senalcolombia.py
Normal file
31
yt_dlp/extractor/senalcolombia.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
from .common import InfoExtractor
|
||||
from .rtvcplay import RTVCKalturaIE
|
||||
|
||||
|
||||
class SenalColombiaLiveIE(InfoExtractor):
    # Resolves the Señal Colombia live page to the player URL stored in the
    # page's Drupal settings JSON and delegates extraction to RTVCKalturaIE.
    _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'

    _TESTS = [{
        'url': 'https://www.senalcolombia.tv/senal-en-vivo',
        'info_dict': {
            'id': 'indexSC',
            'title': 're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        page_slug = self._match_id(url)
        page = self._download_webpage(url, page_slug)

        # The settings blob follows the <script> tag whose
        # data-drupal-selector attribute mentions "drupal-settings-json".
        drupal_settings = self._search_json(
            r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>',
            page, 'hydration', page_slug)

        # 'envivosrc' holds the URL that is handed over to RTVCKalturaIE.
        live_player_url = drupal_settings['envivosrc']
        return self.url_result(live_player_url, RTVCKalturaIE, page_slug)
|
518
yt_dlp/extractor/stageplus.py
Normal file
518
yt_dlp/extractor/stageplus.py
Normal file
|
@ -0,0 +1,518 @@
|
|||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class StagePlusVODConcertIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'stageplus'
|
||||
_VALID_URL = r'https?://(?:www\.)?stage-plus\.com/video/(?P<id>vod_concert_\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.stage-plus.com/video/vod_concert_APNM8GRFDPHMASJKBSPJACG',
|
||||
'playlist_count': 6,
|
||||
'info_dict': {
|
||||
'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
|
||||
'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
|
||||
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
|
||||
'artist': ['Yuja Wang', 'Lorenzo Viotti'],
|
||||
'upload_date': '20230331',
|
||||
'timestamp': 1680249600,
|
||||
'release_date': '20210709',
|
||||
'release_timestamp': 1625788800,
|
||||
'thumbnails': 'count:3',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'performance_work_A1IN4PJFE9MM2RJ3CLBMUSJBBSOJAD9O',
|
||||
'ext': 'mp4',
|
||||
'title': 'Piano Concerto No. 2 in C Minor, Op. 18',
|
||||
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
|
||||
'upload_date': '20230331',
|
||||
'timestamp': 1680249600,
|
||||
'release_date': '20210709',
|
||||
'release_timestamp': 1625788800,
|
||||
'duration': 2207,
|
||||
'chapters': 'count:5',
|
||||
'artist': ['Yuja Wang'],
|
||||
'composer': ['Sergei Rachmaninoff'],
|
||||
'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
|
||||
'album_artist': ['Yuja Wang', 'Lorenzo Viotti'],
|
||||
'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
|
||||
'track_number': 1,
|
||||
'genre': 'Instrumental Concerto',
|
||||
},
|
||||
}],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
# TODO: Prune this after livestream and/or album extractors are added
|
||||
_GRAPHQL_QUERY = '''query videoDetailPage($videoId: ID!, $sliderItemsFirst: Int = 24) {
|
||||
node(id: $videoId) {
|
||||
__typename
|
||||
...LiveConcertFields
|
||||
... on LiveConcert {
|
||||
artists {
|
||||
edges {
|
||||
role {
|
||||
...RoleFields
|
||||
}
|
||||
node {
|
||||
id
|
||||
name
|
||||
sortName
|
||||
}
|
||||
}
|
||||
}
|
||||
isAtmos
|
||||
maxResolution
|
||||
groups {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
}
|
||||
shortDescription
|
||||
performanceWorks {
|
||||
...livePerformanceWorkFields
|
||||
}
|
||||
totalDuration
|
||||
sliders {
|
||||
...contentContainerFields
|
||||
}
|
||||
vodConcert {
|
||||
__typename
|
||||
id
|
||||
}
|
||||
}
|
||||
...VideoFields
|
||||
... on Video {
|
||||
artists {
|
||||
edges {
|
||||
role {
|
||||
...RoleFields
|
||||
}
|
||||
node {
|
||||
id
|
||||
name
|
||||
sortName
|
||||
}
|
||||
}
|
||||
}
|
||||
isAtmos
|
||||
maxResolution
|
||||
isLossless
|
||||
description
|
||||
productionDate
|
||||
takedownDate
|
||||
sliders {
|
||||
...contentContainerFields
|
||||
}
|
||||
}
|
||||
...VodConcertFields
|
||||
... on VodConcert {
|
||||
artists {
|
||||
edges {
|
||||
role {
|
||||
...RoleFields
|
||||
}
|
||||
node {
|
||||
id
|
||||
name
|
||||
sortName
|
||||
}
|
||||
}
|
||||
}
|
||||
isAtmos
|
||||
maxResolution
|
||||
groups {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
}
|
||||
performanceWorks {
|
||||
...PerformanceWorkFields
|
||||
}
|
||||
shortDescription
|
||||
productionDate
|
||||
takedownDate
|
||||
sliders {
|
||||
...contentContainerFields
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragment LiveConcertFields on LiveConcert {
|
||||
endTime
|
||||
id
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
reruns {
|
||||
...liveConcertRerunFields
|
||||
}
|
||||
publicationLevel
|
||||
startTime
|
||||
streamStartTime
|
||||
subtitle
|
||||
title
|
||||
typeDisplayName
|
||||
stream {
|
||||
...liveStreamFields
|
||||
}
|
||||
trailerStream {
|
||||
...streamFields
|
||||
}
|
||||
geoAccessCountries
|
||||
geoAccessMode
|
||||
}
|
||||
|
||||
fragment PictureFields on Picture {
|
||||
id
|
||||
url
|
||||
type
|
||||
}
|
||||
|
||||
fragment liveConcertRerunFields on LiveConcertRerun {
|
||||
streamStartTime
|
||||
endTime
|
||||
startTime
|
||||
stream {
|
||||
...rerunStreamFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment rerunStreamFields on RerunStream {
|
||||
publicationLevel
|
||||
streamType
|
||||
url
|
||||
}
|
||||
|
||||
fragment liveStreamFields on LiveStream {
|
||||
publicationLevel
|
||||
streamType
|
||||
url
|
||||
}
|
||||
|
||||
fragment streamFields on Stream {
|
||||
publicationLevel
|
||||
streamType
|
||||
url
|
||||
}
|
||||
|
||||
fragment RoleFields on Role {
|
||||
__typename
|
||||
id
|
||||
type
|
||||
displayName
|
||||
}
|
||||
|
||||
fragment livePerformanceWorkFields on LivePerformanceWork {
|
||||
__typename
|
||||
id
|
||||
artists {
|
||||
...artistWithRoleFields
|
||||
}
|
||||
groups {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
}
|
||||
}
|
||||
}
|
||||
work {
|
||||
...workFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment artistWithRoleFields on ArtistWithRoleConnection {
|
||||
edges {
|
||||
role {
|
||||
...RoleFields
|
||||
}
|
||||
node {
|
||||
id
|
||||
name
|
||||
sortName
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragment workFields on Work {
|
||||
id
|
||||
title
|
||||
movements {
|
||||
id
|
||||
title
|
||||
}
|
||||
composers {
|
||||
id
|
||||
name
|
||||
}
|
||||
genre {
|
||||
id
|
||||
title
|
||||
}
|
||||
}
|
||||
|
||||
fragment contentContainerFields on CuratedContentContainer {
|
||||
__typename
|
||||
...SliderFields
|
||||
...BannerFields
|
||||
}
|
||||
|
||||
fragment SliderFields on Slider {
|
||||
id
|
||||
headline
|
||||
items(first: $sliderItemsFirst) {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
__typename
|
||||
...AlbumFields
|
||||
...ArtistFields
|
||||
...EpochFields
|
||||
...GenreFields
|
||||
...GroupFields
|
||||
...LiveConcertFields
|
||||
...PartnerFields
|
||||
...PerformanceWorkFields
|
||||
...VideoFields
|
||||
...VodConcertFields
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fragment AlbumFields on Album {
|
||||
artistAndGroupDisplayInfo
|
||||
id
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
title
|
||||
}
|
||||
|
||||
fragment ArtistFields on Artist {
|
||||
id
|
||||
name
|
||||
roles {
|
||||
...RoleFields
|
||||
}
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment EpochFields on Epoch {
|
||||
id
|
||||
endYear
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
startYear
|
||||
title
|
||||
}
|
||||
|
||||
fragment GenreFields on Genre {
|
||||
id
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
title
|
||||
}
|
||||
|
||||
fragment GroupFields on Group {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment PartnerFields on Partner {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
subtypeDisplayName
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
}
|
||||
|
||||
fragment PerformanceWorkFields on PerformanceWork {
|
||||
__typename
|
||||
id
|
||||
artists {
|
||||
...artistWithRoleFields
|
||||
}
|
||||
groups {
|
||||
edges {
|
||||
node {
|
||||
id
|
||||
name
|
||||
typeDisplayName
|
||||
}
|
||||
}
|
||||
}
|
||||
work {
|
||||
...workFields
|
||||
}
|
||||
stream {
|
||||
...streamFields
|
||||
}
|
||||
vodConcert {
|
||||
__typename
|
||||
id
|
||||
}
|
||||
duration
|
||||
cuePoints {
|
||||
mark
|
||||
title
|
||||
}
|
||||
}
|
||||
|
||||
fragment VideoFields on Video {
|
||||
id
|
||||
archiveReleaseDate
|
||||
title
|
||||
subtitle
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
stream {
|
||||
...streamFields
|
||||
}
|
||||
trailerStream {
|
||||
...streamFields
|
||||
}
|
||||
duration
|
||||
typeDisplayName
|
||||
duration
|
||||
geoAccessCountries
|
||||
geoAccessMode
|
||||
publicationLevel
|
||||
takedownDate
|
||||
}
|
||||
|
||||
fragment VodConcertFields on VodConcert {
|
||||
id
|
||||
archiveReleaseDate
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
subtitle
|
||||
title
|
||||
typeDisplayName
|
||||
totalDuration
|
||||
geoAccessCountries
|
||||
geoAccessMode
|
||||
trailerStream {
|
||||
...streamFields
|
||||
}
|
||||
publicationLevel
|
||||
takedownDate
|
||||
}
|
||||
|
||||
fragment BannerFields on Banner {
|
||||
description
|
||||
link
|
||||
pictures {
|
||||
...PictureFields
|
||||
}
|
||||
title
|
||||
}'''
|
||||
|
||||
_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
    # Exchange the account credentials for an OAuth access token and keep
    # it in self._TOKEN. Nothing is stored when the response carries no
    # (or an empty) 'access_token'.
    request_headers = {
        'Content-Type': 'application/json',
        'Origin': 'https://www.stage-plus.com',
    }
    credentials = {
        'grant_type': 'password',
        'username': username,
        'password': password,
        'device_info': 'Chrome (Windows)',
        # A fresh random device id is generated for every login attempt.
        'client_device_id': str(uuid.uuid4()),
    }
    payload = json.dumps(credentials, separators=(',', ':')).encode()

    auth = self._download_json(
        'https://audience.api.stageplus.io/oauth/token', None,
        headers=request_headers, data=payload, note='Logging in')

    access_token = auth.get('access_token')
    if access_token:
        self._TOKEN = access_token
|
||||
|
||||
def _real_initialize(self):
    # Make sure a token is available before extraction: keep one that is
    # already set, otherwise fall back to the 'dgplus_access_token' cookie
    # for www.stage-plus.com, and demand a login when neither exists.
    if not self._TOKEN:
        def read_cookie_token():
            cookies = self._get_cookies('https://www.stage-plus.com/')
            return cookies['dgplus_access_token'].value

        self._TOKEN = try_call(read_cookie_token)
        if not self._TOKEN:
            self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
    # Fetch the concert node through the GraphQL API and return a playlist
    # with one entry per performance work that has an id and a valid
    # stream URL. Concert-level metadata is copied onto every entry and
    # then overridden by the work's own fields.
    concert_id = self._match_id(url)

    request_headers = {
        'authorization': f'Bearer {self._TOKEN}',
        'content-type': 'application/json',
        'Origin': 'https://www.stage-plus.com',
    }
    request_body = json.dumps({
        'query': self._GRAPHQL_QUERY,
        'variables': {'videoId': concert_id},
        'operationName': 'videoDetailPage'
    }, separators=(',', ':')).encode()

    response = self._download_json(
        'https://audience.api.stageplus.io/graphql', concert_id,
        headers=request_headers, data=request_body)
    data = response['data']['node']

    metadata = traverse_obj(data, {
        'title': 'title',
        'description': ('shortDescription', {str}),
        'artist': ('artists', 'edges', ..., 'node', 'name'),
        'timestamp': ('archiveReleaseDate', {unified_timestamp}),
        'release_timestamp': ('productionDate', {unified_timestamp}),
    })

    # NOTE(review): the PictureFields fragment of _GRAPHQL_QUERY selects
    # id/url/type but no "name", so the thumbnail 'id' below is presumably
    # never populated - confirm against a live response.
    picture_filter = lambda _, v: url_or_none(v['url'])
    thumbnails = traverse_obj(data, ('pictures', picture_filter, {
        'id': 'name',
        'url': 'url',
    })) or None

    # The token is sent as a "jwt" header for the manifests and is also
    # attached to each entry as http_headers (same dict object for all).
    m3u8_headers = {'jwt': self._TOKEN}

    playable_works = traverse_obj(data, (
        'performanceWorks', lambda _, v: v['id'] and url_or_none(v['stream']['url'])))

    entries = []
    for track_number, work in enumerate(playable_works, 1):
        work_id = work['id']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            work['stream']['url'], work_id, 'mp4', m3u8_id='hls', headers=m3u8_headers)

        work_fields = traverse_obj(work, {
            'title': ('work', 'title'),
            'track': ('work', 'title'),
            'duration': ('duration', {float_or_none}),
            'chapters': (
                # Only cue points whose mark parses as a number are kept.
                'cuePoints', lambda _, v: float_or_none(v['mark']) is not None, {
                    'title': 'title',
                    'start_time': ('mark', {float_or_none}),
                }),
            'artist': ('artists', 'edges', ..., 'node', 'name'),
            'composer': ('work', 'composers', ..., 'name'),
            'genre': ('work', 'genre', 'title'),
        })

        entries.append({
            'id': work_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': m3u8_headers,
            'album': metadata.get('title'),
            'album_artist': metadata.get('artist'),
            'track_number': track_number,
            **metadata,
            **work_fields,
        })

    return self.playlist_result(entries, concert_id, thumbnails=thumbnails, **metadata)
|
|
@ -38,11 +38,23 @@ class TelecaribePlayIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.play.telecaribe.co/liveplus',
|
||||
'info_dict': {
|
||||
'id': 'liveplus',
|
||||
'title': r're:^Señal en vivo Plus',
|
||||
'live_status': 'is_live',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
'skip': 'Geo-restricted to Colombia',
|
||||
}]
|
||||
|
||||
def _download_player_webpage(self, webpage, display_id):
|
||||
page_id = self._search_regex(
|
||||
(r'window.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
|
||||
(r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
|
||||
webpage, 'page_id')
|
||||
|
||||
props = self._download_json(self._search_regex(
|
||||
|
@ -59,14 +71,16 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
player = self._download_player_webpage(webpage, display_id)
|
||||
|
||||
if display_id != 'live':
|
||||
livestream_url = self._search_regex(
|
||||
r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url', default=None)
|
||||
|
||||
if not livestream_url:
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
|
||||
self._get_clean_title(self._og_search_title(webpage)))
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._search_regex(r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url'),
|
||||
display_id, 'mp4')
|
||||
livestream_url, display_id, 'mp4', live=True)
|
||||
|
||||
return {
|
||||
'id': display_id,
|
||||
|
|
|
@ -5,15 +5,22 @@
|
|||
|
||||
|
||||
class TheSunIE(InfoExtractor):
|
||||
_VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?the-?sun(\.co\.uk|\.com)/[^/]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
|
||||
'info_dict': {
|
||||
'id': '2261604',
|
||||
'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.the-sun.com/entertainment/7611415/1000lb-sisters-fans-rip-amy-dangerous-health-decision/',
|
||||
'info_dict': {
|
||||
'id': '7611415',
|
||||
'title': 'md5:e0b9b976f79dc770e5c80f22f40bb844',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
|
@ -12,15 +13,19 @@
|
|||
LazyList,
|
||||
UnsupportedError,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
format_field,
|
||||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
qualities,
|
||||
remove_start,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
@ -200,6 +205,16 @@ def parse_url_key(url_key):
|
|||
|
||||
known_resolutions = {}
|
||||
|
||||
def mp3_meta(url):
    # Format fields for URLs whose extension is 'mp3': mark them as an
    # audio-only music track with no video dimensions. Any other URL
    # contributes no extra fields.
    if determine_ext(url) != 'mp3':
        return {}
    return {
        'format_note': 'Music track',
        'ext': 'mp3',
        'acodec': 'mp3',
        'vcodec': 'none',
        'width': None,
        'height': None,
    }
|
||||
|
||||
def extract_addr(addr, add_meta={}):
|
||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||
if res:
|
||||
|
@ -215,7 +230,8 @@ def extract_addr(addr, add_meta={}):
|
|||
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
||||
**add_meta, **parsed_meta,
|
||||
'format_note': join_nonempty(
|
||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' ')
|
||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
|
||||
**mp3_meta(url),
|
||||
} for url in addr.get('url_list') or []]
|
||||
|
||||
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
||||
|
@ -271,17 +287,15 @@ def extract_addr(addr, add_meta={}):
|
|||
thumbnails = []
|
||||
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
|
||||
'origin_cover', 'dynamic_cover'):
|
||||
cover = video_info.get(cover_id)
|
||||
if cover:
|
||||
for cover_url in cover['url_list']:
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
})
|
||||
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
})
|
||||
|
||||
stats_info = aweme_detail.get('statistics', {})
|
||||
author_info = aweme_detail.get('author', {})
|
||||
music_info = aweme_detail.get('music', {})
|
||||
stats_info = aweme_detail.get('statistics') or {}
|
||||
author_info = aweme_detail.get('author') or {}
|
||||
music_info = aweme_detail.get('music') or {}
|
||||
user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
|
||||
'sec_uid', 'id', 'uid', 'unique_id',
|
||||
expected_type=str_or_none, get_all=False))
|
||||
|
@ -303,20 +317,27 @@ def extract_addr(addr, add_meta={}):
|
|||
'extractor_key': TikTokIE.ie_key(),
|
||||
'extractor': TikTokIE.IE_NAME,
|
||||
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
|
||||
'title': aweme_detail.get('desc'),
|
||||
'description': aweme_detail.get('desc'),
|
||||
'view_count': int_or_none(stats_info.get('play_count')),
|
||||
'like_count': int_or_none(stats_info.get('digg_count')),
|
||||
'repost_count': int_or_none(stats_info.get('share_count')),
|
||||
'comment_count': int_or_none(stats_info.get('comment_count')),
|
||||
'uploader': str_or_none(author_info.get('unique_id')),
|
||||
'creator': str_or_none(author_info.get('nickname')),
|
||||
'uploader_id': str_or_none(author_info.get('uid')),
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'description': ('desc', {str}),
|
||||
'timestamp': ('create_time', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(stats_info, {
|
||||
'view_count': 'play_count',
|
||||
'like_count': 'digg_count',
|
||||
'repost_count': 'share_count',
|
||||
'comment_count': 'comment_count',
|
||||
}, expected_type=int_or_none),
|
||||
**traverse_obj(author_info, {
|
||||
'uploader': 'unique_id',
|
||||
'uploader_id': 'uid',
|
||||
'creator': 'nickname',
|
||||
'channel_id': 'sec_uid',
|
||||
}, expected_type=str_or_none),
|
||||
'uploader_url': user_url,
|
||||
'track': music_track,
|
||||
'album': str_or_none(music_info.get('album')) or None,
|
||||
'artist': music_author or None,
|
||||
'timestamp': int_or_none(aweme_detail.get('create_time')),
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
|
||||
'thumbnails': thumbnails,
|
||||
|
@ -328,37 +349,27 @@ def extract_addr(addr, add_meta={}):
|
|||
'_format_sort_fields': ('quality', 'codec', 'size', 'br'),
|
||||
}
|
||||
|
||||
def _parse_aweme_video_web(self, aweme_detail, webpage_url):
|
||||
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
|
||||
video_info = aweme_detail['video']
|
||||
author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
|
||||
music_info = aweme_detail.get('music') or {}
|
||||
stats_info = aweme_detail.get('stats') or {}
|
||||
user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
|
||||
'secUid', 'id', 'uid', 'uniqueId',
|
||||
expected_type=str_or_none, get_all=False)
|
||||
or aweme_detail.get('authorSecId'))
|
||||
channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
|
||||
user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
|
||||
|
||||
formats = []
|
||||
play_url = video_info.get('playAddr')
|
||||
width = video_info.get('width')
|
||||
height = video_info.get('height')
|
||||
if isinstance(play_url, str):
|
||||
formats = [{
|
||||
width = int_or_none(video_info.get('width'))
|
||||
height = int_or_none(video_info.get('height'))
|
||||
|
||||
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(play_url),
|
||||
'ext': 'mp4',
|
||||
'width': width,
|
||||
'height': height,
|
||||
}]
|
||||
elif isinstance(play_url, list):
|
||||
formats = [{
|
||||
'url': self._proto_relative_url(url),
|
||||
'ext': 'mp4',
|
||||
'width': width,
|
||||
'height': height,
|
||||
} for url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none) if url]
|
||||
})
|
||||
|
||||
download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
|
||||
if download_url:
|
||||
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
|
||||
formats.append({
|
||||
'format_id': 'download',
|
||||
'url': self._proto_relative_url(download_url),
|
||||
|
@ -366,38 +377,48 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url):
|
|||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
thumbnails = []
|
||||
for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
|
||||
if aweme_detail.get(thumbnail_name):
|
||||
thumbnails = [{
|
||||
'url': self._proto_relative_url(aweme_detail[thumbnail_name]),
|
||||
'width': width,
|
||||
'height': height
|
||||
}]
|
||||
for thumb_url in traverse_obj(aweme_detail, (
|
||||
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumb_url),
|
||||
'width': width,
|
||||
'height': height,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
|
||||
'title': aweme_detail.get('desc'),
|
||||
'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
|
||||
'view_count': int_or_none(stats_info.get('playCount')),
|
||||
'like_count': int_or_none(stats_info.get('diggCount')),
|
||||
'repost_count': int_or_none(stats_info.get('shareCount')),
|
||||
'comment_count': int_or_none(stats_info.get('commentCount')),
|
||||
'timestamp': int_or_none(aweme_detail.get('createTime')),
|
||||
'creator': str_or_none(author_info.get('nickname')),
|
||||
'uploader': str_or_none(author_info.get('uniqueId') or aweme_detail.get('author')),
|
||||
'uploader_id': str_or_none(traverse_obj(author_info, 'id', 'uid', 'authorId')),
|
||||
'id': video_id,
|
||||
**traverse_obj(aweme_detail, {
|
||||
'title': ('desc', {str}),
|
||||
'description': ('desc', {str}),
|
||||
'duration': ('video', 'duration', {int_or_none}),
|
||||
'timestamp': ('createTime', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(author_info or aweme_detail, {
|
||||
'creator': ('nickname', {str}),
|
||||
'uploader': (('uniqueId', 'author'), {str}),
|
||||
'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
|
||||
}, get_all=False),
|
||||
**traverse_obj(stats_info, {
|
||||
'view_count': 'playCount',
|
||||
'like_count': 'diggCount',
|
||||
'repost_count': 'shareCount',
|
||||
'comment_count': 'commentCount',
|
||||
}, expected_type=int_or_none),
|
||||
**traverse_obj(music_info, {
|
||||
'track': 'title',
|
||||
'album': ('album', {lambda x: x or None}),
|
||||
'artist': 'authorName',
|
||||
}, expected_type=str),
|
||||
'channel_id': channel_id,
|
||||
'uploader_url': user_url,
|
||||
'track': str_or_none(music_info.get('title')),
|
||||
'album': str_or_none(music_info.get('album')) or None,
|
||||
'artist': str_or_none(music_info.get('authorName')),
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'description': str_or_none(aweme_detail.get('desc')),
|
||||
'http_headers': {
|
||||
'Referer': webpage_url
|
||||
'Referer': webpage_url,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -431,7 +452,8 @@ class TikTokIE(TikTokBaseIE):
|
|||
'artist': 'Ysrbeats',
|
||||
'album': 'Lehanga',
|
||||
'track': 'Lehanga',
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
|
||||
'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
|
||||
|
@ -446,6 +468,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'uploader': 'patrox',
|
||||
'uploader_id': '18702747',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
|
||||
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
|
||||
'creator': 'patroX',
|
||||
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
|
||||
'upload_date': '20190930',
|
||||
|
@ -456,7 +479,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'comment_count': int,
|
||||
'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
|
||||
'track': 'Big Fun',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Banned audio, only available on the app
|
||||
'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
|
||||
|
@ -469,6 +492,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
||||
'uploader_id': '6974687867511718913',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
|
||||
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
|
||||
'track': 'Boka Dance',
|
||||
'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
||||
'timestamp': 1626121503,
|
||||
|
@ -479,7 +503,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Sponsored video, only available with feed workaround
|
||||
'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
|
||||
|
@ -492,6 +516,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'creator': 'Slap And Run',
|
||||
'uploader_id': '7036055384943690754',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
|
||||
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
|
||||
'track': 'Promoted Music',
|
||||
'timestamp': 1639754738,
|
||||
'duration': 30,
|
||||
|
@ -502,7 +527,6 @@ class TikTokIE(TikTokBaseIE):
|
|||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
|
||||
}, {
|
||||
# Video without title and description
|
||||
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
|
||||
|
@ -515,6 +539,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
'creator': 'Pokemon',
|
||||
'uploader_id': '6820838815978423302',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
|
||||
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
|
||||
'track': 'original sound',
|
||||
'timestamp': 1643714123,
|
||||
'duration': 6,
|
||||
|
@ -549,6 +574,56 @@ class TikTokIE(TikTokBaseIE):
|
|||
'comment_count': int,
|
||||
},
|
||||
'skip': 'This video is unavailable',
|
||||
}, {
|
||||
# slideshow audio-only mp3 format
|
||||
'url': 'https://www.tiktok.com/@_le_cannibale_/video/7139980461132074283',
|
||||
'info_dict': {
|
||||
'id': '7139980461132074283',
|
||||
'ext': 'mp3',
|
||||
'title': 'TikTok video #7139980461132074283',
|
||||
'description': '',
|
||||
'creator': 'Antaura',
|
||||
'uploader': '_le_cannibale_',
|
||||
'uploader_id': '6604511138619654149',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
|
||||
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
|
||||
'artist': 'nathan !',
|
||||
'track': 'grahamscott canon',
|
||||
'upload_date': '20220905',
|
||||
'timestamp': 1662406249,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https://.+\.webp',
|
||||
},
|
||||
}, {
|
||||
# only available via web
|
||||
'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
|
||||
'md5': '8d8c0be14127020cd9f5def4a2e6b411',
|
||||
'info_dict': {
|
||||
'id': '7206382937372134662',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||
'creator': 'MoxyPatch',
|
||||
'uploader': 'moxypatch',
|
||||
'uploader_id': '7039142049363379205',
|
||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||
'artist': 'your worst nightmare',
|
||||
'track': 'original sound',
|
||||
'upload_date': '20230303',
|
||||
'timestamp': 1677866781,
|
||||
'duration': 10,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https://.+',
|
||||
'thumbnails': 'count:3',
|
||||
},
|
||||
'expected_warnings': ['Unable to find video in feed'],
|
||||
}, {
|
||||
# Auto-captions available
|
||||
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
|
||||
|
@ -563,7 +638,7 @@ def _real_extract(self, url):
|
|||
self.report_warning(f'{e}; trying with webpage')
|
||||
|
||||
url = self._create_url(user_id, video_id)
|
||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
|
||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
|
||||
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
|
||||
if next_data:
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
|
||||
|
@ -574,7 +649,7 @@ def _real_extract(self, url):
|
|||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
|
||||
|
||||
if status == 0:
|
||||
return self._parse_aweme_video_web(video_data, url)
|
||||
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||
elif status == 10216:
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
raise ExtractorError('Video not available', video_id=video_id)
|
||||
|
@ -801,6 +876,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'description': '#杨超越 小小水手带你去远航❤️',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 19782,
|
||||
'timestamp': 1620905839,
|
||||
|
@ -810,6 +886,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||
|
@ -821,8 +898,9 @@ class DouyinIE(TikTokBaseIE):
|
|||
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||
'uploader_id': '408654318141572',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||
'creator': '杨超越工作室',
|
||||
'duration': 42608,
|
||||
'duration': 42479,
|
||||
'timestamp': 1625739481,
|
||||
'upload_date': '20210708',
|
||||
'track': '@杨超越工作室创作的原声',
|
||||
|
@ -830,6 +908,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||
|
@ -841,8 +920,9 @@ class DouyinIE(TikTokBaseIE):
|
|||
'description': '#一起看海 出现在你的夏日里',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 17228,
|
||||
'duration': 17343,
|
||||
'timestamp': 1619098692,
|
||||
'upload_date': '20210422',
|
||||
'track': '@杨超越创作的原声',
|
||||
|
@ -850,6 +930,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.douyin.com/video/6950251282489675042',
|
||||
|
@ -878,6 +959,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||
'uploader_id': '110403406559',
|
||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||
'creator': '杨超越',
|
||||
'duration': 15115,
|
||||
'timestamp': 1621261163,
|
||||
|
@ -887,6 +969,7 @@ class DouyinIE(TikTokBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpe?g',
|
||||
},
|
||||
}]
|
||||
_APP_VERSIONS = [('23.3.0', '230300')]
|
||||
|
@ -918,7 +1001,7 @@ def _real_extract(self, url):
|
|||
|
||||
render_data = self._parse_json(
|
||||
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
|
||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
|
||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
|
||||
|
||||
|
||||
class TikTokVMIE(InfoExtractor):
|
||||
|
@ -983,40 +1066,173 @@ def _real_extract(self, url):
|
|||
return self.url_result(new_url)
|
||||
|
||||
|
||||
class TikTokLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/live'
|
||||
class TikTokLiveIE(TikTokBaseIE):
|
||||
_VALID_URL = r'''(?x)https?://(?:
|
||||
(?:www\.)?tiktok\.com/@(?P<uploader>[\w.-]+)/live|
|
||||
m\.tiktok\.com/share/live/(?P<id>\d+)
|
||||
)'''
|
||||
IE_NAME = 'tiktok:live'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tiktok.com/@weathernewslive/live',
|
||||
'info_dict': {
|
||||
'id': '7210809319192726273',
|
||||
'ext': 'mp4',
|
||||
'title': r're:ウェザーニュースLiVE[\d\s:-]*',
|
||||
'creator': 'ウェザーニュースLiVE',
|
||||
'uploader': 'weathernewslive',
|
||||
'uploader_id': '6621496731283095554',
|
||||
'uploader_url': 'https://www.tiktok.com/@weathernewslive',
|
||||
'live_status': 'is_live',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/@pilarmagenta/live',
|
||||
'info_dict': {
|
||||
'id': '7209423610325322522',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'creator': 'Pilarmagenta',
|
||||
'uploader': 'pilarmagenta',
|
||||
'uploader_id': '6624846890674683909',
|
||||
'uploader_url': 'https://www.tiktok.com/@pilarmagenta',
|
||||
'live_status': 'is_live',
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'skip': 'Livestream',
|
||||
}, {
|
||||
'url': 'https://m.tiktok.com/share/live/7209423610325322522/?language=en',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tiktok.com/@iris04201/live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _call_api(self, url, param, room_id, uploader, key=None):
|
||||
response = traverse_obj(self._download_json(
|
||||
url, room_id, fatal=False, query={
|
||||
'aid': '1988',
|
||||
param: room_id,
|
||||
}), (key, {dict}), default={})
|
||||
|
||||
# status == 2 if live else 4
|
||||
if int_or_none(response.get('status')) == 2:
|
||||
return response
|
||||
# If room_id is obtained via mobile share URL and cannot be refreshed, do not wait for live
|
||||
elif not uploader:
|
||||
raise ExtractorError('This livestream has ended', expected=True)
|
||||
raise UserNotLive(video_id=uploader)
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
webpage = self._download_webpage(url, uploader, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
|
||||
room_id = self._html_search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
||||
uploader, room_id = self._match_valid_url(url).group('uploader', 'id')
|
||||
webpage = self._download_webpage(
|
||||
url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)
|
||||
|
||||
if webpage:
|
||||
data = try_call(lambda: self._get_sigi_state(webpage, uploader or room_id))
|
||||
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
|
||||
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
||||
or room_id)
|
||||
uploader = uploader or traverse_obj(
|
||||
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
|
||||
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
|
||||
|
||||
if not room_id:
|
||||
raise UserNotLive(video_id=uploader)
|
||||
live_info = traverse_obj(self._download_json(
|
||||
'https://www.tiktok.com/api/live/detail/', room_id, query={
|
||||
'aid': '1988',
|
||||
'roomID': room_id,
|
||||
}), 'LiveRoomInfo', expected_type=dict, default={})
|
||||
|
||||
if 'status' not in live_info:
|
||||
raise ExtractorError('Unexpected response from TikTok API')
|
||||
# status = 2 if live else 4
|
||||
if not int_or_none(live_info['status']) == 2:
|
||||
raise UserNotLive(video_id=uploader)
|
||||
formats = []
|
||||
live_info = self._call_api(
|
||||
'https://webcast.tiktok.com/webcast/room/info', 'room_id', room_id, uploader, key='data')
|
||||
|
||||
get_quality = qualities(('SD1', 'ld', 'SD2', 'sd', 'HD1', 'hd', 'FULL_HD1', 'uhd', 'ORIGION', 'origin'))
|
||||
parse_inner = lambda x: self._parse_json(x, None)
|
||||
|
||||
for quality, stream in traverse_obj(live_info, (
|
||||
'stream_url', 'live_core_sdk_data', 'pull_data', 'stream_data',
|
||||
{parse_inner}, 'data', {dict}), default={}).items():
|
||||
|
||||
sdk_params = traverse_obj(stream, ('main', 'sdk_params', {parse_inner}, {
|
||||
'vcodec': ('VCodec', {str}),
|
||||
'tbr': ('vbitrate', {lambda x: int_or_none(x, 1000)}),
|
||||
'resolution': ('resolution', {lambda x: re.match(r'(?i)\d+x\d+|\d+p', x).group().lower()}),
|
||||
}))
|
||||
|
||||
flv_url = traverse_obj(stream, ('main', 'flv', {url_or_none}))
|
||||
if flv_url:
|
||||
formats.append({
|
||||
'url': flv_url,
|
||||
'ext': 'flv',
|
||||
'format_id': f'flv-{quality}',
|
||||
'quality': get_quality(quality),
|
||||
**sdk_params,
|
||||
})
|
||||
|
||||
hls_url = traverse_obj(stream, ('main', 'hls', {url_or_none}))
|
||||
if hls_url:
|
||||
formats.append({
|
||||
'url': hls_url,
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
'format_id': f'hls-{quality}',
|
||||
'quality': get_quality(quality),
|
||||
**sdk_params,
|
||||
})
|
||||
|
||||
def get_vcodec(*keys):
|
||||
return traverse_obj(live_info, (
|
||||
'stream_url', *keys, {parse_inner}, 'VCodec', {str}))
|
||||
|
||||
for stream in ('hls', 'rtmp'):
|
||||
stream_url = traverse_obj(live_info, ('stream_url', f'{stream}_pull_url', {url_or_none}))
|
||||
if stream_url:
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
'ext': 'mp4' if stream == 'hls' else 'flv',
|
||||
'protocol': 'm3u8_native' if stream == 'hls' else 'https',
|
||||
'format_id': f'{stream}-pull',
|
||||
'vcodec': get_vcodec(f'{stream}_pull_url_params'),
|
||||
'quality': get_quality('ORIGION'),
|
||||
})
|
||||
|
||||
for f_id, f_url in traverse_obj(live_info, ('stream_url', 'flv_pull_url', {dict}), default={}).items():
|
||||
if not url_or_none(f_url):
|
||||
continue
|
||||
formats.append({
|
||||
'url': f_url,
|
||||
'ext': 'flv',
|
||||
'format_id': f'flv-{f_id}'.lower(),
|
||||
'vcodec': get_vcodec('flv_pull_url_params', f_id),
|
||||
'quality': get_quality(f_id),
|
||||
})
|
||||
|
||||
# If uploader is a guest on another's livestream, primary endpoint will not have m3u8 URLs
|
||||
if not traverse_obj(formats, lambda _, v: v['ext'] == 'mp4'):
|
||||
live_info = merge_dicts(live_info, self._call_api(
|
||||
'https://www.tiktok.com/api/live/detail/', 'roomID', room_id, uploader, key='LiveRoomInfo'))
|
||||
if url_or_none(live_info.get('liveUrl')):
|
||||
formats.append({
|
||||
'url': live_info['liveUrl'],
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8_native',
|
||||
'format_id': 'hls-fallback',
|
||||
'vcodec': 'h264',
|
||||
'quality': get_quality('origin'),
|
||||
})
|
||||
|
||||
uploader = uploader or traverse_obj(live_info, ('ownerInfo', 'uniqueId'), ('owner', 'display_id'))
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'title': live_info.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage, default=''),
|
||||
'uploader': uploader,
|
||||
'uploader_id': traverse_obj(live_info, ('ownerInfo', 'id')),
|
||||
'creator': traverse_obj(live_info, ('ownerInfo', 'nickname')),
|
||||
'concurrent_view_count': traverse_obj(live_info, ('liveRoomStats', 'userCount'), expected_type=int),
|
||||
'formats': self._extract_m3u8_formats(live_info['liveUrl'], room_id, 'mp4', live=True),
|
||||
'uploader_url': format_field(uploader, None, self._UPLOADER_URL_FORMAT) or None,
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
'_format_sort_fields': ('quality', 'ext'),
|
||||
**traverse_obj(live_info, {
|
||||
'title': 'title',
|
||||
'uploader_id': (('ownerInfo', 'owner'), 'id', {str_or_none}),
|
||||
'creator': (('ownerInfo', 'owner'), 'nickname'),
|
||||
'concurrent_view_count': (('user_count', ('liveRoomStats', 'userCount')), {int_or_none}),
|
||||
}, get_all=False),
|
||||
}
|
||||
|
|
|
@ -1,15 +1,21 @@
|
|||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
UnsupportedError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
urljoin,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
@ -22,25 +28,22 @@ def _perform_login(self, username, password):
|
|||
if self._API_HEADERS.get('Authorization'):
|
||||
return
|
||||
|
||||
user_check = self._download_json(
|
||||
headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}
|
||||
user_check = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
|
||||
fatal=False, expected_status=400, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://triller.co',
|
||||
}, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
|
||||
if user_check.get('status'): # endpoint returns "status":false if username exists
|
||||
fatal=False, expected_status=400, headers=headers,
|
||||
data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status')
|
||||
|
||||
if user_check: # endpoint returns `"status":false` if username exists
|
||||
raise ExtractorError('Unable to login: Invalid username', expected=True)
|
||||
|
||||
credentials = {
|
||||
'username': username,
|
||||
'password': password,
|
||||
}
|
||||
login = self._download_json(
|
||||
f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
|
||||
fatal=False, expected_status=400, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Origin': 'https://triller.co',
|
||||
}, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
|
||||
f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
|
||||
expected_status=400, headers=headers, data=json.dumps({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}, separators=(',', ':')).encode()) or {}
|
||||
|
||||
if not login.get('auth_token'):
|
||||
if login.get('error') == 1008:
|
||||
raise ExtractorError('Unable to login: Incorrect password', expected=True)
|
||||
|
@ -55,100 +58,100 @@ def _get_comments(self, video_id, limit=15):
|
|||
headers=self._API_HEADERS, query={'limit': limit}) or {}
|
||||
if not comment_info.get('comments'):
|
||||
return
|
||||
for comment_dict in comment_info['comments']:
|
||||
yield {
|
||||
'author': traverse_obj(comment_dict, ('author', 'username')),
|
||||
'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
|
||||
'id': comment_dict.get('id'),
|
||||
'text': comment_dict.get('body'),
|
||||
'timestamp': unified_timestamp(comment_dict.get('timestamp')),
|
||||
}
|
||||
yield from traverse_obj(comment_info, ('comments', ..., {
|
||||
'id': ('id', {str_or_none}),
|
||||
'text': 'body',
|
||||
'author': ('author', 'username'),
|
||||
'author_id': ('author', 'user_id'),
|
||||
'timestamp': ('timestamp', {unified_timestamp}),
|
||||
}))
|
||||
|
||||
def _check_user_info(self, user_info):
|
||||
if not user_info:
|
||||
self.report_warning('Unable to extract user info')
|
||||
elif user_info.get('private') and not user_info.get('followed_by_me'):
|
||||
if user_info.get('private') and not user_info.get('followed_by_me'):
|
||||
raise ExtractorError('This video is private', expected=True)
|
||||
elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
|
||||
raise ExtractorError('The author of the video is blocked', expected=True)
|
||||
return user_info
|
||||
|
||||
def _parse_video_info(self, video_info, username, user_info=None):
|
||||
video_uuid = video_info.get('video_uuid')
|
||||
video_id = video_info.get('id')
|
||||
def _parse_video_info(self, video_info, username, user_id, display_id=None):
|
||||
video_id = str(video_info['id'])
|
||||
display_id = display_id or video_info.get('video_uuid')
|
||||
|
||||
if traverse_obj(video_info, (
|
||||
None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
|
||||
{lambda x: re.search(r'/copyright/', x)}), get_all=False):
|
||||
self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)
|
||||
|
||||
def format_info(url):
|
||||
return {
|
||||
'url': url,
|
||||
'ext': determine_ext(url),
|
||||
'format_id': url_basename(url).split('.')[0],
|
||||
}
|
||||
|
||||
formats = []
|
||||
video_url = traverse_obj(video_info, 'video_url', 'stream_url')
|
||||
if video_url:
|
||||
|
||||
if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
|
||||
for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'ext': 'mp4',
|
||||
'vcodec': 'h264',
|
||||
'width': video_info.get('width'),
|
||||
'height': video_info.get('height'),
|
||||
'format_id': url_basename(video_url).split('.')[0],
|
||||
'filesize': video_info.get('filesize'),
|
||||
})
|
||||
video_set = video_info.get('video_set') or []
|
||||
for video in video_set:
|
||||
resolution = video.get('resolution') or ''
|
||||
formats.append({
|
||||
'url': video['url'],
|
||||
'ext': 'mp4',
|
||||
**format_info(video['url']),
|
||||
**parse_resolution(video.get('resolution')),
|
||||
'vcodec': video.get('codec'),
|
||||
'vbr': int_or_none(video.get('bitrate'), 1000),
|
||||
'width': int_or_none(resolution.split('x')[0]),
|
||||
'height': int_or_none(resolution.split('x')[1]),
|
||||
'format_id': url_basename(video['url']).split('.')[0],
|
||||
})
|
||||
audio_url = video_info.get('audio_url')
|
||||
if audio_url:
|
||||
|
||||
video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
|
||||
if video_url:
|
||||
formats.append({
|
||||
'url': audio_url,
|
||||
'ext': 'm4a',
|
||||
'format_id': url_basename(audio_url).split('.')[0],
|
||||
**format_info(video_url),
|
||||
'vcodec': 'h264',
|
||||
**traverse_obj(video_info, {
|
||||
'width': 'width',
|
||||
'height': 'height',
|
||||
'filesize': 'filesize',
|
||||
}, expected_type=int_or_none),
|
||||
})
|
||||
|
||||
manifest_url = video_info.get('transcoded_url')
|
||||
if manifest_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
audio_url = url_or_none(video_info.get('audio_url'))
|
||||
if audio_url:
|
||||
formats.append(format_info(audio_url))
|
||||
|
||||
comment_count = int_or_none(video_info.get('comment_count'))
|
||||
|
||||
user_info = user_info or traverse_obj(video_info, 'user', default={})
|
||||
comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))
|
||||
|
||||
return {
|
||||
'id': str_or_none(video_id) or video_uuid,
|
||||
'title': video_info.get('description') or f'Video by {username}',
|
||||
'thumbnail': video_info.get('thumbnail_url'),
|
||||
'description': video_info.get('description'),
|
||||
'uploader': str_or_none(username),
|
||||
'uploader_id': str_or_none(user_info.get('user_id')),
|
||||
'creator': str_or_none(user_info.get('name')),
|
||||
'timestamp': unified_timestamp(video_info.get('timestamp')),
|
||||
'upload_date': unified_strdate(video_info.get('timestamp')),
|
||||
'duration': int_or_none(video_info.get('duration')),
|
||||
'view_count': int_or_none(video_info.get('play_count')),
|
||||
'like_count': int_or_none(video_info.get('likes_count')),
|
||||
'artist': str_or_none(video_info.get('song_artist')),
|
||||
'track': str_or_none(video_info.get('song_title')),
|
||||
'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'uploader': username,
|
||||
'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
|
||||
'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
|
||||
'uploader_url': f'https://triller.co/@{username}',
|
||||
'extractor_key': TrillerIE.ie_key(),
|
||||
'extractor': TrillerIE.IE_NAME,
|
||||
'formats': formats,
|
||||
'comment_count': comment_count,
|
||||
'__post_extractor': self.extract_comments(video_id, comment_count),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
|
||||
'description': 'description',
|
||||
'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
'timestamp': ('timestamp', {unified_timestamp}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'like_count': ('likes_count', {int_or_none}),
|
||||
'artist': 'song_artist',
|
||||
'track': 'song_title',
|
||||
}, get_all=False),
|
||||
}
|
||||
|
||||
|
||||
class TrillerIE(TrillerBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?triller\.co/
|
||||
@(?P<username>[\w\._]+)/video/
|
||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
||||
@(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
||||
|
@ -165,16 +168,14 @@ class TrillerIE(TrillerBaseIE):
|
|||
'timestamp': 1660598222,
|
||||
'upload_date': '20220815',
|
||||
'duration': 47,
|
||||
'height': 3840,
|
||||
'width': 2160,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Megan Thee Stallion',
|
||||
'track': 'Her',
|
||||
'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
||||
'uploader_url': 'https://triller.co/@theestallion',
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
'skip': 'This video has been removed due to licensing restrictions',
|
||||
}, {
|
||||
'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||
'md5': '874055f462af5b0699b9dbb527a505a0',
|
||||
|
@ -182,6 +183,7 @@ class TrillerIE(TrillerBaseIE):
|
|||
'id': '71621339',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||
'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||
'uploader': 'charlidamelio',
|
||||
|
@ -190,59 +192,75 @@ class TrillerIE(TrillerBaseIE):
|
|||
'timestamp': 1660773354,
|
||||
'upload_date': '20220817',
|
||||
'duration': 16,
|
||||
'height': 1920,
|
||||
'width': 1080,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Dixie',
|
||||
'track': 'Someone to Blame',
|
||||
'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||
'uploader_url': 'https://triller.co/@charlidamelio',
|
||||
'comment_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
|
||||
'md5': 'af7b3553e4b8bfca507636471ee2eb41',
|
||||
'info_dict': {
|
||||
'id': '71837829',
|
||||
'ext': 'mp4',
|
||||
'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop',
|
||||
'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
|
||||
'uploader': 'theestallion',
|
||||
'uploader_id': '18992236',
|
||||
'creator': 'Megan Thee Stallion',
|
||||
'timestamp': 1662486178,
|
||||
'upload_date': '20220906',
|
||||
'duration': 30,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Unknown',
|
||||
'track': 'Unknown',
|
||||
'uploader_url': 'https://triller.co/@theestallion',
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
username, video_uuid = self._match_valid_url(url).group('username', 'id')
|
||||
username, display_id = self._match_valid_url(url).group('username', 'id')
|
||||
|
||||
video_info = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE_URL}/api/videos/{video_uuid}',
|
||||
video_uuid, note='Downloading video info API JSON',
|
||||
errnote='Unable to download video info API JSON',
|
||||
headers=self._API_HEADERS), ('videos', 0))
|
||||
if not video_info:
|
||||
raise ExtractorError('No video info found in API response')
|
||||
video_info = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/videos/{display_id}', display_id,
|
||||
headers=self._API_HEADERS)['videos'][0]
|
||||
|
||||
user_info = self._check_user_info(video_info.get('user') or {})
|
||||
return self._parse_video_info(video_info, username, user_info)
|
||||
self._check_user_info(video_info.get('user') or {})
|
||||
|
||||
return self._parse_video_info(video_info, username, None, display_id)
|
||||
|
||||
|
||||
class TrillerUserIE(TrillerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
|
||||
_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
# first videos request only returns 2 videos
|
||||
'url': 'https://triller.co/@theestallion',
|
||||
'playlist_mincount': 9,
|
||||
'playlist_mincount': 12,
|
||||
'info_dict': {
|
||||
'id': '18992236',
|
||||
'title': 'theestallion',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://triller.co/@charlidamelio',
|
||||
'playlist_mincount': 25,
|
||||
'playlist_mincount': 150,
|
||||
'info_dict': {
|
||||
'id': '1875551',
|
||||
'title': 'charlidamelio',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._API_HEADERS.get('Authorization'):
|
||||
guest = self._download_json(
|
||||
f'{self._API_BASE_URL}/user/create_guest',
|
||||
None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
|
||||
f'{self._API_BASE_URL}/user/create_guest', None,
|
||||
note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
|
||||
'platform': 'Web',
|
||||
'app_version': '',
|
||||
})
|
||||
|
@ -251,44 +269,65 @@ def _real_initialize(self):
|
|||
|
||||
self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
|
||||
|
||||
def _extract_video_list(self, username, user_id, limit=6):
|
||||
query = {
|
||||
'limit': limit,
|
||||
}
|
||||
def _entries(self, username, user_id, limit=6):
|
||||
query = {'limit': limit}
|
||||
for page in itertools.count(1):
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
video_list = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/users/{user_id}/videos',
|
||||
username, note=f'Downloading user video list page {page}',
|
||||
errnote='Unable to download user video list', headers=self._API_HEADERS,
|
||||
query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
if not video_list.get('videos'):
|
||||
break
|
||||
yield from video_list['videos']
|
||||
query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
|
||||
videos = self._download_json(
|
||||
f'{self._API_BASE_URL}/api/users/{user_id}/videos',
|
||||
username, note=f'Downloading user video list page {page}',
|
||||
headers=self._API_HEADERS, query=query)
|
||||
|
||||
for video in traverse_obj(videos, ('videos', ...)):
|
||||
yield self._parse_video_info(video, username, user_id)
|
||||
|
||||
query['before_time'] = traverse_obj(videos, ('videos', -1, 'timestamp'))
|
||||
if not query['before_time']:
|
||||
break
|
||||
|
||||
def _entries(self, videos, username, user_info):
|
||||
for video in videos:
|
||||
yield self._parse_video_info(video, username, user_info)
|
||||
|
||||
def _real_extract(self, url):
|
||||
username = self._match_id(url)
|
||||
|
||||
user_info = self._check_user_info(self._download_json(
|
||||
f'{self._API_BASE_URL}/api/users/by_username/{username}',
|
||||
username, note='Downloading user info',
|
||||
errnote='Failed to download user info', headers=self._API_HEADERS).get('user', {}))
|
||||
username, note='Downloading user info', headers=self._API_HEADERS)['user'])
|
||||
|
||||
user_id = str_or_none(user_info.get('user_id'))
|
||||
videos = self._extract_video_list(username, user_id)
|
||||
thumbnail = user_info.get('avatar_url')
|
||||
if not user_id:
|
||||
raise ExtractorError('Unable to extract user ID')
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
|
||||
self._entries(username, user_id), user_id, username, thumbnail=user_info.get('avatar_url'))
|
||||
|
||||
|
||||
class TrillerShortIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://v.triller.co/WWZNWk',
|
||||
'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
|
||||
'info_dict': {
|
||||
'id': '66210052',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
|
||||
'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
|
||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||
'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
|
||||
'uploader': 'statefairent',
|
||||
'uploader_id': '487545193',
|
||||
'creator': 'Official Summer Fair of LA',
|
||||
'timestamp': 1629655457,
|
||||
'upload_date': '20210822',
|
||||
'duration': 19,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'artist': 'Unknown',
|
||||
'track': 'Unknown',
|
||||
'uploader_url': 'https://triller.co/@statefairent',
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
real_url = self._request_webpage(HEADRequest(url), self._match_id(url)).geturl()
|
||||
if self.suitable(real_url): # Prevent infinite loop in case redirect fails
|
||||
raise UnsupportedError(real_url)
|
||||
return self.url_result(real_url)
|
||||
|
|
|
@ -482,21 +482,34 @@ def _real_extract(self, url):
|
|||
class TVPVODBaseIE(InfoExtractor):
|
||||
_API_BASE_URL = 'https://vod.tvp.pl/api/products'
|
||||
|
||||
def _call_api(self, resource, video_id, **kwargs):
|
||||
return self._download_json(
|
||||
def _call_api(self, resource, video_id, query={}, **kwargs):
|
||||
is_valid = lambda x: 200 <= x < 300
|
||||
document, urlh = self._download_json_handle(
|
||||
f'{self._API_BASE_URL}/{resource}', video_id,
|
||||
query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs)
|
||||
query={'lang': 'pl', 'platform': 'BROWSER', **query},
|
||||
expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
|
||||
if is_valid(urlh.status):
|
||||
return document
|
||||
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
|
||||
|
||||
def _parse_video(self, video):
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'tvp:' + video['externalUid'],
|
||||
'ie_key': TVPEmbedIE.ie_key(),
|
||||
'title': video.get('title'),
|
||||
'description': traverse_obj(video, ('lead', 'description')),
|
||||
'age_limit': int_or_none(video.get('rating')),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
}
|
||||
def _parse_video(self, video, with_url=True):
|
||||
info_dict = traverse_obj(video, {
|
||||
'id': ('id', {str_or_none}),
|
||||
'title': 'title',
|
||||
'age_limit': ('rating', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'episode_number': ('number', {int_or_none}),
|
||||
'series': ('season', 'serial', 'title', {str_or_none}),
|
||||
'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
|
||||
})
|
||||
info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
|
||||
if with_url:
|
||||
info_dict.update({
|
||||
'_type': 'url',
|
||||
'url': video['webUrl'],
|
||||
'ie_key': TVPVODVideoIE.ie_key(),
|
||||
})
|
||||
return info_dict
|
||||
|
||||
|
||||
class TVPVODVideoIE(TVPVODBaseIE):
|
||||
|
@ -506,37 +519,70 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
|||
_TESTS = [{
|
||||
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
||||
'info_dict': {
|
||||
'id': '60468609',
|
||||
'id': '311357',
|
||||
'ext': 'mp4',
|
||||
'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
|
||||
'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
|
||||
'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
|
||||
'duration': 300,
|
||||
'episode_number': 24,
|
||||
'episode': 'Episode 24',
|
||||
'age_limit': 0,
|
||||
'series': 'Laboratorium alchemika',
|
||||
'thumbnail': 're:https://.+',
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
|
||||
'info_dict': {
|
||||
'id': '51640077',
|
||||
'id': '339667',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
|
||||
'series': 'Ukraiński sługa narodu',
|
||||
'title': 'Ukraiński sługa narodu',
|
||||
'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
|
||||
'age_limit': 12,
|
||||
'episode': 'Episode 0',
|
||||
'episode_number': 0,
|
||||
'duration': 3051,
|
||||
'thumbnail': 're:https://.+',
|
||||
'thumbnail': 're:https?://.+',
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'embed fails with "payment required"',
|
||||
'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
|
||||
'info_dict': {
|
||||
'id': '398869',
|
||||
'ext': 'mp4',
|
||||
'title': 'odc. 7',
|
||||
'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
|
||||
'duration': 2750,
|
||||
'age_limit': 16,
|
||||
'series': 'Polowanie na ćmy',
|
||||
'episode_number': 7,
|
||||
'episode': 'Episode 7',
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._parse_video(self._call_api(f'vods/{video_id}', video_id))
|
||||
info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False)
|
||||
|
||||
playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
|
||||
|
||||
info_dict['formats'] = []
|
||||
for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
|
||||
info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
|
||||
for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
|
||||
info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
|
||||
|
||||
info_dict['subtitles'] = {}
|
||||
for sub in playlist.get('subtitles') or []:
|
||||
info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
|
||||
'url': sub['url'],
|
||||
'ext': 'ttml',
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
class TVPVODSeriesIE(TVPVODBaseIE):
|
||||
|
@ -551,7 +597,7 @@ class TVPVODSeriesIE(TVPVODBaseIE):
|
|||
'age_limit': 12,
|
||||
'categories': ['seriale'],
|
||||
},
|
||||
'playlist_count': 129,
|
||||
'playlist_count': 130,
|
||||
}, {
|
||||
'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
|
||||
'only_matching': True,
|
||||
|
|
|
@ -179,6 +179,14 @@ def _download_access_token(self, video_id, token_kind, param_name):
|
|||
video_id, ops,
|
||||
'Downloading %s access token GraphQL' % token_kind)['data'][method]
|
||||
|
||||
def _get_thumbnails(self, thumbnail):
|
||||
return [{
|
||||
'url': re.sub(r'\d+x\d+(\.\w+)($|(?=[?#]))', r'0x0\g<1>', thumbnail),
|
||||
'preference': 1,
|
||||
}, {
|
||||
'url': thumbnail,
|
||||
}] if thumbnail else None
|
||||
|
||||
|
||||
class TwitchVodIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:vod'
|
||||
|
@ -460,15 +468,13 @@ def _extract_info_gql(self, info, item_id):
|
|||
is_live, thumbnail = True, None
|
||||
else:
|
||||
is_live = False
|
||||
for p in ('width', 'height'):
|
||||
thumbnail = thumbnail.replace('{%s}' % p, '0')
|
||||
|
||||
return {
|
||||
'id': vod_id,
|
||||
'title': info.get('title') or 'Untitled Broadcast',
|
||||
'description': info.get('description'),
|
||||
'duration': int_or_none(info.get('lengthSeconds')),
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': self._get_thumbnails(thumbnail),
|
||||
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
|
||||
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
|
||||
'timestamp': unified_timestamp(info.get('publishedAt')),
|
||||
|
@ -1053,7 +1059,7 @@ def _real_extract(self, url):
|
|||
'display_id': channel_name,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'thumbnails': self._get_thumbnails(thumbnail),
|
||||
'uploader': uploader,
|
||||
'uploader_id': channel_name,
|
||||
'timestamp': timestamp,
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||
|
@ -17,6 +16,7 @@
|
|||
format_field,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
|
@ -32,11 +32,9 @@
|
|||
class TwitterBaseIE(InfoExtractor):
|
||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||
_TOKENS = {
|
||||
'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
|
||||
'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
|
||||
}
|
||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
||||
_guest_token = None
|
||||
|
||||
def _extract_variant_formats(self, variant, video_id):
|
||||
variant_url = variant.get('url')
|
||||
|
@ -94,7 +92,7 @@ def is_logged_in(self):
|
|||
|
||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||
cookies = self._get_cookies(self._API_BASE)
|
||||
headers = {}
|
||||
headers = self._AUTH.copy()
|
||||
|
||||
csrf_cookie = cookies.get('ct0')
|
||||
if csrf_cookie:
|
||||
|
@ -107,54 +105,34 @@ def _call_api(self, path, video_id, query={}, graphql=False):
|
|||
'x-twitter-active-user': 'yes',
|
||||
})
|
||||
|
||||
last_error = None
|
||||
for bearer_token in self._TOKENS:
|
||||
for first_attempt in (True, False):
|
||||
headers['Authorization'] = f'Bearer {bearer_token}'
|
||||
for first_attempt in (True, False):
|
||||
if not self.is_logged_in and not self._guest_token:
|
||||
headers.pop('x-guest-token', None)
|
||||
self._guest_token = traverse_obj(self._download_json(
|
||||
f'{self._API_BASE}guest/activate.json', video_id,
|
||||
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
||||
if self._guest_token:
|
||||
headers['x-guest-token'] = self._guest_token
|
||||
elif not self.is_logged_in:
|
||||
raise ExtractorError('Could not retrieve guest token')
|
||||
|
||||
if not self.is_logged_in:
|
||||
if not self._TOKENS[bearer_token]:
|
||||
headers.pop('x-guest-token', None)
|
||||
guest_token_response = self._download_json(
|
||||
self._API_BASE + 'guest/activate.json', video_id,
|
||||
'Downloading guest token', data=b'', headers=headers)
|
||||
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
||||
result = self._download_json(
|
||||
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
||||
video_id, headers=headers, query=query, expected_status=allowed_status,
|
||||
note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
|
||||
|
||||
self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
|
||||
if not self._TOKENS[bearer_token]:
|
||||
raise ExtractorError('Could not retrieve guest token')
|
||||
if result.get('errors'):
|
||||
errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
|
||||
if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
|
||||
self.to_screen('Guest token has expired. Refreshing guest token')
|
||||
self._guest_token = None
|
||||
continue
|
||||
|
||||
headers['x-guest-token'] = self._TOKENS[bearer_token]
|
||||
raise ExtractorError(
|
||||
f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
|
||||
|
||||
try:
|
||||
allowed_status = {400, 403, 404} if graphql else {403}
|
||||
result = self._download_json(
|
||||
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
||||
video_id, headers=headers, query=query, expected_status=allowed_status)
|
||||
|
||||
except ExtractorError as e:
|
||||
if last_error:
|
||||
raise last_error
|
||||
|
||||
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
|
||||
raise
|
||||
|
||||
last_error = e
|
||||
self.report_warning(
|
||||
'Twitter API gave 404 response, retrying with deprecated auth token. '
|
||||
'Only one media item can be extracted')
|
||||
break # continue outer loop with next bearer_token
|
||||
|
||||
if result.get('errors'):
|
||||
errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
|
||||
if first_attempt and any('bad guest token' in error.lower() for error in errors):
|
||||
self.to_screen('Guest token has expired. Refreshing guest token')
|
||||
self._TOKENS[bearer_token] = None
|
||||
continue
|
||||
|
||||
error_message = ', '.join(set(errors)) or 'Unknown error'
|
||||
raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
|
||||
|
||||
return result
|
||||
return result
|
||||
|
||||
def _build_graphql_query(self, media_id):
|
||||
raise NotImplementedError('Method must be implemented to support GraphQL')
|
||||
|
@ -313,6 +291,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': [],
|
||||
'age_limit': 18,
|
||||
},
|
||||
|
@ -391,6 +370,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': ['Damndaniel'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -431,6 +411,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -480,6 +461,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': ['Maria'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -505,6 +487,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -529,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -589,6 +573,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': [],
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
@ -630,12 +615,12 @@ class TwitterIE(TwitterBaseIE):
|
|||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'tags': ['HurricaneIan'],
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
# Adult content, uses old token
|
||||
# Fails if not logged in (GraphQL)
|
||||
# Adult content, fails if not logged in (GraphQL)
|
||||
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
||||
'info_dict': {
|
||||
'id': '1575199163847000068',
|
||||
|
@ -655,9 +640,8 @@ class TwitterIE(TwitterBaseIE):
|
|||
'age_limit': 18,
|
||||
'tags': []
|
||||
},
|
||||
'expected_warnings': ['404'],
|
||||
'skip': 'Requires authentication',
|
||||
}, {
|
||||
# Description is missing one https://t.co url (GraphQL)
|
||||
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||
'playlist_mincount': 2,
|
||||
'info_dict': {
|
||||
|
@ -669,14 +653,13 @@ class TwitterIE(TwitterBaseIE):
|
|||
'upload_date': '20210519',
|
||||
'age_limit': 0,
|
||||
'repost_count': int,
|
||||
'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
|
||||
'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
|
||||
'uploader_id': 'Srirachachau',
|
||||
'comment_count': int,
|
||||
'uploader_url': 'https://twitter.com/Srirachachau',
|
||||
'timestamp': 1621447860,
|
||||
},
|
||||
}, {
|
||||
# Description is missing one https://t.co url (GraphQL)
|
||||
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
||||
'playlist_mincount': 2,
|
||||
'info_dict': {
|
||||
|
@ -688,7 +671,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'uploader': str,
|
||||
'timestamp': 1665143744,
|
||||
'uploader_url': 'https://twitter.com/DavidToons_',
|
||||
'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
|
||||
'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
|
||||
'tags': [],
|
||||
'comment_count': int,
|
||||
'upload_date': '20221007',
|
||||
|
@ -752,7 +735,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'info_dict': {
|
||||
'id': '1600649511827013632',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
|
||||
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1670459604.0,
|
||||
'uploader_id': 'CTVJLaidlaw',
|
||||
|
@ -764,6 +747,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||
'display_id': '1600649710662213632',
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||
'upload_date': '20221208',
|
||||
'age_limit': 0,
|
||||
|
@ -791,6 +775,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||
|
@ -806,6 +791,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'repost_count': int,
|
||||
'duration': 9.531,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'upload_date': '20221203',
|
||||
'age_limit': 0,
|
||||
'timestamp': 1670092210.0,
|
||||
|
@ -815,7 +801,6 @@ class TwitterIE(TwitterBaseIE):
|
|||
},
|
||||
'params': {'noplaylist': True},
|
||||
}, {
|
||||
# Media view count is GraphQL only, force in test
|
||||
'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
|
||||
'info_dict': {
|
||||
'id': '1600009362759733248',
|
||||
|
@ -826,10 +811,10 @@ class TwitterIE(TwitterBaseIE):
|
|||
'view_count': int,
|
||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||
'age_limit': 0,
|
||||
'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
|
||||
'uploader': 'Mün The Shinobi',
|
||||
'repost_count': int,
|
||||
'upload_date': '20221206',
|
||||
'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||
'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': [],
|
||||
|
@ -837,9 +822,8 @@ class TwitterIE(TwitterBaseIE):
|
|||
'duration': 139.987,
|
||||
'timestamp': 1670306984.0,
|
||||
},
|
||||
'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
|
||||
}, {
|
||||
# url to retweet id
|
||||
# url to retweet id, legacy API
|
||||
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
||||
'info_dict': {
|
||||
'id': '1623274794488659969',
|
||||
|
@ -860,6 +844,7 @@ class TwitterIE(TwitterBaseIE):
|
|||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
|
||||
}, {
|
||||
# onion route
|
||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||
|
@ -905,11 +890,13 @@ def _graphql_to_legacy(self, data, twid):
|
|||
'tweet_results', 'result', ('tweet', None),
|
||||
), expected_type=dict, default={}, get_all=False)
|
||||
|
||||
if result.get('__typename') not in ('Tweet', None):
|
||||
if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
|
||||
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
|
||||
|
||||
if 'tombstone' in result:
|
||||
cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
|
||||
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
||||
if cause and 'adult content' in cause:
|
||||
self.raise_login_required(cause)
|
||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||
|
||||
status = result.get('legacy', {})
|
||||
|
@ -922,7 +909,7 @@ def _graphql_to_legacy(self, data, twid):
|
|||
# extra transformation is needed since result does not match legacy format
|
||||
binding_values = {
|
||||
binding_value.get('key'): binding_value.get('value')
|
||||
for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
|
||||
for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
|
||||
}
|
||||
if binding_values:
|
||||
status['card']['binding_values'] = binding_values
|
||||
|
@ -965,12 +952,7 @@ def _build_graphql_query(self, media_id):
|
|||
|
||||
def _real_extract(self, url):
|
||||
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
||||
if self.is_logged_in or self._configuration_arg('force_graphql'):
|
||||
self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
|
||||
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
||||
status = self._graphql_to_legacy(result, twid)
|
||||
|
||||
else:
|
||||
if self._configuration_arg('legacy_api') and not self.is_logged_in:
|
||||
status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||
'cards_platform': 'Web-12',
|
||||
'include_cards': 1,
|
||||
|
@ -978,6 +960,9 @@ def _real_extract(self, url):
|
|||
'include_user_entities': 0,
|
||||
'tweet_mode': 'extended',
|
||||
}), 'retweeted_status', None)
|
||||
else:
|
||||
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
||||
status = self._graphql_to_legacy(result, twid)
|
||||
|
||||
title = description = status['full_text'].replace('\n', ' ')
|
||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||
|
@ -1142,7 +1127,8 @@ def get_binding_value(k):
|
|||
if not entries:
|
||||
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
||||
if not expanded_url or expanded_url == url:
|
||||
raise ExtractorError('No video could be found in this tweet', expected=True)
|
||||
self.raise_no_formats('No video could be found in this tweet', expected=True)
|
||||
return info
|
||||
|
||||
return self.url_result(expanded_url, display_id=twid, **info)
|
||||
|
||||
|
|
108
yt_dlp/extractor/wevidi.py
Normal file
108
yt_dlp/extractor/wevidi.py
Normal file
|
@ -0,0 +1,108 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, float_or_none, get_element_by_class, js_to_json, traverse_obj
|
||||
|
||||
|
||||
class WeVidiIE(InfoExtractor):
    # Extractor for single watch pages on wevidi.net
    _VALID_URL = r'https?://(?:www\.)?wevidi\.net/watch/(?P<id>[\w-]{11})'
    _TESTS = [{
        'url': 'https://wevidi.net/watch/2th7UO5F4KV',
        'md5': 'b913d1ff5bbad499e2c7ef4aa6d829d7',
        'info_dict': {
            'id': '2th7UO5F4KV',
            'ext': 'mp4',
            'title': 'YouTube Alternative: WeVidi - customizable channels & more',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:73a27d0a87d49fbcc5584566326ebeed',
            'uploader': 'eclecRC',
            'duration': 932.098,
        }
    }, {
        'url': 'https://wevidi.net/watch/ievRuuQHbPS',
        'md5': 'ce8a94989a959bff9003fa27ee572935',
        'info_dict': {
            'id': 'ievRuuQHbPS',
            'ext': 'mp4',
            'title': 'WeVidi Playlists',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee',
            'uploader': 'WeVidi',
            'duration': 36.1999,
        }
    }, {
        'url': 'https://wevidi.net/watch/PcMzDWaQSWb',
        'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec',
        'info_dict': {
            'id': 'PcMzDWaQSWb',
            'ext': 'mp4',
            'title': 'Cat blep',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f',
            'uploader': 'WeVidi',
            'duration': 41.972,
        }
    }, {
        'url': 'https://wevidi.net/watch/wJnRqDHNe_u',
        'md5': 'c8f263dd47e66cc17546b3abf47b5a77',
        'info_dict': {
            'id': 'wJnRqDHNe_u',
            'ext': 'mp4',
            'title': 'Gissy Talks: YouTube Alternatives',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:e65036f0d4af80e0af191bd11af5195e',
            'uploader': 'GissyEva',
            'duration': 630.451,
        }
    }, {
        'url': 'https://wevidi.net/watch/4m1c4yJR_yc',
        'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed',
        'info_dict': {
            'id': '4m1c4yJR_yc',
            'ext': 'mp4',
            'title': 'Enough of that! - Awesome Exilez Podcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:96af99dd63468b2dfab3020560e3e9b2',
            'uploader': 'eclecRC',
            'duration': 6.804,
        }
    }]

    def _extract_formats(self, wvplayer_props):
        """Yield a format dict for every usable entry of the player's 'resolutions' list.

        The 'srcVID', 'srcUID' and 'srcNAME' keys are required; a missing one
        raises KeyError when the generator is first advanced.
        """
        # Taken from WeVidi player JS: https://wevidi.net/layouts/default/static/player.min.js
        resolution_map = dict(enumerate((144, 240, 360, 480, 720, 1080), start=1))

        video_part = wvplayer_props['srcVID']
        user_part = wvplayer_props['srcUID']
        name_part = wvplayer_props['srcNAME']
        src_path = f'{video_part}/{user_part}/{name_part}'

        resolutions = traverse_obj(
            wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None}))
        for res in resolutions:
            # ceil(res / 2) - 1, written with floor division only
            format_id = str((res + 1) // 2 - 1)
            yield {
                'acodec': 'mp4a.40.2',
                'ext': 'mp4',
                'format_id': format_id,
                'height': resolution_map.get(res),
                'url': f'https://www.wevidi.net/videoplayback/{src_path}/{format_id}',
                'vcodec': 'avc1.42E01E',
            }

    def _real_extract(self, url):
        """Fetch the watch page and assemble the info dict from its HTML and WVPlayer props."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def normalize_player_js(raw):
            # '||' is rewritten to '}' before the JS object is converted to JSON
            return js_to_json(raw.replace('||', '}'))

        wvplayer_props = self._search_json(
            r'WVPlayer\(', webpage, 'player', video_id,
            transform_source=normalize_player_js)

        # Fields are filled in the same order as they are emitted
        info = {'id': video_id}
        info['title'] = clean_html(get_element_by_class('video_title', webpage))
        info['description'] = clean_html(get_element_by_class('descr_long', webpage))
        info['uploader'] = clean_html(get_element_by_class('username', webpage))
        info['formats'] = list(self._extract_formats(wvplayer_props))
        info['thumbnail'] = self._og_search_thumbnail(webpage)
        info['duration'] = float_or_none(wvplayer_props.get('duration'))
        return info
|
50
yt_dlp/extractor/whyp.py
Normal file
50
yt_dlp/extractor/whyp.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class WhypIE(InfoExtractor):
    # Extractor for single track pages on whyp.it
    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
        'info_dict': {
            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
            'id': '18337',
            'title': 'Home Page Example Track',
            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
            'ext': 'mp3',
            'duration': 52.82,
            'uploader': 'Brad',
            'uploader_id': '1',
            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
        },
    }, {
        'url': 'https://www.whyp.it/tracks/18337',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the page's Nuxt data and return the info dict for its 'rawTrack' entry."""
        unique_id = self._match_id(url)
        webpage = self._download_webpage(url, unique_id)
        track = self._search_nuxt_data(webpage, unique_id)['rawTrack']

        # 'audio_url' is mandatory: a missing key raises KeyError
        info = {
            'url': track['audio_url'],
            'id': unique_id,
        }
        # Optional metadata, looked up through traverse_obj
        info.update(traverse_obj(track, {
            'title': 'title',
            'description': 'description',
            'duration': ('duration', {float_or_none}),
            'uploader': ('user', 'username'),
            'uploader_id': ('user', 'id', {str_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
        }))
        # Fixed values come last so they take precedence over the metadata above
        info.update({
            'ext': 'mp3',
            'vcodec': 'none',
            'http_headers': {'Referer': 'https://whyp.it/'},
        })
        return info
|
|
@ -2,7 +2,6 @@
|
|||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
|
@ -11,7 +10,6 @@
|
|||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
|
@ -337,121 +335,6 @@ def _search_results(self, query):
|
|||
break
|
||||
|
||||
|
||||
class YahooGyaOPlayerIE(InfoExtractor):
    # Resolves a GyaO player/episode page to the Brightcove video behind it
    IE_NAME = 'yahoo:gyao:player'
    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
        'info_dict': {
            'id': '5993125228001',
            'ext': 'mp4',
            'title': 'フューリー 【字幕版】',
            'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
            'uploader_id': '4235717419001',
            'upload_date': '20190124',
            'timestamp': 1548294365,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
        'only_matching': True,
    }, {
        'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
        'only_matching': True,
    }, {
        'url': 'https://gyao.yahoo.co.jp/episode/5fa1226c-ef8d-4e93-af7a-fd92f4e30597',
        'only_matching': True,
    }]
    _GEO_BYPASS = False

    def _real_extract(self, url):
        """Query the playback GraphQL endpoint and hand the delivery id to BrightcoveNew.

        Raises a geo-restriction error when the API answers 'not in japan',
        and ExtractorError with the API's message for any other empty result.
        """
        video_id = self._match_id(url).replace('/', ':')

        request_headers = self.geo_verification_headers()
        request_headers['Accept'] = 'application/json'

        graphql_query = '''{
content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
video {
delivery {
id
}
title
}
}
}''' % video_id

        response = self._download_json(
            'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id,
            query={
                'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
                'query': graphql_query,
            },
            headers=request_headers)

        content = response['data']['content']
        if not content:
            error_message = response['errors'][0]['message']
            if error_message == 'not in japan':
                self.raise_geo_restricted(countries=['JP'])
            raise ExtractorError(error_message)

        video = content['video']
        # Title is read before the delivery id, so a missing title is reported first
        title = video['title']
        brightcove_url = smuggle_url(
            'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
            {'geo_countries': ['JP']})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': brightcove_url,
            'ie_key': BrightcoveNewIE.ie_key(),
        }
|
||||
|
||||
|
||||
class YahooGyaOIE(InfoExtractor):
    # Playlist extractor for GyaO program/title pages
    IE_NAME = 'yahoo:gyao'
    _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'https://gyao.yahoo.co.jp/title/%E3%82%BF%E3%82%A4%E3%83%A0%E3%83%9C%E3%82%AB%E3%83%B3%E3%82%B7%E3%83%AA%E3%83%BC%E3%82%BA%20%E3%83%A4%E3%83%83%E3%82%BF%E3%83%BC%E3%83%9E%E3%83%B3/5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
        'info_dict': {
            'id': '5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
        'only_matching': True,
    }, {
        'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
        'only_matching': True,
    }, {
        'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
        'only_matching': True,
    }, {
        'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
        'only_matching': True,
    }]

    def _entries(self, program_id):
        """Yield a url_result per listed video, paging until the API reports 'ended'.

        Videos without an id, or whose 'streamingAvailability' is 'notYet',
        are skipped.
        """
        for page in itertools.count(1):
            playlist = self._download_json(
                f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
                note=f'Downloading JSON metadata page {page}')
            if not playlist:
                return

            for video in playlist['videos']:
                video_id = video.get('id')
                if not video_id or video.get('streamingAvailability') == 'notYet':
                    continue
                # Player URLs use '/' where the API ids use ':'
                player_path = video_id.replace(':', '/')
                yield self.url_result(
                    f'https://gyao.yahoo.co.jp/player/{player_path}/',
                    YahooGyaOPlayerIE.ie_key(), video_id)

            if playlist.get('ended'):
                return

    def _real_extract(self, url):
        """Return a playlist of every available video of the matched program."""
        program_id = self._match_id(url).replace('/', ':')
        entries = self._entries(program_id)
        return self.playlist_result(entries, program_id)
|
||||
|
||||
|
||||
class YahooJapanNewsIE(InfoExtractor):
|
||||
IE_NAME = 'yahoo:japannews'
|
||||
IE_DESC = 'Yahoo! Japan News'
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
js_to_json,
|
||||
str_or_none,
|
||||
|
@ -26,48 +27,8 @@ class YoukuIE(InfoExtractor):
|
|||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# MD5 is unstable
|
||||
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
|
||||
'info_dict': {
|
||||
'id': 'XMTc1ODE5Njcy',
|
||||
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
|
||||
'ext': 'mp4',
|
||||
'duration': 74.73,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': '。躲猫猫、',
|
||||
'uploader_id': '36017967',
|
||||
'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
|
||||
'tags': list,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
|
||||
'info_dict': {
|
||||
'id': 'XODgxNjg1Mzk2',
|
||||
'ext': 'mp4',
|
||||
'title': '武媚娘传奇 85',
|
||||
'duration': 1999.61,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': '疯狂豆花',
|
||||
'uploader_id': '62583473',
|
||||
'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
|
||||
'info_dict': {
|
||||
'id': 'XMTI1OTczNDM5Mg',
|
||||
'ext': 'mp4',
|
||||
'title': '花千骨 04',
|
||||
'duration': 2363,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': '放剧场-花千骨',
|
||||
'uploader_id': '772849359',
|
||||
'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
|
||||
'tags': list,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
||||
'note': 'Video protected with password',
|
||||
|
@ -81,6 +42,7 @@ class YoukuIE(InfoExtractor):
|
|||
'uploader_id': '322014285',
|
||||
'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
|
||||
'tags': list,
|
||||
'skip': '404',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '100600',
|
||||
|
@ -192,7 +154,7 @@ def _real_extract(self, url):
|
|||
else:
|
||||
msg = 'Youku server reported error %i' % error.get('code')
|
||||
if error_note is not None:
|
||||
msg += ': ' + error_note
|
||||
msg += ': ' + clean_html(error_note)
|
||||
raise ExtractorError(msg)
|
||||
|
||||
# get video title
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
int_or_none,
|
||||
merge_dicts,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
@ -86,32 +87,31 @@ class YouPornIE(InfoExtractor):
|
|||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||
definitions = self._download_json(
|
||||
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
|
||||
display_id)
|
||||
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
|
||||
|
||||
def get_format_data(data, f):
|
||||
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
|
||||
|
||||
formats = []
|
||||
for definition in definitions:
|
||||
if not isinstance(definition, dict):
|
||||
continue
|
||||
video_url = url_or_none(definition.get('videoUrl'))
|
||||
if not video_url:
|
||||
continue
|
||||
f = {
|
||||
'url': video_url,
|
||||
'filesize': int_or_none(definition.get('videoSize')),
|
||||
}
|
||||
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
|
||||
for hls_url in traverse_obj(get_format_data(definitions, 'hls'), (
|
||||
lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), (..., 'videoUrl')):
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||
|
||||
for definition in get_format_data(definitions, 'mp4'):
|
||||
f = traverse_obj(definition, {
|
||||
'url': 'videoUrl',
|
||||
'filesize': ('videoSize', {int_or_none})
|
||||
})
|
||||
height = int_or_none(definition.get('quality'))
|
||||
# Video URL's path looks like this:
|
||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||
# We will benefit from it by extracting some metadata
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', definition['videoUrl'])
|
||||
if mobj:
|
||||
if not height:
|
||||
height = int(mobj.group('height'))
|
||||
|
@ -179,6 +179,7 @@ def extract_tag_box(regex, title):
|
|||
'tags')
|
||||
|
||||
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
|
||||
data.pop('url', None)
|
||||
return merge_dicts(data, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,6 +5,7 @@
|
|||
str_or_none,
|
||||
js_to_json,
|
||||
parse_filesize,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -12,8 +13,8 @@
|
|||
|
||||
class ZoomIE(InfoExtractor):
|
||||
IE_NAME = 'zoom'
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[A-Za-z0-9_.-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
|
||||
'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
|
||||
'info_dict': {
|
||||
|
@ -22,36 +23,73 @@ class ZoomIE(InfoExtractor):
|
|||
'title': 'China\'s "two sessions" and the new five-year plan',
|
||||
},
|
||||
'skip': 'Recording requires email authentication to access',
|
||||
}
|
||||
}, {
|
||||
# play URL
|
||||
'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
|
||||
'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
|
||||
'info_dict': {
|
||||
'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
|
||||
},
|
||||
}, {
|
||||
# share URL
|
||||
'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
|
||||
'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
|
||||
'info_dict': {
|
||||
'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
|
||||
'ext': 'mp4',
|
||||
'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, play_id = self._match_valid_url(url).groups()
|
||||
webpage = self._download_webpage(url, play_id)
|
||||
def _get_page_data(self, webpage, video_id):
    """Return the object assigned to ``window.__data__`` in *webpage*, parsed via js_to_json."""
    page_data = self._search_json(
        r'window\.__data__\s*=', webpage, 'data', video_id,
        transform_source=js_to_json)
    return page_data
|
||||
|
||||
def _get_real_webpage(self, url, base_url, video_id, url_type):
|
||||
webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
|
||||
try:
|
||||
form = self._form_hidden_inputs('password_form', webpage)
|
||||
except ExtractorError:
|
||||
form = None
|
||||
if form:
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError(
|
||||
'This video is protected by a passcode, use the --video-password option', expected=True)
|
||||
is_meeting = form.get('useWhichPasswd') == 'meeting'
|
||||
validation = self._download_json(
|
||||
base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
|
||||
play_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
|
||||
'id': form[('meet' if is_meeting else 'file') + 'Id'],
|
||||
'passwd': password,
|
||||
'action': form.get('action'),
|
||||
}))
|
||||
if not validation.get('status'):
|
||||
raise ExtractorError(validation['errorMessage'], expected=True)
|
||||
webpage = self._download_webpage(url, play_id)
|
||||
return webpage
|
||||
|
||||
data = self._parse_json(self._search_regex(
|
||||
r'(?s)window\.__data__\s*=\s*({.+?});',
|
||||
webpage, 'data'), play_id, js_to_json)
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError(
|
||||
'This video is protected by a passcode, use the --video-password option', expected=True)
|
||||
is_meeting = form.get('useWhichPasswd') == 'meeting'
|
||||
validation = self._download_json(
|
||||
base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
|
||||
video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
|
||||
'id': form[('meet' if is_meeting else 'file') + 'Id'],
|
||||
'passwd': password,
|
||||
'action': form.get('action'),
|
||||
}))
|
||||
if not validation.get('status'):
|
||||
raise ExtractorError(validation['errorMessage'], expected=True)
|
||||
return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')
|
||||
|
||||
def _real_extract(self, url):
|
||||
base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
|
||||
|
||||
if url_type == 'share':
|
||||
webpage = self._get_real_webpage(url, base_url, video_id, 'share')
|
||||
meeting_id = self._get_page_data(webpage, video_id)['meetingId']
|
||||
redirect_path = self._download_json(
|
||||
f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
|
||||
video_id, note='Downloading share info JSON')['result']['redirectUrl']
|
||||
url = urljoin(base_url, redirect_path)
|
||||
|
||||
webpage = self._get_real_webpage(url, base_url, video_id, 'play')
|
||||
file_id = self._get_page_data(webpage, video_id)['fileId']
|
||||
if not file_id:
|
||||
# When things go wrong, file_id can be empty string
|
||||
raise ExtractorError('Unable to extract file ID')
|
||||
|
||||
data = self._download_json(
|
||||
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
|
||||
note='Downloading play info JSON')['result']
|
||||
|
||||
subtitles = {}
|
||||
for _type in ('transcript', 'cc', 'chapter'):
|
||||
|
@ -67,11 +105,11 @@ def _real_extract(self, url):
|
|||
formats.append({
|
||||
'format_note': 'Camera stream',
|
||||
'url': str_or_none(data.get('viewMp4Url')),
|
||||
'width': int_or_none(data.get('viewResolvtionsWidth')),
|
||||
'height': int_or_none(data.get('viewResolvtionsHeight')),
|
||||
'format_id': str_or_none(data.get('recordingId')),
|
||||
'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
|
||||
'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
|
||||
'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
|
||||
'ext': 'mp4',
|
||||
'filesize_approx': parse_filesize(data.get('fileSize')),
|
||||
'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
|
||||
'preference': 0
|
||||
})
|
||||
|
||||
|
@ -79,16 +117,16 @@ def _real_extract(self, url):
|
|||
formats.append({
|
||||
'format_note': 'Screen share stream',
|
||||
'url': str_or_none(data.get('shareMp4Url')),
|
||||
'width': int_or_none(data.get('shareResolvtionsWidth')),
|
||||
'height': int_or_none(data.get('shareResolvtionsHeight')),
|
||||
'format_id': str_or_none(data.get('shareVideoId')),
|
||||
'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
|
||||
'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
|
||||
'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
|
||||
'ext': 'mp4',
|
||||
'preference': -1
|
||||
})
|
||||
|
||||
return {
|
||||
'id': play_id,
|
||||
'title': data.get('topic'),
|
||||
'id': video_id,
|
||||
'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
|
||||
'subtitles': subtitles,
|
||||
'formats': formats,
|
||||
'http_headers': {
|
||||
|
|
|
@ -243,7 +243,7 @@ def _separate(expr, delim=',', max_split=None):
|
|||
return
|
||||
counters = {k: 0 for k in _MATCHING_PARENS.values()}
|
||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
||||
in_quote, escaping, after_op, in_regex_char_group, in_unary_op = None, False, True, False, False
|
||||
for idx, char in enumerate(expr):
|
||||
if not in_quote and char in _MATCHING_PARENS:
|
||||
counters[_MATCHING_PARENS[char]] += 1
|
||||
|
@ -258,9 +258,11 @@ def _separate(expr, delim=',', max_split=None):
|
|||
elif in_quote == '/' and char in '[]':
|
||||
in_regex_char_group = char == '['
|
||||
escaping = not escaping and in_quote and char == '\\'
|
||||
after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
|
||||
in_unary_op = (not in_quote and not in_regex_char_group
|
||||
and after_op not in (True, False) and char in '-+')
|
||||
after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
|
||||
|
||||
if char != delim[pos] or any(counters.values()) or in_quote:
|
||||
if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
|
||||
pos = 0
|
||||
continue
|
||||
elif pos != delim_len:
|
||||
|
|
|
@ -243,7 +243,7 @@ def _dict_from_options_callback(
|
|||
if multiple_keys:
|
||||
allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
|
||||
mobj = re.match(
|
||||
fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
|
||||
fr'(?is)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
|
||||
value[0] if multiple_args else value)
|
||||
if mobj is not None:
|
||||
keys, val = mobj.group('keys').split(','), mobj.group('val')
|
||||
|
@ -526,22 +526,27 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
'--cn-verification-proxy',
|
||||
dest='cn_verification_proxy', default=None, metavar='URL',
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
geo.add_option(
|
||||
'--xff', metavar='VALUE',
|
||||
dest='geo_bypass', default="default",
|
||||
help=(
|
||||
'How to fake X-Forwarded-For HTTP header to try bypassing geographic restriction. '
|
||||
'One of "default" (Only when known to be useful), "never", '
|
||||
'a two-letter ISO 3166-2 country code, or an IP block in CIDR notation'))
|
||||
geo.add_option(
|
||||
'--geo-bypass',
|
||||
action='store_true', dest='geo_bypass', default=True,
|
||||
help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (default)')
|
||||
action='store_const', dest='geo_bypass', const='default',
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
geo.add_option(
|
||||
'--no-geo-bypass',
|
||||
action='store_false', dest='geo_bypass',
|
||||
help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
|
||||
action='store_const', dest='geo_bypass', const='never',
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
geo.add_option(
|
||||
'--geo-bypass-country', metavar='CODE',
|
||||
dest='geo_bypass_country', default=None,
|
||||
help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
|
||||
'--geo-bypass-country', metavar='CODE', dest='geo_bypass',
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
geo.add_option(
|
||||
'--geo-bypass-ip-block', metavar='IP_BLOCK',
|
||||
dest='geo_bypass_ip_block', default=None,
|
||||
help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
|
||||
'--geo-bypass-ip-block', metavar='IP_BLOCK', dest='geo_bypass',
|
||||
help=optparse.SUPPRESS_HELP)
|
||||
|
||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||
selection.add_option(
|
||||
|
@ -1086,8 +1091,12 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||
verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
|
||||
verbosity.add_option(
|
||||
'-q', '--quiet',
|
||||
action='store_true', dest='quiet', default=False,
|
||||
action='store_true', dest='quiet', default=None,
|
||||
help='Activate quiet mode. If used with --verbose, print the log to stderr')
|
||||
verbosity.add_option(
|
||||
'--no-quiet',
|
||||
action='store_false', dest='quiet',
|
||||
help='Deactivate quiet mode. (Default)')
|
||||
verbosity.add_option(
|
||||
'--no-warnings',
|
||||
dest='no_warnings', action='store_true', default=False,
|
||||
|
|
|
@ -107,7 +107,7 @@ def run(self, info):
|
|||
options.extend(['-map', '-0:%d' % old_stream])
|
||||
new_stream -= 1
|
||||
options.extend([
|
||||
'-attach', thumbnail_filename,
|
||||
'-attach', self._ffmpeg_filename_argument(thumbnail_filename),
|
||||
'-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
|
||||
'-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
|
||||
|
||||
|
|
|
@ -809,7 +809,7 @@ def _get_infojson_opts(self, info, infofn):
|
|||
new_stream -= 1
|
||||
|
||||
yield (
|
||||
'-attach', infofn,
|
||||
'-attach', self._ffmpeg_filename_argument(infofn),
|
||||
f'-metadata:s:{new_stream}', 'mimetype=application/json',
|
||||
f'-metadata:s:{new_stream}', 'filename=info.json',
|
||||
)
|
||||
|
@ -898,8 +898,11 @@ def _needs_fixup(self, info):
|
|||
@PostProcessor._restrict_to(images=False)
|
||||
def run(self, info):
|
||||
if all(self._needs_fixup(info)):
|
||||
args = ['-f', 'mp4']
|
||||
if self.get_audio_codec(info['filepath']) == 'aac':
|
||||
args.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||
self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
|
||||
*self.stream_copy_opts(), '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
|
||||
*self.stream_copy_opts(), *args])
|
||||
return [], info
|
||||
|
||||
|
||||
|
|
|
@ -2187,10 +2187,11 @@ def _lock_file(f, exclusive, block):
|
|||
fcntl.lockf(f, flags)
|
||||
|
||||
def _unlock_file(f):
|
||||
try:
|
||||
fcntl.flock(f, fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
fcntl.lockf(f, fcntl.LOCK_UN)
|
||||
with contextlib.suppress(OSError):
|
||||
return fcntl.flock(f, fcntl.LOCK_UN)
|
||||
with contextlib.suppress(OSError):
|
||||
return fcntl.lockf(f, fcntl.LOCK_UN) # AOSP does not have flock()
|
||||
return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB) # virtiofs needs LOCK_NB on unlocking
|
||||
|
||||
except ImportError:
|
||||
|
||||
|
@ -3278,8 +3279,14 @@ def multipart_encode(data, boundary=None):
|
|||
return out, content_type
|
||||
|
||||
|
||||
def variadic(x, allowed_types=(str, bytes, dict)):
    """Return *x* unchanged if it is an iterable not of *allowed_types*, else wrap it in a 1-tuple."""
    if not isinstance(x, collections.abc.Iterable):
        return (x,)
    if isinstance(x, allowed_types):
        # Iterable, but of a type that must be treated as a single value
        return (x,)
    return x
|
||||
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Tell whether *x* is an instance of *allowed_types* but not of *blocked_types*.

    When *blocked_types* is left at NO_DEFAULT, `str`, `bytes` and
    `collections.abc.Mapping` are the blocked types.
    """
    rejected = (str, bytes, collections.abc.Mapping) if blocked_types is NO_DEFAULT else blocked_types
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, rejected)
|
||||
|
||||
|
||||
def variadic(x, allowed_types=NO_DEFAULT):
    """Return *x* as-is when `is_iterable_like` accepts it, otherwise wrap it in a 1-tuple.

    *allowed_types* is forwarded to `is_iterable_like` as its `blocked_types`,
    i.e. the types that are wrapped rather than returned unchanged.
    """
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x,)
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
|
@ -3371,7 +3378,7 @@ def strip_jsonp(code):
|
|||
|
||||
def js_to_json(code, vars={}, *, strict=False):
|
||||
# vars is a dict of var, val pairs to substitute
|
||||
STRING_QUOTES = '\'"'
|
||||
STRING_QUOTES = '\'"`'
|
||||
STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
|
||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
||||
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
||||
|
@ -3389,6 +3396,12 @@ def process_escape(match):
|
|||
else '' if escape == '\n'
|
||||
else escape)
|
||||
|
||||
def template_substitute(match):
|
||||
evaluated = js_to_json(match.group(1), vars, strict=strict)
|
||||
if evaluated[0] == '"':
|
||||
return json.loads(evaluated)
|
||||
return evaluated
|
||||
|
||||
def fix_kv(m):
|
||||
v = m.group(0)
|
||||
if v in ('true', 'false', 'null'):
|
||||
|
@ -3399,7 +3412,8 @@ def fix_kv(m):
|
|||
return ''
|
||||
|
||||
if v[0] in STRING_QUOTES:
|
||||
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
|
||||
v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
|
||||
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
|
||||
return f'"{escaped}"'
|
||||
|
||||
for regex, base in INTEGER_TABLE:
|
||||
|
@ -4091,6 +4105,10 @@ def data(self, data):
|
|||
def close(self):
|
||||
return self._out.strip()
|
||||
|
||||
# Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
|
||||
# This will not trigger false positives since only UTF-8 text is being replaced
|
||||
dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
|
||||
|
||||
def parse_node(node):
|
||||
target = TTMLPElementParser()
|
||||
parser = xml.etree.ElementTree.XMLParser(target=target)
|
||||
|
@ -5461,7 +5479,7 @@ def traverse_obj(
|
|||
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
|
||||
casesense=True, is_user_input=False, traverse_string=False):
|
||||
"""
|
||||
Safely traverse nested `dict`s and `Sequence`s
|
||||
Safely traverse nested `dict`s and `Iterable`s
|
||||
|
||||
>>> obj = [{}, {"key": "value"}]
|
||||
>>> traverse_obj(obj, (1, "key"))
|
||||
|
@ -5469,7 +5487,7 @@ def traverse_obj(
|
|||
|
||||
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
||||
The next path will also be tested if the path branched but no results could be found.
|
||||
Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
|
||||
Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
|
||||
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
||||
|
||||
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
||||
|
@ -5486,7 +5504,7 @@ def traverse_obj(
|
|||
Read as: `[traverse_obj(obj, branch) for branch in branches]`.
|
||||
- `function`: Branch out and return values filtered by the function.
|
||||
Read as: `[value for key, value in obj if function(key, value)]`.
|
||||
For `Sequence`s, `key` is the index of the value.
|
||||
For `Iterable`s, `key` is the index of the value.
|
||||
For `re.Match`es, `key` is the group number (0 = full match)
|
||||
as well as additionally any group names, if given.
|
||||
- `dict` Transform the current object and return a matching dict.
|
||||
|
@ -5522,7 +5540,6 @@ def traverse_obj(
|
|||
If no `default` is given and the last path branches, a `list` of results
|
||||
is always returned. If a path ends on a `dict` that result will always be a `dict`.
|
||||
"""
|
||||
is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
|
||||
casefold = lambda k: k.casefold() if isinstance(k, str) else k
|
||||
|
||||
if isinstance(expected_type, type):
|
||||
|
@ -5535,7 +5552,9 @@ def apply_key(key, obj, is_last):
|
|||
result = None
|
||||
|
||||
if obj is None and traverse_string:
|
||||
pass
|
||||
if key is ... or callable(key) or isinstance(key, slice):
|
||||
branching = True
|
||||
result = ()
|
||||
|
||||
elif key is None:
|
||||
result = obj
|
||||
|
@ -5558,7 +5577,7 @@ def apply_key(key, obj, is_last):
|
|||
branching = True
|
||||
if isinstance(obj, collections.abc.Mapping):
|
||||
result = obj.values()
|
||||
elif is_sequence(obj):
|
||||
elif is_iterable_like(obj):
|
||||
result = obj
|
||||
elif isinstance(obj, re.Match):
|
||||
result = obj.groups()
|
||||
|
@ -5572,7 +5591,7 @@ def apply_key(key, obj, is_last):
|
|||
branching = True
|
||||
if isinstance(obj, collections.abc.Mapping):
|
||||
iter_obj = obj.items()
|
||||
elif is_sequence(obj):
|
||||
elif is_iterable_like(obj):
|
||||
iter_obj = enumerate(obj)
|
||||
elif isinstance(obj, re.Match):
|
||||
iter_obj = itertools.chain(
|
||||
|
@ -5596,7 +5615,7 @@ def apply_key(key, obj, is_last):
|
|||
} or None
|
||||
|
||||
elif isinstance(obj, collections.abc.Mapping):
|
||||
result = (obj.get(key) if casesense or (key in obj) else
|
||||
result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
|
||||
next((v for k, v in obj.items() if casefold(k) == key), None))
|
||||
|
||||
elif isinstance(obj, re.Match):
|
||||
|
@ -5608,7 +5627,7 @@ def apply_key(key, obj, is_last):
|
|||
result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
|
||||
|
||||
elif isinstance(key, (int, slice)):
|
||||
if is_sequence(obj):
|
||||
if is_iterable_like(obj, collections.abc.Sequence):
|
||||
branching = isinstance(key, slice)
|
||||
with contextlib.suppress(IndexError):
|
||||
result = obj[key]
|
||||
|
|
Loading…
Reference in a new issue